"use strict";(self.webpackChunk_N_E=self.webpackChunk_N_E||[]).push([[895],{60300:(e,t,n)=>{n.d(t,{A:()=>c});var i=n(74848),a=n(65243),s=n(91106),o=n.n(s);let r={primary:"bg-blue-600 text-white hover:bg-blue-500 enabled:hover:text-white/80 transition-colors",secondary:"button",tertiary:"bg-white/10 rounded-lg px-4 py-2 hover:bg-white/20 transition disabled:bg-gray/80 text-white text-base font-semibold"},l={small:"px-4 py-2 text-sm",default:"px-6 py-3 text-base"};function c(e){let{className:t,onClick:n,variant:s="primary",size:c="default",children:h,type:d,disabled:p,title:u,...m}=e,{href:g,target:f}=m,y=(0,a.QP)("font-display inline-flex items-center justify-center rounded text-base font-semibold text-white leading-tight disabled:!select-none disabled:!bg-gray-400",r[s],l[c],t);return g?(0,i.jsx)(o(),{href:g,target:f,className:y,title:u,children:h}):(0,i.jsx)("button",{className:y,onClick:n,type:d,disabled:p,title:u,children:h})}},34760:(e,t,n)=>{n.d(t,{A:()=>o});var i=n(74848),a=n(32485),s=n.n(a);function o(e){let{size:t,className:n,children:a}=e;switch(t){case"h1":return(0,i.jsx)("h1",{className:s()("font-bold leading-tighter text-3xl md:text-5xl",n),children:a});case"h2":return(0,i.jsx)("h2",{className:s()("font-bold text-2xl md:text-4xl",n),children:a});case"h3":return(0,i.jsx)("h3",{className:s()("font-bold leading-tight text-xl md:text-3xl",n),children:a});case"h4":return(0,i.jsx)("h4",{className:s()("font-bold leading-tight text-lg md:text-2xl",n),children:a});case"h5":return(0,i.jsx)("h5",{className:s()("eyebrow",n),children:a});default:return(0,i.jsx)("p",{className:s()("font-bold leading-tight text-white",n),children:a})}}},49788:(e,t,n)=>{n.d(t,{A:()=>c});var i=n(74848),a=n(54701),s=n(32485),o=n.n(s),r=n(34760),l=n(8499);function c(e){let{homepage:t=!0}=e;return(0,i.jsx)(a.kQ,{speed:-10,className:o()({"h-48 md:h-64":!t,"h-64 md:h-96":t}),children:(0,i.jsx)("div",{className:"".concat("after:absolute after:block after:bg-blue-400 after:blur-3xl after:content-[' '] after:h-96 after:opacity-5 after:right-0 after:rounded-full after:top-20 after:w-1/2 after:z-0"," ").concat("before:absolute before:block before:blur-3xl before:bg-orange-400 before:content-[' '] before:h-96 before:left-0 before:opacity-5 before:rounded-full before:w-1/2 before:z-0"," w-full h-full relative"),children:(0,i.jsx)("div",{className:"relative z-10 w-full h-[155%] -top-[25%] bg-no-repeat bg-cover bg-bottom flex items-center justify-center",style:{backgroundImage:"url(".concat((0,l.h)("/grid.svg"),")")},children:t&&(0,i.jsx)(r.A,{className:"text-center max-w-3xl pt-8",size:"h1",children:"Elastic\xa0Observability\xa0Labs"})})})})}},88796:(e,t,n)=>{n.d(t,{A:()=>W});var i=n(74848),a=n(96540),s=n(86715),o=n(13368),r=n.n(o),l=n(32485),c=n.n(l),h=n(94624),d=n.n(h),p=n(4724),u=n(99106),m=n(98016),g=n(37639),f=n(29965),y=n.n(f),b=n(91106),w=n.n(b),v=n(61053),x=n(19130),j=n(28872),E=n(13537),T=n(54784);function k(e){let{path:t,text:n,icon:s,active:o,links:r}=e,l=r&&r.length>6,h=c()("px-1 py-1 flex flex-col space-y-1",{"md:grid md:grid-cols-2 gap-1":l}),d=c()("flex lg:inline-flex font-light my-1 py-1 px-2 font-display font-semibold lg:text-sm xl:text-base items-center transition hover:hover-link hover:text-white focus:accessible-link-focus",{"hover-link":o}),p="ml-1 -mr-1 h-4 w-4 text-zinc-400 relative top-[1px]",u=c()("absolute left-1/2 mt-2 w-64 origin-top-center ring-1 ring-black ring-opacity-5 focus:outline-none transform -translate-x-1/2",{"md:w-[30rem]":l});return 
r?(0,i.jsx)(v.W1,{as:"div",className:"relative",children:e=>{let{open:t}=e;return(0,i.jsxs)(i.Fragment,{children:[(0,i.jsx)("div",{children:(0,i.jsxs)(v.W1.Button,{className:d,children:[n,t?(0,i.jsx)(j.A,{className:p,"aria-hidden":"true"}):(0,i.jsx)(E.A,{className:p,"aria-hidden":"true"})]})}),(0,i.jsx)(x.e,{as:a.Fragment,enter:"transition ease-out duration-100",enterFrom:"transform opacity-0 scale-95",enterTo:"transform opacity-100 scale-100",leave:"transition ease-in duration-75",leaveFrom:"transform opacity-100 scale-100",leaveTo:"transform opacity-0 scale-95",children:(0,i.jsx)(v.W1.Items,{className:u,children:(0,i.jsx)(T.A,{size:"small",children:(0,i.jsx)("div",{className:h,children:r.map((e,t)=>(0,i.jsx)(v.W1.Item,{children:()=>(0,i.jsx)(w(),{href:e.path,className:d,children:e.text})},t))})})})})]})}}):(0,i.jsxs)(w(),{className:d,href:t,children:[s,(0,i.jsx)("span",{className:s?"hidden xl:block":"",children:n})]})}function A(e){let{className:t}=e;return(0,i.jsxs)("svg",{width:"24",height:"21",xmlns:"https://www.w3.org/2000/svg",className:t,viewBox:"0 0 24 21",fill:"none",children:[(0,i.jsx)("path",{d:"M7.45008 14.2379C7.94814 15.4192 9.07839 16.2437 10.3929 16.2437H15.9032L20.0358 20.4424V16.2437C21.8111 16.2437 23.2502 14.74 23.2502 12.8849V7.84663C23.2502 5.99158 21.8111 4.48777 20.0358 4.48777H18.4287V10.9879C18.4287 12.7829 17.0855 14.2379 15.4287 14.2379H7.45008Z",fill:"white"}),(0,i.jsx)("path",{d:"M7.35776 11.019H13.6071C14.4947 11.019 15.2144 10.2671 15.2144 9.33955V4.30126C15.2144 3.37372 14.4947 2.62181 13.6071 2.62181H3.96429C3.07668 2.62181 2.35715 3.37372 2.35715 4.30126V9.33955C2.35715 10.2671 3.07668 11.019 3.96429 11.019H5.57144V13.1968L7.35776 11.019ZM8.09694 12.6984L3.96429 16.897V12.6984C2.18909 12.6984 0.75 11.1946 0.75 9.33955V4.30126C0.75 2.44619 2.18909 0.942383 3.96429 0.942383H13.6071C15.3824 0.942383 16.8215 2.44619 16.8215 4.30126V9.33955C16.8215 11.1946 15.3824 12.6984 13.6071 12.6984H8.09694Z",fill:"white"})]})}function O(e){let{className:t}=e;return(0,i.jsx)("svg",{width:"98",height:"96",xmlns:"https://www.w3.org/2000/svg",className:t,viewBox:"0 0 98 96",fill:"none",children:(0,i.jsx)("path",{fillRule:"evenodd",clipRule:"evenodd",d:"M48.854 0C21.839 0 0 22 0 49.217c0 21.756 13.993 40.172 33.405 46.69 2.427.49 3.316-1.059 3.316-2.362 0-1.141-.08-5.052-.08-9.127-13.59 2.934-16.42-5.867-16.42-5.867-2.184-5.704-5.42-7.17-5.42-7.17-4.448-3.015.324-3.015.324-3.015 4.934.326 7.523 5.052 7.523 5.052 4.367 7.496 11.404 5.378 14.235 4.074.404-3.178 1.699-5.378 3.074-6.6-10.839-1.141-22.243-5.378-22.243-24.283 0-5.378 1.94-9.778 5.014-13.2-.485-1.222-2.184-6.275.486-13.038 0 0 4.125-1.304 13.426 5.052a46.97 46.97 0 0 1 12.214-1.63c4.125 0 8.33.571 12.213 1.63 9.302-6.356 13.427-5.052 13.427-5.052 2.67 6.763.97 11.816.485 13.038 3.155 3.422 5.015 7.822 5.015 13.2 0 18.905-11.404 23.06-22.324 24.283 1.78 1.548 3.316 4.481 3.316 9.126 0 6.6-.08 11.897-.08 13.526 0 1.304.89 2.853 3.316 2.364 19.412-6.52 33.405-24.935 33.405-46.691C97.707 22 75.788 0 48.854 0z",fill:"currentColor"})})}var _=n(76424),S=n(8499),P=n(16939),I=n(30772);let C={navigationLinks:[{path:"/blog",text:"Articles"},{text:"Tags",links:(0,P.Yr)().map(e=>({path:(0,I.w_)(e.slug),text:e.title}))}]};var L=n(60300);function M(){let 
e=(0,s.useRouter)(),t=(0,_.usePathname)(),n="".concat("https://search.elastic.co/?location%5B0%5D=Observability%20Labs","&referrer=").concat("https://www.elastic.co/observability-labs").concat(e.asPath),o=()=>(0,i.jsxs)(i.Fragment,{children:[(0,i.jsx)(L.A,{size:"small",href:"https://cloud.elastic.co/registration?cta=cloud-registration&tech=trial&plcmt=navigation&pg=observability-labs",className:"flex-1 lg:flex-auto",children:"Start free trial"}),(0,i.jsx)(L.A,{size:"small",variant:"secondary",href:"https://www.elastic.co/contact",className:"flex-1 lg:flex-auto",children:"Contact sales"})]}),r=(0,a.useMemo)(()=>(0,i.jsx)(w(),{href:n,className:"rounded flex items-center p-4 text-white focus:outline-none focus:ring-0 focus:ring-offset-1 focus:ring-offset-zinc-600 group",children:(0,i.jsx)("div",{className:"flex items-center relative font-display",children:(0,i.jsx)(p.A,{className:"h-6 w-6","aria-hidden":"true"})})}),[n]);return(0,i.jsx)(i.Fragment,{children:(0,i.jsx)(g.EN,{as:"nav",className:"fixed w-full z-40",children:e=>{let{open:n}=e;return(0,i.jsxs)(i.Fragment,{children:[(0,i.jsx)("div",{className:"bg-gradient-to-b from-zinc-900 from-20% h-[200%] to-transparent absolute inset-0 z-0 pointer-events-none"}),(0,i.jsx)("div",{className:"container relative z-10",children:(0,i.jsxs)("div",{className:"flex h-16 items-center justify-between",children:[(0,i.jsxs)("div",{className:"flex items-center justify-start w-full",children:[(0,i.jsx)("div",{children:(0,i.jsx)(w(),{href:"/",className:"hover:opacity-50 transition",children:(0,i.jsx)(y(),{alt:"Elastic Observability Labs logo",height:30,priority:!0,src:(0,S.h)("/logo.svg"),width:241})})}),(0,i.jsx)("div",{className:"hidden lg:ml-6 lg:block",children:(0,i.jsx)("div",{className:"flex space-x-4",children:C.navigationLinks.map((e,n)=>{let a=t===e.path;return(0,i.jsx)(k,{path:e.path,text:e.text,links:e.links,active:a},"navLink-".concat(n))})})}),(0,i.jsx)("div",{className:"hidden lg:ml-auto lg:block",children:(0,i.jsxs)("div",{className:"flex items-center space-x-4",children:[r,(0,i.jsx)(w(),{className:"inline-flex items-center font-semibold text-white",href:"https://github.com/elastic/observability-examples",children:(0,i.jsx)(O,{className:"h-5 w-5 mr-2"})}),(0,i.jsx)(w(),{className:"inline-flex items-center font-semibold text-white",href:"https://discuss.elastic.co/c/observability/82",children:(0,i.jsx)(A,{className:"h-6 w-6 mt-1 mr-2"})}),o()]})})]}),(0,i.jsxs)("div",{className:"-mr-2 flex lg:hidden",children:[r,(0,i.jsxs)(g.EN.Button,{className:"inline-flex items-center justify-center rounded-md p-2 text-gray-400 hover:bg-gray-700 hover:text-white focus:outline-none focus:ring-2 focus:ring-inset focus:ring-white",children:[(0,i.jsx)("span",{className:"sr-only",children:"Open navigation menu"}),n?(0,i.jsx)(u.A,{className:"block h-6 w-6","aria-hidden":"true"}):(0,i.jsx)(m.A,{className:"block h-6 w-6","aria-hidden":"true"})]})]})]})}),(0,i.jsx)(g.EN.Panel,{className:"lg:hidden relative bg-zinc-950 flex-1 h-[calc(100vh-64px)] flex items-stretch",children:(0,i.jsxs)("div",{className:"container flex-1 flex flex-col relative",children:[(0,i.jsx)("div",{className:" border-t border-zinc-800 flex flex-col justify-between relative h-[calc(100%-56px)] overflow-auto",children:(0,i.jsxs)("div",{className:"py-5 flex flex-col gap-2 flex-1 text-2xl font-medium text-white",children:[(0,i.jsx)(w(),{href:"/",children:"All Articles"}),(0,i.jsx)("div",{className:"text-xl mt-2 text-zinc-200",children:"Popular 
tags"}),(0,P.Yr)().map(e=>(0,i.jsx)(w(),{href:(0,I.w_)(e.slug),children:e.title},e.slug))]})}),(0,i.jsx)("div",{className:"border-t border-zinc-700 flex items-center justify-center gap-4 h-14 absolute inset-x-0 bottom-0 z-10 px-4",children:o()})]})})]})}})})}var R=n(87839),D=n(52346);function z(){let e=new Date().getFullYear();return(0,i.jsx)("footer",{className:"mt-auto text-xs md:text-sm",children:(0,i.jsxs)("div",{className:"container py-6 flex flex-col md:flex-row gap-2 md:gap-0 justify-between items-center",children:[(0,i.jsx)("div",{className:"text-zinc-300",children:(0,i.jsx)("nav",{children:(0,i.jsxs)("ul",{className:"flex space-x-4",children:[(0,i.jsx)("li",{children:(0,i.jsxs)(w(),{href:"".concat("https://www.elastic.co/observability-labs","/rss/feed.xml"),className:"hover:text-white font-medium flex items-center gap-1",children:[(0,i.jsx)(D.A,{className:"w-3 h-3"})," Subscribe"]})}),(0,i.jsx)("li",{children:(0,i.jsx)(w(),{href:"/sitemap.xml/",className:"hover:text-white font-medium",children:"Sitemap"})}),(0,i.jsx)("li",{children:(0,i.jsxs)(w(),{href:"https://elastic.co?utm_source=elastic-security-labs&utm_medium=referral&utm_campaign=security-labs&utm_content=footer",className:"hover:text-white font-medium flex items-center space-x-1",children:[(0,i.jsx)(R.A,{className:"inline-block w-3 h-3"}),(0,i.jsx)("span",{children:"Elastic.co"})]})})]})})}),(0,i.jsx)("div",{className:"flex flex-col space-y-1 text-zinc-300",children:(0,i.jsxs)("p",{children:["\xa9 ",e,". Elasticsearch B.V. All Rights Reserved."]})})]})})}var N=n(77836);function W(e){let{title:t,description:n,image:o,showScrollIndicator:l,children:h}=e,p=(0,s.useRouter)(),[u,m]=(0,a.useState)(0),g=()=>m(document.documentElement.scrollTop/(document.documentElement.scrollHeight-document.documentElement.clientHeight)*100);(0,a.useEffect)(()=>{window.addEventListener("scroll",g);let e=e=>{"k"===e.key&&(e.metaKey||e.ctrlKey)&&(e.preventDefault(),p.push("/search"))};return window.addEventListener("keydown",e),()=>{window.removeEventListener("scroll",g),window.removeEventListener("keydown",e)}},[u,p]);let f=d().randomBytes(16).toString("hex"),y=t?"".concat(t," — Elastic Observability Labs"):"Elastic Observability Labs",b=n||"Explore Elastic Observability Labs for expert-led resources and hands-on learning. 
Enhance your skills and optimize your observability strategy with Elastic.",w=o?"".concat("https://www.elastic.co/observability-labs").concat(o,"?").concat(f):"".concat("https://www.elastic.co/observability-labs","/assets/observability-labs-thumbnail.png?").concat(f);return(0,i.jsxs)(a.Fragment,{children:[(0,i.jsxs)(r(),{children:[(0,i.jsx)("title",{children:y}),(0,i.jsx)("meta",{name:"description",content:b}),(0,i.jsx)("meta",{property:"og:title",content:y}),(0,i.jsx)("meta",{property:"og:description",content:b}),(0,i.jsx)("meta",{property:"og:image",content:w}),(0,i.jsx)("meta",{property:"og:image:alt",content:b}),(0,i.jsx)("meta",{property:"og:site_name",content:N.env.siteTitle}),(0,i.jsx)("meta",{property:"og:url",content:"".concat("https://www.elastic.co/observability-labs").concat(p.asPath)}),(0,i.jsx)("meta",{property:"og:type",content:"website"}),(0,i.jsx)("meta",{name:"twitter:card",content:"summary_large_image"}),(0,i.jsx)("meta",{name:"twitter:title",content:y}),(0,i.jsx)("meta",{name:"twitter:description",content:b}),(0,i.jsx)("meta",{name:"twitter:image",content:w}),(0,i.jsx)("meta",{name:"twitter:image:alt",content:b})]}),(0,i.jsx)("div",{className:c()("scroll-percentage-container",{invisible:!l}),children:(0,i.jsx)("div",{className:"scroll-percentage-bar",style:{width:"".concat(u,"%")}})}),(0,i.jsx)(M,{}),(0,i.jsx)("main",{className:"mb-20 flex-1 flex flex-col",children:h}),(0,i.jsx)(z,{})]})}},54784:(e,t,n)=>{n.d(t,{A:()=>o});var i=n(74848),a=n(32485),s=n.n(a);function o(e){let{children:t,className:n,size:a="default",hoverable:o}=e,r=s()({"bg-zinc-900 border border-zinc-800 drop-shadow-lg p-5 sm:p-8 md:p-10 rounded-3xl":"default"===a,"bg-zinc-900 border border-zinc-800 drop-shadow-lg p-5 md:p-2 sm:p-4 md:px-6 md:py-4 rounded-xl":"small"===a,"hover:bg-zing-950 transition":o},n);return(0,i.jsx)("div",{className:r,children:t})}},30772:(e,t,n)=>{n.d(t,{Ef:()=>o,Wo:()=>a,kY:()=>s,w_:()=>l,yW:()=>r});var i=n(36503);function a(e){return new Date(i.c9.fromISO(e).setZone("America/New_York").toISO()).toLocaleDateString("en-GB",{day:"numeric",month:"long",year:"numeric"})}function s(e){return e.map(e=>e.title).join(", ")}function o(e){return e.replace(/^(.*)\s(.*)$/,"$1 $2").replace(/-/g,"- ")}function r(e){return"/blog/".concat(e)}function l(e){return"/blog/tag/".concat(e)}},16939:(e,t,n)=>{n.d(t,{kq:()=>r,xQ:()=>l,Yr:()=>c}),n(50341);let i=JSON.parse('[{"title":"Abhishek Singh","slug":"abhishek-singh","description":"General Manager, Observability at Elastic","image":"","body":{"raw":"\\n","code":"var Component=(()=>{var h=Object.create;var i=Object.defineProperty;var l=Object.getOwnPropertyDescriptor;var x=Object.getOwnPropertyNames;var f=Object.getPrototypeOf,_=Object.prototype.hasOwnProperty;var b=(t,e)=>()=>(e||t((e={exports:{}}).exports,e),e.exports),d=(t,e)=>{for(var n in e)i(t,n,{get:e[n],enumerable:!0})},s=(t,e,n,o)=>{if(e&&typeof e==\\"object\\"||typeof e==\\"function\\")for(let a of x(e))!_.call(t,a)&&a!==n&&i(t,a,{get:()=>e[a],enumerable:!(o=l(e,a))||o.enumerable});return t};var p=(t,e,n)=>(n=t!=null?h(f(t)):{},s(e||!t||!t.__esModule?i(n,\\"default\\",{value:t,enumerable:!0}):n,t)),M=t=>s(i({},\\"__esModule\\",{value:!0}),t);var m=b((C,c)=>{c.exports=_jsx_runtime});var k={};d(k,{default:()=>g,frontmatter:()=>j});var r=p(m()),j={title:\\"Abhishek Singh\\",slug:\\"abhishek-singh\\",description:\\"General Manager, Observability at Elastic\\",image:\\"\\"};function u(t){return(0,r.jsx)(r.Fragment,{})}function g(t={}){let{wrapper:e}=t.components||{};return 
e?(0,r.jsx)(e,{...t,children:(0,r.jsx)(u,{...t})}):u(t)}return M(k);})();\\n;return Component;"},"_id":"authors/abhishek-singh.mdx","_raw":{"sourceFilePath":"authors/abhishek-singh.mdx","sourceFileName":"abhishek-singh.mdx","sourceFileDir":"authors","contentType":"mdx","flattenedPath":"authors/abhishek-singh"},"type":"Author","imageUrl":"","url":"/authors/abhishek-singh"},{"title":"Achyut Jhunjhunwala","slug":"achyut-jhunjhunwala","description":"","image":"","body":{"raw":"\\n","code":"var Component=(()=>{var x=Object.create;var o=Object.defineProperty;var j=Object.getOwnPropertyDescriptor;var l=Object.getOwnPropertyNames;var f=Object.getPrototypeOf,_=Object.prototype.hasOwnProperty;var d=(t,n)=>()=>(n||t((n={exports:{}}).exports,n),n.exports),g=(t,n)=>{for(var e in n)o(t,e,{get:n[e],enumerable:!0})},c=(t,n,e,u)=>{if(n&&typeof n==\\"object\\"||typeof n==\\"function\\")for(let a of l(n))!_.call(t,a)&&a!==e&&o(t,a,{get:()=>n[a],enumerable:!(u=j(n,a))||u.enumerable});return t};var p=(t,n,e)=>(e=t!=null?x(f(t)):{},c(n||!t||!t.__esModule?o(e,\\"default\\",{value:t,enumerable:!0}):e,t)),w=t=>c(o({},\\"__esModule\\",{value:!0}),t);var s=d((D,i)=>{i.exports=_jsx_runtime});var M={};g(M,{default:()=>h,frontmatter:()=>y});var r=p(s()),y={title:\\"Achyut Jhunjhunwala\\",slug:\\"achyut-jhunjhunwala\\",description:\\"\\",image:\\"\\"};function m(t){return(0,r.jsx)(r.Fragment,{})}function h(t={}){let{wrapper:n}=t.components||{};return n?(0,r.jsx)(n,{...t,children:(0,r.jsx)(m,{...t})}):m(t)}return w(M);})();\\n;return Component;"},"_id":"authors/achyut-jhunjhunwala.mdx","_raw":{"sourceFilePath":"authors/achyut-jhunjhunwala.mdx","sourceFileName":"achyut-jhunjhunwala.mdx","sourceFileDir":"authors","contentType":"mdx","flattenedPath":"authors/achyut-jhunjhunwala"},"type":"Author","imageUrl":"","url":"/authors/achyut-jhunjhunwala"},{"title":"Agi K Thomas","slug":"agi-thomas","description":"Senior Software Engineer, Observability at Elastic","image":"","body":{"raw":"","code":"var Component=(()=>{var x=Object.create;var a=Object.defineProperty;var f=Object.getOwnPropertyDescriptor;var l=Object.getOwnPropertyNames;var _=Object.getPrototypeOf,d=Object.prototype.hasOwnProperty;var p=(t,e)=>()=>(e||t((e={exports:{}}).exports,e),e.exports),h=(t,e)=>{for(var n in e)a(t,n,{get:e[n],enumerable:!0})},s=(t,e,n,i)=>{if(e&&typeof e==\\"object\\"||typeof e==\\"function\\")for(let o of l(e))!d.call(t,o)&&o!==n&&a(t,o,{get:()=>e[o],enumerable:!(i=f(e,o))||i.enumerable});return t};var j=(t,e,n)=>(n=t!=null?x(_(t)):{},s(e||!t||!t.__esModule?a(n,\\"default\\",{value:t,enumerable:!0}):n,t)),M=t=>s(a({},\\"__esModule\\",{value:!0}),t);var c=p((C,m)=>{m.exports=_jsx_runtime});var w={};h(w,{default:()=>g,frontmatter:()=>b});var r=j(c()),b={title:\\"Agi K Thomas\\",slug:\\"agi-thomas\\",description:\\"Senior Software Engineer, Observability at Elastic\\",image:\\"\\"};function u(t){return(0,r.jsx)(r.Fragment,{})}function g(t={}){let{wrapper:e}=t.components||{};return e?(0,r.jsx)(e,{...t,children:(0,r.jsx)(u,{...t})}):u(t)}return M(w);})();\\n;return Component;"},"_id":"authors/agi-thomas.mdx","_raw":{"sourceFilePath":"authors/agi-thomas.mdx","sourceFileName":"agi-thomas.mdx","sourceFileDir":"authors","contentType":"mdx","flattenedPath":"authors/agi-thomas"},"type":"Author","imageUrl":"","url":"/authors/agi-thomas"},{"title":"Akhilesh Pokhariyal","slug":"akhilesh-pokhariyal","description":"","image":"","body":{"raw":"\\n","code":"var Component=(()=>{var h=Object.create;var a=Object.defineProperty;var 
x=Object.getOwnPropertyDescriptor;var f=Object.getOwnPropertyNames;var p=Object.getPrototypeOf,_=Object.prototype.hasOwnProperty;var d=(t,e)=>()=>(e||t((e={exports:{}}).exports,e),e.exports),g=(t,e)=>{for(var n in e)a(t,n,{get:e[n],enumerable:!0})},s=(t,e,n,i)=>{if(e&&typeof e==\\"object\\"||typeof e==\\"function\\")for(let o of f(e))!_.call(t,o)&&o!==n&&a(t,o,{get:()=>e[o],enumerable:!(i=x(e,o))||i.enumerable});return t};var k=(t,e,n)=>(n=t!=null?h(p(t)):{},s(e||!t||!t.__esModule?a(n,\\"default\\",{value:t,enumerable:!0}):n,t)),j=t=>s(a({},\\"__esModule\\",{value:!0}),t);var l=d((D,c)=>{c.exports=_jsx_runtime});var M={};g(M,{default:()=>u,frontmatter:()=>y});var r=k(l()),y={title:\\"Akhilesh Pokhariyal\\",slug:\\"akhilesh-pokhariyal\\",description:\\"\\",image:\\"\\"};function m(t){return(0,r.jsx)(r.Fragment,{})}function u(t={}){let{wrapper:e}=t.components||{};return e?(0,r.jsx)(e,{...t,children:(0,r.jsx)(m,{...t})}):m(t)}return j(M);})();\\n;return Component;"},"_id":"authors/akhilesh-pokhariyal.mdx","_raw":{"sourceFilePath":"authors/akhilesh-pokhariyal.mdx","sourceFileName":"akhilesh-pokhariyal.mdx","sourceFileDir":"authors","contentType":"mdx","flattenedPath":"authors/akhilesh-pokhariyal"},"type":"Author","imageUrl":"","url":"/authors/akhilesh-pokhariyal"},{"title":"Alexander Wert","slug":"alexander-wert","description":"Senior Manager, Observability Edge Collection and OpenTelemetry at Elastic","image":"alexander-wert.jpg","body":{"raw":"\\n","code":"var Component=(()=>{var m=Object.create;var o=Object.defineProperty;var u=Object.getOwnPropertyDescriptor;var g=Object.getOwnPropertyNames;var p=Object.getPrototypeOf,f=Object.prototype.hasOwnProperty;var _=(e,t)=>()=>(t||e((t={exports:{}}).exports,t),t.exports),j=(e,t)=>{for(var n in t)o(e,n,{get:t[n],enumerable:!0})},l=(e,t,n,i)=>{if(t&&typeof t==\\"object\\"||typeof t==\\"function\\")for(let a of g(t))!f.call(e,a)&&a!==n&&o(e,a,{get:()=>t[a],enumerable:!(i=u(t,a))||i.enumerable});return e};var M=(e,t,n)=>(n=e!=null?m(p(e)):{},l(t||!e||!e.__esModule?o(n,\\"default\\",{value:e,enumerable:!0}):n,e)),w=e=>l(o({},\\"__esModule\\",{value:!0}),e);var c=_((D,s)=>{s.exports=_jsx_runtime});var C={};j(C,{default:()=>d,frontmatter:()=>y});var r=M(c()),y={title:\\"Alexander Wert\\",slug:\\"alexander-wert\\",description:\\"Senior Manager, Observability Edge Collection and OpenTelemetry at Elastic\\",image:\\"alexander-wert.jpg\\"};function x(e){return(0,r.jsx)(r.Fragment,{})}function d(e={}){let{wrapper:t}=e.components||{};return t?(0,r.jsx)(t,{...e,children:(0,r.jsx)(x,{...e})}):x(e)}return w(C);})();\\n;return Component;"},"_id":"authors/alexander-wert.mdx","_raw":{"sourceFilePath":"authors/alexander-wert.mdx","sourceFileName":"alexander-wert.mdx","sourceFileDir":"authors","contentType":"mdx","flattenedPath":"authors/alexander-wert"},"type":"Author","imageUrl":"/assets/images/authors/alexander-wert.jpg","url":"/authors/alexander-wert"},{"title":"Alexis Roberson","slug":"alexis-roberson","description":"","image":"","body":{"raw":"\\n","code":"var Component=(()=>{var l=Object.create;var s=Object.defineProperty;var f=Object.getOwnPropertyDescriptor;var _=Object.getOwnPropertyNames;var d=Object.getPrototypeOf,g=Object.prototype.hasOwnProperty;var p=(t,e)=>()=>(e||t((e={exports:{}}).exports,e),e.exports),j=(t,e)=>{for(var n in e)s(t,n,{get:e[n],enumerable:!0})},i=(t,e,n,a)=>{if(e&&typeof e==\\"object\\"||typeof e==\\"function\\")for(let o of _(e))!g.call(t,o)&&o!==n&&s(t,o,{get:()=>e[o],enumerable:!(a=f(e,o))||a.enumerable});return t};var 
M=(t,e,n)=>(n=t!=null?l(d(t)):{},i(e||!t||!t.__esModule?s(n,\\"default\\",{value:t,enumerable:!0}):n,t)),b=t=>i(s({},\\"__esModule\\",{value:!0}),t);var c=p((X,x)=>{x.exports=_jsx_runtime});var D={};j(D,{default:()=>u,frontmatter:()=>C});var r=M(c()),C={title:\\"Alexis Roberson\\",slug:\\"alexis-roberson\\",description:\\"\\",image:\\"\\"};function m(t){return(0,r.jsx)(r.Fragment,{})}function u(t={}){let{wrapper:e}=t.components||{};return e?(0,r.jsx)(e,{...t,children:(0,r.jsx)(m,{...t})}):m(t)}return b(D);})();\\n;return Component;"},"_id":"authors/alexis-roberson.mdx","_raw":{"sourceFilePath":"authors/alexis-roberson.mdx","sourceFileName":"alexis-roberson.mdx","sourceFileDir":"authors","contentType":"mdx","flattenedPath":"authors/alexis-roberson"},"type":"Author","imageUrl":"","url":"/authors/alexis-roberson"},{"title":"Almudena Sanz Oliv\xe9","slug":"almudena-sanz-olive","description":"Senior Data Scientist at Elastic","image":"","body":{"raw":"","code":"var Component=(()=>{var x=Object.create;var o=Object.defineProperty;var d=Object.getOwnPropertyDescriptor;var f=Object.getOwnPropertyNames;var _=Object.getPrototypeOf,g=Object.prototype.hasOwnProperty;var p=(t,n)=>()=>(n||t((n={exports:{}}).exports,n),n.exports),j=(t,n)=>{for(var e in n)o(t,e,{get:n[e],enumerable:!0})},s=(t,n,e,i)=>{if(n&&typeof n==\\"object\\"||typeof n==\\"function\\")for(let r of f(n))!g.call(t,r)&&r!==e&&o(t,r,{get:()=>n[r],enumerable:!(i=d(n,r))||i.enumerable});return t};var D=(t,n,e)=>(e=t!=null?x(_(t)):{},s(n||!t||!t.__esModule?o(e,\\"default\\",{value:t,enumerable:!0}):e,t)),M=t=>s(o({},\\"__esModule\\",{value:!0}),t);var m=p((C,c)=>{c.exports=_jsx_runtime});var v={};j(v,{default:()=>l,frontmatter:()=>S});var a=D(m()),S={title:\\"Almudena Sanz Oliv\\\\xE9\\",slug:\\"almudena-sanz-olive\\",description:\\"Senior Data Scientist at Elastic\\",image:\\"\\"};function u(t){return(0,a.jsx)(a.Fragment,{})}function l(t={}){let{wrapper:n}=t.components||{};return n?(0,a.jsx)(n,{...t,children:(0,a.jsx)(u,{...t})}):u(t)}return M(v);})();\\n;return Component;"},"_id":"authors/almudena-sanz-olive.mdx","_raw":{"sourceFilePath":"authors/almudena-sanz-olive.mdx","sourceFileName":"almudena-sanz-olive.mdx","sourceFileDir":"authors","contentType":"mdx","flattenedPath":"authors/almudena-sanz-olive"},"type":"Author","imageUrl":"","url":"/authors/almudena-sanz-olive"},{"title":"Aman Agarwal","slug":"aman-agarwal","description":"Maintainer of OpenLIT","body":{"raw":"","code":"var Component=(()=>{var f=Object.create;var o=Object.defineProperty;var l=Object.getOwnPropertyDescriptor;var g=Object.getOwnPropertyNames;var p=Object.getPrototypeOf,_=Object.prototype.hasOwnProperty;var d=(t,n)=>()=>(n||t((n={exports:{}}).exports,n),n.exports),M=(t,n)=>{for(var e in n)o(t,e,{get:n[e],enumerable:!0})},m=(t,n,e,i)=>{if(n&&typeof n==\\"object\\"||typeof n==\\"function\\")for(let a of g(n))!_.call(t,a)&&a!==e&&o(t,a,{get:()=>n[a],enumerable:!(i=l(n,a))||i.enumerable});return t};var j=(t,n,e)=>(e=t!=null?f(p(t)):{},m(n||!t||!t.__esModule?o(e,\\"default\\",{value:t,enumerable:!0}):e,t)),w=t=>m(o({},\\"__esModule\\",{value:!0}),t);var c=d((F,s)=>{s.exports=_jsx_runtime});var C={};M(C,{default:()=>x,frontmatter:()=>A});var r=j(c()),A={title:\\"Aman Agarwal\\",slug:\\"aman-agarwal\\",description:\\"Maintainer of OpenLIT\\"};function u(t){return(0,r.jsx)(r.Fragment,{})}function x(t={}){let{wrapper:n}=t.components||{};return n?(0,r.jsx)(n,{...t,children:(0,r.jsx)(u,{...t})}):u(t)}return w(C);})();\\n;return 
Component;"},"_id":"authors/aman-agarwal.mdx","_raw":{"sourceFilePath":"authors/aman-agarwal.mdx","sourceFileName":"aman-agarwal.mdx","sourceFileDir":"authors","contentType":"mdx","flattenedPath":"authors/aman-agarwal"},"type":"Author","imageUrl":"","url":"/authors/aman-agarwal"},{"title":"Andres Rodriguez","slug":"andres-rodriguez","description":"Senior Director, Software Engineering at Elastic","image":"andres-rodriguez.jpg","body":{"raw":"\\n","code":"var Component=(()=>{var m=Object.create;var i=Object.defineProperty;var x=Object.getOwnPropertyDescriptor;var f=Object.getOwnPropertyNames;var l=Object.getPrototypeOf,p=Object.prototype.hasOwnProperty;var _=(e,t)=>()=>(t||e((t={exports:{}}).exports,t),t.exports),j=(e,t)=>{for(var r in t)i(e,r,{get:t[r],enumerable:!0})},s=(e,t,r,a)=>{if(t&&typeof t==\\"object\\"||typeof t==\\"function\\")for(let o of f(t))!p.call(e,o)&&o!==r&&i(e,o,{get:()=>t[o],enumerable:!(a=x(t,o))||a.enumerable});return e};var z=(e,t,r)=>(r=e!=null?m(l(e)):{},s(t||!e||!e.__esModule?i(r,\\"default\\",{value:e,enumerable:!0}):r,e)),D=e=>s(i({},\\"__esModule\\",{value:!0}),e);var c=_((E,u)=>{u.exports=_jsx_runtime});var w={};j(w,{default:()=>g,frontmatter:()=>M});var n=z(c()),M={title:\\"Andres Rodriguez\\",slug:\\"andres-rodriguez\\",description:\\"Senior Director, Software Engineering at Elastic\\",image:\\"andres-rodriguez.jpg\\"};function d(e){return(0,n.jsx)(n.Fragment,{})}function g(e={}){let{wrapper:t}=e.components||{};return t?(0,n.jsx)(t,{...e,children:(0,n.jsx)(d,{...e})}):d(e)}return D(w);})();\\n;return Component;"},"_id":"authors/andres-rodriguez.mdx","_raw":{"sourceFilePath":"authors/andres-rodriguez.mdx","sourceFileName":"andres-rodriguez.mdx","sourceFileDir":"authors","contentType":"mdx","flattenedPath":"authors/andres-rodriguez"},"type":"Author","imageUrl":"/assets/images/authors/andres-rodriguez.jpg","url":"/authors/andres-rodriguez"},{"title":"Andrew Cholakian","slug":"andrew-cholakian","description":"Technical Lead","image":"andrew-cholakian.jpg","body":{"raw":"Andrew is a technical lead working in Elastic Observability, where he\'s currently working on Elastic\'s Observability solutions. He has worked on projects, such as Logstash, Heartbeat, Synthetics, and the Observability UI.\\n\\n","code":"var Component=(()=>{var u=Object.create;var o=Object.defineProperty;var p=Object.getOwnPropertyDescriptor;var w=Object.getOwnPropertyNames;var m=Object.getPrototypeOf,b=Object.prototype.hasOwnProperty;var x=(e,t)=>()=>(t||e((t={exports:{}}).exports,t),t.exports),g=(e,t)=>{for(var n in t)o(e,n,{get:t[n],enumerable:!0})},s=(e,t,n,i)=>{if(t&&typeof t==\\"object\\"||typeof t==\\"function\\")for(let a of w(t))!b.call(e,a)&&a!==n&&o(e,a,{get:()=>t[a],enumerable:!(i=p(t,a))||i.enumerable});return e};var k=(e,t,n)=>(n=e!=null?u(m(e)):{},s(t||!e||!e.__esModule?o(n,\\"default\\",{value:e,enumerable:!0}):n,e)),y=e=>s(o({},\\"__esModule\\",{value:!0}),e);var l=x((v,c)=>{c.exports=_jsx_runtime});var j={};g(j,{default:()=>d,frontmatter:()=>f});var r=k(l()),f={title:\\"Andrew Cholakian\\",slug:\\"andrew-cholakian\\",description:\\"Technical Lead\\",image:\\"andrew-cholakian.jpg\\"};function h(e){let t={p:\\"p\\",...e.components};return(0,r.jsx)(t.p,{children:\\"Andrew is a technical lead working in Elastic Observability, where he\'s currently working on Elastic\'s Observability solutions. 
He has worked on projects, such as Logstash, Heartbeat, Synthetics, and the Observability UI.\\"})}function d(e={}){let{wrapper:t}=e.components||{};return t?(0,r.jsx)(t,{...e,children:(0,r.jsx)(h,{...e})}):h(e)}return y(j);})();\\n;return Component;"},"_id":"authors/andrew-cholakian.mdx","_raw":{"sourceFilePath":"authors/andrew-cholakian.mdx","sourceFileName":"andrew-cholakian.mdx","sourceFileDir":"authors","contentType":"mdx","flattenedPath":"authors/andrew-cholakian"},"type":"Author","imageUrl":"/assets/images/authors/andrew-cholakian.jpg","url":"/authors/andrew-cholakian"},{"title":"Baha Azarmi","slug":"baha-azarmi","description":"VP, Customer Engineering at Elastic","image":"Baha.jpeg","body":{"raw":"\\nBaha joined Elastic in 2015 and is currently VP, Customer Success. He previously co-founded ReachFive, a marketing data platform, and worked for several software vendors including Talend and Oracle. Baha holds a master’s degree in computer science.\\n","code":"var Component=(()=>{var l=Object.create;var r=Object.defineProperty;var p=Object.getOwnPropertyDescriptor;var f=Object.getOwnPropertyNames;var h=Object.getPrototypeOf,g=Object.prototype.hasOwnProperty;var x=(e,n)=>()=>(n||e((n={exports:{}}).exports,n),n.exports),j=(e,n)=>{for(var t in n)r(e,t,{get:n[t],enumerable:!0})},s=(e,n,t,i)=>{if(n&&typeof n==\\"object\\"||typeof n==\\"function\\")for(let a of f(n))!g.call(e,a)&&a!==t&&r(e,a,{get:()=>n[a],enumerable:!(i=p(n,a))||i.enumerable});return e};var _=(e,n,t)=>(t=e!=null?l(h(e)):{},s(n||!e||!e.__esModule?r(t,\\"default\\",{value:e,enumerable:!0}):t,e)),v=e=>s(r({},\\"__esModule\\",{value:!0}),e);var d=x((y,c)=>{c.exports=_jsx_runtime});var C={};j(C,{default:()=>u,frontmatter:()=>B});var o=_(d()),B={title:\\"Baha Azarmi\\",slug:\\"baha-azarmi\\",description:\\"VP, Customer Engineering at Elastic\\",image:\\"Baha.jpeg\\"};function m(e){let n={p:\\"p\\",...e.components};return(0,o.jsx)(n.p,{children:\\"Baha joined Elastic in 2015 and is currently VP, Customer Success. He previously co-founded ReachFive, a marketing data platform, and worked for several software vendors including Talend and Oracle. 
Baha holds a master\\\\u2019s degree in computer science.\\"})}function u(e={}){let{wrapper:n}=e.components||{};return n?(0,o.jsx)(n,{...e,children:(0,o.jsx)(m,{...e})}):m(e)}return v(C);})();\\n;return Component;"},"_id":"authors/baha-azarmi.mdx","_raw":{"sourceFilePath":"authors/baha-azarmi.mdx","sourceFileName":"baha-azarmi.mdx","sourceFileDir":"authors","contentType":"mdx","flattenedPath":"authors/baha-azarmi"},"type":"Author","imageUrl":"/assets/images/authors/Baha.jpeg","url":"/authors/baha-azarmi"},{"title":"Bahubali Shetti","slug":"bahubali-shetti","description":"Senior Director, Technical Solutions for Observability at Elastic","image":"bahubali-shetti.jpg","body":{"raw":"\\n","code":"var Component=(()=>{var m=Object.create;var a=Object.defineProperty;var b=Object.getOwnPropertyDescriptor;var x=Object.getOwnPropertyNames;var f=Object.getPrototypeOf,g=Object.prototype.hasOwnProperty;var p=(t,e)=>()=>(e||t((e={exports:{}}).exports,e),e.exports),_=(t,e)=>{for(var n in e)a(t,n,{get:e[n],enumerable:!0})},s=(t,e,n,o)=>{if(e&&typeof e==\\"object\\"||typeof e==\\"function\\")for(let r of x(e))!g.call(t,r)&&r!==n&&a(t,r,{get:()=>e[r],enumerable:!(o=b(e,r))||o.enumerable});return t};var d=(t,e,n)=>(n=t!=null?m(f(t)):{},s(e||!t||!t.__esModule?a(n,\\"default\\",{value:t,enumerable:!0}):n,t)),j=t=>s(a({},\\"__esModule\\",{value:!0}),t);var u=p((y,c)=>{c.exports=_jsx_runtime});var M={};_(M,{default:()=>h,frontmatter:()=>D});var i=d(u()),D={title:\\"Bahubali Shetti\\",slug:\\"bahubali-shetti\\",description:\\"Senior Director, Technical Solutions for Observability at Elastic\\",image:\\"bahubali-shetti.jpg\\"};function l(t){return(0,i.jsx)(i.Fragment,{})}function h(t={}){let{wrapper:e}=t.components||{};return e?(0,i.jsx)(e,{...t,children:(0,i.jsx)(l,{...t})}):l(t)}return j(M);})();\\n;return Component;"},"_id":"authors/bahubali-shetti.mdx","_raw":{"sourceFilePath":"authors/bahubali-shetti.mdx","sourceFileName":"bahubali-shetti.mdx","sourceFileDir":"authors","contentType":"mdx","flattenedPath":"authors/bahubali-shetti"},"type":"Author","imageUrl":"/assets/images/authors/bahubali-shetti.jpg","url":"/authors/bahubali-shetti"},{"title":"Bryce Buchanan","slug":"bryce-buchanan","description":"Elastic","image":"","body":{"raw":"\\n","code":"var Component=(()=>{var f=Object.create;var o=Object.defineProperty;var l=Object.getOwnPropertyDescriptor;var _=Object.getOwnPropertyNames;var d=Object.getPrototypeOf,g=Object.prototype.hasOwnProperty;var p=(t,n)=>()=>(n||t((n={exports:{}}).exports,n),n.exports),h=(t,n)=>{for(var e in n)o(t,e,{get:n[e],enumerable:!0})},i=(t,n,e,c)=>{if(n&&typeof n==\\"object\\"||typeof n==\\"function\\")for(let a of _(n))!g.call(t,a)&&a!==e&&o(t,a,{get:()=>n[a],enumerable:!(c=l(n,a))||c.enumerable});return t};var j=(t,n,e)=>(e=t!=null?f(d(t)):{},i(n||!t||!t.__esModule?o(e,\\"default\\",{value:t,enumerable:!0}):e,t)),y=t=>i(o({},\\"__esModule\\",{value:!0}),t);var u=p((C,s)=>{s.exports=_jsx_runtime});var b={};h(b,{default:()=>x,frontmatter:()=>M});var r=j(u()),M={title:\\"Bryce Buchanan\\",slug:\\"bryce-buchanan\\",description:\\"Elastic\\",image:\\"\\"};function m(t){return(0,r.jsx)(r.Fragment,{})}function x(t={}){let{wrapper:n}=t.components||{};return n?(0,r.jsx)(n,{...t,children:(0,r.jsx)(m,{...t})}):m(t)}return y(b);})();\\n;return 
Component;"},"_id":"authors/bryce-buchanan.mdx","_raw":{"sourceFilePath":"authors/bryce-buchanan.mdx","sourceFileName":"bryce-buchanan.mdx","sourceFileDir":"authors","contentType":"mdx","flattenedPath":"authors/bryce-buchanan"},"type":"Author","imageUrl":"","url":"/authors/bryce-buchanan"},{"title":"Carly Richmond","slug":"carly-richmond","description":"Principal Developer Advocate","image":"carly-richmond.jpeg","body":{"raw":"Carly is a principal developer advocate at Elastic. Before joining Elastic in 2022, she spent over 10 years working as a software engineer, scrum master, and engineering leader at a large investment bank. She is a UI engineer who occasionally dabbles in writing backend services, a speaker, and a regular blogger on both her personal blog and the Elastic blog.\\n\\nShe enjoys cooking, photography, drinking tea, and chasing after her young son in her spare time.\\n\\n","code":"var Component=(()=>{var p=Object.create;var o=Object.defineProperty;var h=Object.getOwnPropertyDescriptor;var m=Object.getOwnPropertyNames;var u=Object.getPrototypeOf,y=Object.prototype.hasOwnProperty;var x=(e,n)=>()=>(n||e((n={exports:{}}).exports,n),n.exports),b=(e,n)=>{for(var a in n)o(e,a,{get:n[a],enumerable:!0})},s=(e,n,a,i)=>{if(n&&typeof n==\\"object\\"||typeof n==\\"function\\")for(let t of m(n))!y.call(e,t)&&t!==a&&o(e,t,{get:()=>n[t],enumerable:!(i=h(n,t))||i.enumerable});return e};var f=(e,n,a)=>(a=e!=null?p(u(e)):{},s(n||!e||!e.__esModule?o(a,\\"default\\",{value:e,enumerable:!0}):a,e)),j=e=>s(o({},\\"__esModule\\",{value:!0}),e);var l=x((w,c)=>{c.exports=_jsx_runtime});var _={};b(_,{default:()=>d,frontmatter:()=>v});var r=f(l()),v={title:\\"Carly Richmond\\",slug:\\"carly-richmond\\",description:\\"Principal Developer Advocate\\",image:\\"carly-richmond.jpeg\\"};function g(e){let n={p:\\"p\\",...e.components};return(0,r.jsxs)(r.Fragment,{children:[(0,r.jsx)(n.p,{children:\\"Carly is a principal developer advocate at Elastic. Before joining Elastic in 2022, she spent over 10 years working as a software engineer, scrum master, and engineering leader at a large investment bank. 
She is a UI engineer who occasionally dabbles in writing backend services, a speaker, and a regular blogger on both her personal blog and the Elastic blog.\\"}),`\\n`,(0,r.jsx)(n.p,{children:\\"She enjoys cooking, photography, drinking tea, and chasing after her young son in her spare time.\\"})]})}function d(e={}){let{wrapper:n}=e.components||{};return n?(0,r.jsx)(n,{...e,children:(0,r.jsx)(g,{...e})}):g(e)}return j(_);})();\\n;return Component;"},"_id":"authors/carly-richmond.mdx","_raw":{"sourceFilePath":"authors/carly-richmond.mdx","sourceFileName":"carly-richmond.mdx","sourceFileDir":"authors","contentType":"mdx","flattenedPath":"authors/carly-richmond"},"type":"Author","imageUrl":"/assets/images/authors/carly-richmond.jpeg","url":"/authors/carly-richmond"},{"title":"Carson Ip","slug":"carson-ip","description":"Software Engineer II","image":"","body":{"raw":"\\n","code":"var Component=(()=>{var f=Object.create;var a=Object.defineProperty;var p=Object.getOwnPropertyDescriptor;var g=Object.getOwnPropertyNames;var _=Object.getPrototypeOf,d=Object.prototype.hasOwnProperty;var l=(t,n)=>()=>(n||t((n={exports:{}}).exports,n),n.exports),j=(t,n)=>{for(var e in n)a(t,e,{get:n[e],enumerable:!0})},s=(t,n,e,i)=>{if(n&&typeof n==\\"object\\"||typeof n==\\"function\\")for(let o of g(n))!d.call(t,o)&&o!==e&&a(t,o,{get:()=>n[o],enumerable:!(i=p(n,o))||i.enumerable});return t};var C=(t,n,e)=>(e=t!=null?f(_(t)):{},s(n||!t||!t.__esModule?a(e,\\"default\\",{value:t,enumerable:!0}):e,t)),I=t=>s(a({},\\"__esModule\\",{value:!0}),t);var m=l((F,c)=>{c.exports=_jsx_runtime});var w={};j(w,{default:()=>x,frontmatter:()=>M});var r=C(m()),M={title:\\"Carson Ip\\",slug:\\"carson-ip\\",description:\\"Software Engineer II\\",image:\\"\\"};function u(t){return(0,r.jsx)(r.Fragment,{})}function x(t={}){let{wrapper:n}=t.components||{};return n?(0,r.jsx)(n,{...t,children:(0,r.jsx)(u,{...t})}):u(t)}return I(w);})();\\n;return Component;"},"_id":"authors/carson-ip.mdx","_raw":{"sourceFilePath":"authors/carson-ip.mdx","sourceFileName":"carson-ip.mdx","sourceFileDir":"authors","contentType":"mdx","flattenedPath":"authors/carson-ip"},"type":"Author","imageUrl":"","url":"/authors/carson-ip"},{"title":"Cesar Munoz","slug":"cesar-munoz","description":"","image":"","body":{"raw":"\\n","code":"var Component=(()=>{var f=Object.create;var a=Object.defineProperty;var _=Object.getOwnPropertyDescriptor;var d=Object.getOwnPropertyNames;var g=Object.getPrototypeOf,l=Object.prototype.hasOwnProperty;var p=(t,e)=>()=>(e||t((e={exports:{}}).exports,e),e.exports),M=(t,e)=>{for(var n in e)a(t,n,{get:e[n],enumerable:!0})},u=(t,e,n,s)=>{if(e&&typeof e==\\"object\\"||typeof e==\\"function\\")for(let o of d(e))!l.call(t,o)&&o!==n&&a(t,o,{get:()=>e[o],enumerable:!(s=_(e,o))||s.enumerable});return t};var j=(t,e,n)=>(n=t!=null?f(g(t)):{},u(e||!t||!t.__esModule?a(n,\\"default\\",{value:t,enumerable:!0}):n,t)),C=t=>u(a({},\\"__esModule\\",{value:!0}),t);var i=p((X,c)=>{c.exports=_jsx_runtime});var D={};M(D,{default:()=>x,frontmatter:()=>z});var r=j(i()),z={title:\\"Cesar Munoz\\",slug:\\"cesar-munoz\\",description:\\"\\",image:\\"\\"};function m(t){return(0,r.jsx)(r.Fragment,{})}function x(t={}){let{wrapper:e}=t.components||{};return e?(0,r.jsx)(e,{...t,children:(0,r.jsx)(m,{...t})}):m(t)}return C(D);})();\\n;return 
Component;"},"_id":"authors/cesar-munoz.mdx","_raw":{"sourceFilePath":"authors/cesar-munoz.mdx","sourceFileName":"cesar-munoz.mdx","sourceFileDir":"authors","contentType":"mdx","flattenedPath":"authors/cesar-munoz"},"type":"Author","imageUrl":"","url":"/authors/cesar-munoz"},{"title":"Christos Kalkanis","slug":"christos-kalkanis","description":"Principal software engineer, Observability","image":"","body":{"raw":"\\n","code":"var Component=(()=>{var x=Object.create;var a=Object.defineProperty;var f=Object.getOwnPropertyDescriptor;var g=Object.getOwnPropertyNames;var p=Object.getPrototypeOf,_=Object.prototype.hasOwnProperty;var d=(t,e)=>()=>(e||t((e={exports:{}}).exports,e),e.exports),h=(t,e)=>{for(var n in e)a(t,n,{get:e[n],enumerable:!0})},s=(t,e,n,o)=>{if(e&&typeof e==\\"object\\"||typeof e==\\"function\\")for(let i of g(e))!_.call(t,i)&&i!==n&&a(t,i,{get:()=>e[i],enumerable:!(o=f(e,i))||o.enumerable});return t};var j=(t,e,n)=>(n=t!=null?x(p(t)):{},s(e||!t||!t.__esModule?a(n,\\"default\\",{value:t,enumerable:!0}):n,t)),k=t=>s(a({},\\"__esModule\\",{value:!0}),t);var l=d((w,c)=>{c.exports=_jsx_runtime});var M={};h(M,{default:()=>u,frontmatter:()=>C});var r=j(l()),C={title:\\"Christos Kalkanis\\",slug:\\"christos-kalkanis\\",description:\\"Principal software engineer, Observability\\",image:\\"\\"};function m(t){return(0,r.jsx)(r.Fragment,{})}function u(t={}){let{wrapper:e}=t.components||{};return e?(0,r.jsx)(e,{...t,children:(0,r.jsx)(m,{...t})}):m(t)}return k(M);})();\\n;return Component;"},"_id":"authors/christos-kalkanis.mdx","_raw":{"sourceFilePath":"authors/christos-kalkanis.mdx","sourceFileName":"christos-kalkanis.mdx","sourceFileDir":"authors","contentType":"mdx","flattenedPath":"authors/christos-kalkanis"},"type":"Author","imageUrl":"","url":"/authors/christos-kalkanis"},{"title":"Christos Markou","slug":"christos-markou","description":"Senior Software Engineer, Observability and OpenTelemetry at Elastic","image":"christos-markou.jpg","body":{"raw":"\\n","code":"var Component=(()=>{var x=Object.create;var a=Object.defineProperty;var f=Object.getOwnPropertyDescriptor;var g=Object.getOwnPropertyNames;var p=Object.getPrototypeOf,d=Object.prototype.hasOwnProperty;var _=(t,e)=>()=>(e||t((e={exports:{}}).exports,e),e.exports),h=(t,e)=>{for(var r in e)a(t,r,{get:e[r],enumerable:!0})},s=(t,e,r,i)=>{if(e&&typeof e==\\"object\\"||typeof e==\\"function\\")for(let o of g(e))!d.call(t,o)&&o!==r&&a(t,o,{get:()=>e[o],enumerable:!(i=f(e,o))||i.enumerable});return t};var j=(t,e,r)=>(r=t!=null?x(p(t)):{},s(e||!t||!t.__esModule?a(r,\\"default\\",{value:t,enumerable:!0}):r,t)),M=t=>s(a({},\\"__esModule\\",{value:!0}),t);var m=_((b,c)=>{c.exports=_jsx_runtime});var y={};h(y,{default:()=>l,frontmatter:()=>k});var n=j(m()),k={title:\\"Christos Markou\\",slug:\\"christos-markou\\",description:\\"Senior Software Engineer, Observability and OpenTelemetry at Elastic\\",image:\\"christos-markou.jpg\\"};function u(t){return(0,n.jsx)(n.Fragment,{})}function l(t={}){let{wrapper:e}=t.components||{};return e?(0,n.jsx)(e,{...t,children:(0,n.jsx)(u,{...t})}):u(t)}return M(y);})();\\n;return Component;"},"_id":"authors/christos-markou.mdx","_raw":{"sourceFilePath":"authors/christos-markou.mdx","sourceFileName":"christos-markou.mdx","sourceFileDir":"authors","contentType":"mdx","flattenedPath":"authors/christos-markou"},"type":"Author","imageUrl":"/assets/images/authors/christos-markou.jpg","url":"/authors/christos-markou"},{"title":"Damien Mathieu","slug":"damien-mathieu","description":"Principal Software 
Engineer","image":"","body":{"raw":"\\n","code":"var Component=(()=>{var f=Object.create;var i=Object.defineProperty;var d=Object.getOwnPropertyDescriptor;var g=Object.getOwnPropertyNames;var l=Object.getPrototypeOf,p=Object.prototype.hasOwnProperty;var _=(t,e)=>()=>(e||t((e={exports:{}}).exports,e),e.exports),M=(t,e)=>{for(var n in e)i(t,n,{get:e[n],enumerable:!0})},m=(t,e,n,o)=>{if(e&&typeof e==\\"object\\"||typeof e==\\"function\\")for(let a of g(e))!p.call(t,a)&&a!==n&&i(t,a,{get:()=>e[a],enumerable:!(o=d(e,a))||o.enumerable});return t};var h=(t,e,n)=>(n=t!=null?f(l(t)):{},m(e||!t||!t.__esModule?i(n,\\"default\\",{value:t,enumerable:!0}):n,t)),j=t=>m(i({},\\"__esModule\\",{value:!0}),t);var c=_((F,u)=>{u.exports=_jsx_runtime});var w={};M(w,{default:()=>x,frontmatter:()=>D});var r=h(c()),D={title:\\"Damien Mathieu\\",slug:\\"damien-mathieu\\",description:\\"Principal Software Engineer\\",image:\\"\\"};function s(t){return(0,r.jsx)(r.Fragment,{})}function x(t={}){let{wrapper:e}=t.components||{};return e?(0,r.jsx)(e,{...t,children:(0,r.jsx)(s,{...t})}):s(t)}return j(w);})();\\n;return Component;"},"_id":"authors/damien-mathieu.mdx","_raw":{"sourceFilePath":"authors/damien-mathieu.mdx","sourceFileName":"damien-mathieu.mdx","sourceFileDir":"authors","contentType":"mdx","flattenedPath":"authors/damien-mathieu"},"type":"Author","imageUrl":"","url":"/authors/damien-mathieu"},{"title":"Dario Gieselaar","slug":"dario-gieselaar","description":"Senior Software Engineer at Elastic","image":"dario-gieselaar.jpg","body":{"raw":"\\n","code":"var Component=(()=>{var u=Object.create;var o=Object.defineProperty;var x=Object.getOwnPropertyDescriptor;var d=Object.getOwnPropertyNames;var f=Object.getPrototypeOf,p=Object.prototype.hasOwnProperty;var _=(e,t)=>()=>(t||e((t={exports:{}}).exports,t),t.exports),j=(e,t)=>{for(var r in t)o(e,r,{get:t[r],enumerable:!0})},s=(e,t,r,i)=>{if(t&&typeof t==\\"object\\"||typeof t==\\"function\\")for(let a of d(t))!p.call(e,a)&&a!==r&&o(e,a,{get:()=>t[a],enumerable:!(i=x(t,a))||i.enumerable});return e};var D=(e,t,r)=>(r=e!=null?u(f(e)):{},s(t||!e||!e.__esModule?o(r,\\"default\\",{value:e,enumerable:!0}):r,e)),M=e=>s(o({},\\"__esModule\\",{value:!0}),e);var g=_((F,c)=>{c.exports=_jsx_runtime});var C={};j(C,{default:()=>m,frontmatter:()=>w});var n=D(g()),w={title:\\"Dario Gieselaar\\",slug:\\"dario-gieselaar\\",description:\\"Senior Software Engineer at Elastic\\",image:\\"dario-gieselaar.jpg\\"};function l(e){return(0,n.jsx)(n.Fragment,{})}function m(e={}){let{wrapper:t}=e.components||{};return t?(0,n.jsx)(t,{...e,children:(0,n.jsx)(l,{...e})}):l(e)}return M(C);})();\\n;return Component;"},"_id":"authors/dario-gieselaar.mdx","_raw":{"sourceFilePath":"authors/dario-gieselaar.mdx","sourceFileName":"dario-gieselaar.mdx","sourceFileDir":"authors","contentType":"mdx","flattenedPath":"authors/dario-gieselaar"},"type":"Author","imageUrl":"/assets/images/authors/dario-gieselaar.jpg","url":"/authors/dario-gieselaar"},{"title":"David Hope","slug":"david-hope","description":"Director, Observability and AIOps Solutions at Elastic","image":"david-hope.png","body":{"raw":"David Hope is a dedicated IT professional with over 16 years of diverse experience spanning from development, DevOps, leadership, sales, and product marketing. 
David currently lives in the USA with his family and moved here eight years ago from the UK after meeting his wife in Buffalo, NY.\\n\\n","code":"var Component=(()=>{var m=Object.create;var o=Object.defineProperty;var f=Object.getOwnPropertyDescriptor;var h=Object.getOwnPropertyNames;var u=Object.getPrototypeOf,v=Object.prototype.hasOwnProperty;var g=(e,t)=>()=>(t||e((t={exports:{}}).exports,t),t.exports),x=(e,t)=>{for(var n in t)o(e,n,{get:t[n],enumerable:!0})},s=(e,t,n,a)=>{if(t&&typeof t==\\"object\\"||typeof t==\\"function\\")for(let i of h(t))!v.call(e,i)&&i!==n&&o(e,i,{get:()=>t[i],enumerable:!(a=f(t,i))||a.enumerable});return e};var D=(e,t,n)=>(n=e!=null?m(u(e)):{},s(t||!e||!e.__esModule?o(n,\\"default\\",{value:e,enumerable:!0}):n,e)),y=e=>s(o({},\\"__esModule\\",{value:!0}),e);var p=g((M,d)=>{d.exports=_jsx_runtime});var w={};x(w,{default:()=>l,frontmatter:()=>_});var r=D(p()),_={title:\\"David Hope\\",slug:\\"david-hope\\",description:\\"Director, Observability and AIOps Solutions at Elastic\\",image:\\"david-hope.png\\"};function c(e){let t={p:\\"p\\",...e.components};return(0,r.jsx)(t.p,{children:\\"David Hope is a dedicated IT professional with over 16 years of diverse experience spanning from development, DevOps, leadership, sales, and product marketing. David currently lives in the USA with his family and moved here eight years ago from the UK after meeting his wife in Buffalo, NY.\\"})}function l(e={}){let{wrapper:t}=e.components||{};return t?(0,r.jsx)(t,{...e,children:(0,r.jsx)(c,{...e})}):c(e)}return y(w);})();\\n;return Component;"},"_id":"authors/david-hope.mdx","_raw":{"sourceFilePath":"authors/david-hope.mdx","sourceFileName":"david-hope.mdx","sourceFileDir":"authors","contentType":"mdx","flattenedPath":"authors/david-hope"},"type":"Author","imageUrl":"/assets/images/authors/david-hope.png","url":"/authors/david-hope"},{"title":"David Ricordel","slug":"david-ricordel","description":"Consulting Architect","image":"","body":{"raw":"\\n","code":"var Component=(()=>{var l=Object.create;var i=Object.defineProperty;var x=Object.getOwnPropertyDescriptor;var f=Object.getOwnPropertyNames;var g=Object.getPrototypeOf,_=Object.prototype.hasOwnProperty;var p=(t,e)=>()=>(e||t((e={exports:{}}).exports,e),e.exports),j=(t,e)=>{for(var n in e)i(t,n,{get:e[n],enumerable:!0})},c=(t,e,n,a)=>{if(e&&typeof e==\\"object\\"||typeof e==\\"function\\")for(let o of f(e))!_.call(t,o)&&o!==n&&i(t,o,{get:()=>e[o],enumerable:!(a=x(e,o))||a.enumerable});return t};var C=(t,e,n)=>(n=t!=null?l(g(t)):{},c(e||!t||!t.__esModule?i(n,\\"default\\",{value:t,enumerable:!0}):n,t)),D=t=>c(i({},\\"__esModule\\",{value:!0}),t);var d=p((F,s)=>{s.exports=_jsx_runtime});var h={};j(h,{default:()=>m,frontmatter:()=>M});var r=C(d()),M={title:\\"David Ricordel\\",slug:\\"david-ricordel\\",description:\\"Consulting Architect\\",image:\\"\\"};function u(t){return(0,r.jsx)(r.Fragment,{})}function m(t={}){let{wrapper:e}=t.components||{};return e?(0,r.jsx)(e,{...t,children:(0,r.jsx)(u,{...t})}):u(t)}return D(h);})();\\n;return Component;"},"_id":"authors/david-ricordel.mdx","_raw":{"sourceFilePath":"authors/david-ricordel.mdx","sourceFileName":"david-ricordel.mdx","sourceFileDir":"authors","contentType":"mdx","flattenedPath":"authors/david-ricordel"},"type":"Author","imageUrl":"","url":"/authors/david-ricordel"},{"title":"Drew Post","slug":"drew-post","description":"Principal Product Manager, Observability at Elastic","image":"drew-post.jpg","body":{"raw":"\\n","code":"var Component=(()=>{var d=Object.create;var 
a=Object.defineProperty;var l=Object.getOwnPropertyDescriptor;var x=Object.getOwnPropertyNames;var g=Object.getPrototypeOf,f=Object.prototype.hasOwnProperty;var _=(t,e)=>()=>(e||t((e={exports:{}}).exports,e),e.exports),j=(t,e)=>{for(var r in e)a(t,r,{get:e[r],enumerable:!0})},s=(t,e,r,i)=>{if(e&&typeof e==\\"object\\"||typeof e==\\"function\\")for(let o of x(e))!f.call(t,o)&&o!==r&&a(t,o,{get:()=>e[o],enumerable:!(i=l(e,o))||i.enumerable});return t};var w=(t,e,r)=>(r=t!=null?d(g(t)):{},s(e||!t||!t.__esModule?a(r,\\"default\\",{value:t,enumerable:!0}):r,t)),M=t=>s(a({},\\"__esModule\\",{value:!0}),t);var u=_((y,c)=>{c.exports=_jsx_runtime});var P={};j(P,{default:()=>p,frontmatter:()=>D});var n=w(u()),D={title:\\"Drew Post\\",slug:\\"drew-post\\",description:\\"Principal Product Manager, Observability at Elastic\\",image:\\"drew-post.jpg\\"};function m(t){return(0,n.jsx)(n.Fragment,{})}function p(t={}){let{wrapper:e}=t.components||{};return e?(0,n.jsx)(e,{...t,children:(0,n.jsx)(m,{...t})}):m(t)}return M(P);})();\\n;return Component;"},"_id":"authors/drew-post.mdx","_raw":{"sourceFilePath":"authors/drew-post.mdx","sourceFileName":"drew-post.mdx","sourceFileDir":"authors","contentType":"mdx","flattenedPath":"authors/drew-post"},"type":"Author","imageUrl":"/assets/images/authors/drew-post.jpg","url":"/authors/drew-post"},{"title":"Elastic Observability Team","slug":"elastic-observability-team","description":"Elastic Observability Team","image":"","body":{"raw":"\\n","code":"var Component=(()=>{var x=Object.create;var i=Object.defineProperty;var b=Object.getOwnPropertyDescriptor;var f=Object.getOwnPropertyNames;var _=Object.getPrototypeOf,d=Object.prototype.hasOwnProperty;var g=(t,e)=>()=>(e||t((e={exports:{}}).exports,e),e.exports),p=(t,e)=>{for(var n in e)i(t,n,{get:e[n],enumerable:!0})},s=(t,e,n,o)=>{if(e&&typeof e==\\"object\\"||typeof e==\\"function\\")for(let a of f(e))!d.call(t,a)&&a!==n&&i(t,a,{get:()=>e[a],enumerable:!(o=b(e,a))||o.enumerable});return t};var y=(t,e,n)=>(n=t!=null?x(_(t)):{},s(e||!t||!t.__esModule?i(n,\\"default\\",{value:t,enumerable:!0}):n,t)),j=t=>s(i({},\\"__esModule\\",{value:!0}),t);var m=g((D,c)=>{c.exports=_jsx_runtime});var M={};p(M,{default:()=>u,frontmatter:()=>v});var r=y(m()),v={title:\\"Elastic Observability Team\\",slug:\\"elastic-observability-team\\",description:\\"Elastic Observability Team\\",image:\\"\\"};function l(t){return(0,r.jsx)(r.Fragment,{})}function u(t={}){let{wrapper:e}=t.components||{};return e?(0,r.jsx)(e,{...t,children:(0,r.jsx)(l,{...t})}):l(t)}return j(M);})();\\n;return Component;"},"_id":"authors/elasitic-observability-team.mdx","_raw":{"sourceFilePath":"authors/elasitic-observability-team.mdx","sourceFileName":"elasitic-observability-team.mdx","sourceFileDir":"authors","contentType":"mdx","flattenedPath":"authors/elasitic-observability-team"},"type":"Author","imageUrl":"","url":"/authors/elastic-observability-team"},{"title":"Eric Lowry","slug":"eric-lowry","description":"Principal Solutions Architect at Elastic","image":"eric-lowry.jpg","body":{"raw":"\\n","code":"var Component=(()=>{var x=Object.create;var i=Object.defineProperty;var p=Object.getOwnPropertyDescriptor;var f=Object.getOwnPropertyNames;var g=Object.getPrototypeOf,_=Object.prototype.hasOwnProperty;var d=(t,r)=>()=>(r||t((r={exports:{}}).exports,r),r.exports),j=(t,r)=>{for(var e in r)i(t,e,{get:r[e],enumerable:!0})},a=(t,r,e,c)=>{if(r&&typeof r==\\"object\\"||typeof r==\\"function\\")for(let o of 
f(r))!_.call(t,o)&&o!==e&&i(t,o,{get:()=>r[o],enumerable:!(c=p(r,o))||c.enumerable});return t};var w=(t,r,e)=>(e=t!=null?x(g(t)):{},a(r||!t||!t.__esModule?i(e,\\"default\\",{value:t,enumerable:!0}):e,t)),y=t=>a(i({},\\"__esModule\\",{value:!0}),t);var l=d((D,s)=>{s.exports=_jsx_runtime});var h={};j(h,{default:()=>m,frontmatter:()=>M});var n=w(l()),M={title:\\"Eric Lowry\\",slug:\\"eric-lowry\\",description:\\"Principal Solutions Architect at Elastic\\",image:\\"eric-lowry.jpg\\"};function u(t){return(0,n.jsx)(n.Fragment,{})}function m(t={}){let{wrapper:r}=t.components||{};return r?(0,n.jsx)(r,{...t,children:(0,n.jsx)(u,{...t})}):u(t)}return y(h);})();\\n;return Component;"},"_id":"authors/eric-lowry.mdx","_raw":{"sourceFilePath":"authors/eric-lowry.mdx","sourceFileName":"eric-lowry.mdx","sourceFileDir":"authors","contentType":"mdx","flattenedPath":"authors/eric-lowry"},"type":"Author","imageUrl":"/assets/images/authors/eric-lowry.jpg","url":"/authors/eric-lowry"},{"title":"Felix Barnsteiner","slug":"felix-barnsteiner","description":"Tech Lead","image":"felix-barnsteiner.jpg","body":{"raw":"Felix joined Elastic in 2018 where he built the Elastic APM Java agent from the ground up. Two years in, he took the role of the tech lead for the APM Agents team. Another two years later, he became tech lead of application observability. Since Oct 2023, Felix is a floating tech lead in observability, focusing on strategical initiatives, such as working with other teams to make Elasticsearch the best place to store your observability signals. Felix is also a passionate home barista and loves practicing latte art.\\n\\n","code":"var Component=(()=>{var p=Object.create;var n=Object.defineProperty;var u=Object.getOwnPropertyDescriptor;var f=Object.getOwnPropertyNames;var x=Object.getPrototypeOf,b=Object.prototype.hasOwnProperty;var d=(e,t)=>()=>(t||e((t={exports:{}}).exports,t),t.exports),g=(e,t)=>{for(var i in t)n(e,i,{get:t[i],enumerable:!0})},s=(e,t,i,r)=>{if(t&&typeof t==\\"object\\"||typeof t==\\"function\\")for(let a of f(t))!b.call(e,a)&&a!==i&&n(e,a,{get:()=>t[a],enumerable:!(r=u(t,a))||r.enumerable});return e};var y=(e,t,i)=>(i=e!=null?p(x(e)):{},s(t||!e||!e.__esModule?n(i,\\"default\\",{value:e,enumerable:!0}):i,e)),v=e=>s(n({},\\"__esModule\\",{value:!0}),e);var c=d((_,l)=>{l.exports=_jsx_runtime});var j={};g(j,{default:()=>m,frontmatter:()=>w});var o=y(c()),w={title:\\"Felix Barnsteiner\\",slug:\\"felix-barnsteiner\\",description:\\"Tech Lead\\",image:\\"felix-barnsteiner.jpg\\"};function h(e){let t={p:\\"p\\",...e.components};return(0,o.jsx)(t.p,{children:\\"Felix joined Elastic in 2018 where he built the Elastic APM Java agent from the ground up. Two years in, he took the role of the tech lead for the APM Agents team. Another two years later, he became tech lead of application observability. Since Oct 2023, Felix is a floating tech lead in observability, focusing on strategical initiatives, such as working with other teams to make Elasticsearch the best place to store your observability signals. 
Felix is also a passionate home barista and loves practicing latte art.\\"})}function m(e={}){let{wrapper:t}=e.components||{};return t?(0,o.jsx)(t,{...e,children:(0,o.jsx)(h,{...e})}):h(e)}return v(j);})();\\n;return Component;"},"_id":"authors/felix-barnsteiner.mdx","_raw":{"sourceFilePath":"authors/felix-barnsteiner.mdx","sourceFileName":"felix-barnsteiner.mdx","sourceFileDir":"authors","contentType":"mdx","flattenedPath":"authors/felix-barnsteiner"},"type":"Author","imageUrl":"/assets/images/authors/felix-barnsteiner.jpg","url":"/authors/felix-barnsteiner"},{"title":"Francesco Gualazzi","slug":"francesco-gualazzi","description":"","image":"","body":{"raw":"\\n","code":"var Component=(()=>{var f=Object.create;var o=Object.defineProperty;var l=Object.getOwnPropertyDescriptor;var g=Object.getOwnPropertyNames;var _=Object.getPrototypeOf,d=Object.prototype.hasOwnProperty;var p=(t,e)=>()=>(e||t((e={exports:{}}).exports,e),e.exports),z=(t,e)=>{for(var n in e)o(t,n,{get:e[n],enumerable:!0})},i=(t,e,n,c)=>{if(e&&typeof e==\\"object\\"||typeof e==\\"function\\")for(let a of g(e))!d.call(t,a)&&a!==n&&o(t,a,{get:()=>e[a],enumerable:!(c=l(e,a))||c.enumerable});return t};var j=(t,e,n)=>(n=t!=null?f(_(t)):{},i(e||!t||!t.__esModule?o(n,\\"default\\",{value:t,enumerable:!0}):n,t)),F=t=>i(o({},\\"__esModule\\",{value:!0}),t);var u=p((X,s)=>{s.exports=_jsx_runtime});var C={};z(C,{default:()=>x,frontmatter:()=>M});var r=j(u()),M={title:\\"Francesco Gualazzi\\",slug:\\"francesco-gualazzi\\",description:\\"\\",image:\\"\\"};function m(t){return(0,r.jsx)(r.Fragment,{})}function x(t={}){let{wrapper:e}=t.components||{};return e?(0,r.jsx)(e,{...t,children:(0,r.jsx)(m,{...t})}):m(t)}return F(C);})();\\n;return Component;"},"_id":"authors/francesco-gualazzi.mdx","_raw":{"sourceFilePath":"authors/francesco-gualazzi.mdx","sourceFileName":"francesco-gualazzi.mdx","sourceFileDir":"authors","contentType":"mdx","flattenedPath":"authors/francesco-gualazzi"},"type":"Author","imageUrl":"","url":"/authors/francesco-gualazzi"},{"title":"Giorgos Bamparopoulos","slug":"giorgos-bamparopoulos","description":"","image":"","body":{"raw":"","code":"var Component=(()=>{var g=Object.create;var a=Object.defineProperty;var x=Object.getOwnPropertyDescriptor;var l=Object.getOwnPropertyNames;var f=Object.getPrototypeOf,_=Object.prototype.hasOwnProperty;var d=(t,o)=>()=>(o||t((o={exports:{}}).exports,o),o.exports),j=(t,o)=>{for(var r in o)a(t,r,{get:o[r],enumerable:!0})},i=(t,o,r,s)=>{if(o&&typeof o==\\"object\\"||typeof o==\\"function\\")for(let n of l(o))!_.call(t,n)&&n!==r&&a(t,n,{get:()=>o[n],enumerable:!(s=x(o,n))||s.enumerable});return t};var M=(t,o,r)=>(r=t!=null?g(f(t)):{},i(o||!t||!t.__esModule?a(r,\\"default\\",{value:t,enumerable:!0}):r,t)),C=t=>i(a({},\\"__esModule\\",{value:!0}),t);var u=d((b,m)=>{m.exports=_jsx_runtime});var F={};j(F,{default:()=>p,frontmatter:()=>D});var e=M(u()),D={title:\\"Giorgos Bamparopoulos\\",slug:\\"giorgos-bamparopoulos\\",description:\\"\\",image:\\"\\"};function c(t){return(0,e.jsx)(e.Fragment,{})}function p(t={}){let{wrapper:o}=t.components||{};return o?(0,e.jsx)(o,{...t,children:(0,e.jsx)(c,{...t})}):c(t)}return C(F);})();\\n;return Component;"},"_id":"authors/giorgos-bamparopoulos.mdx","_raw":{"sourceFilePath":"authors/giorgos-bamparopoulos.mdx","sourceFileName":"giorgos-bamparopoulos.mdx","sourceFileDir":"authors","contentType":"mdx","flattenedPath":"authors/giorgos-bamparopoulos"},"type":"Author","imageUrl":"","url":"/authors/giorgos-bamparopoulos"},{"title":"Giuseppe 
Santoro","slug":"giuseppe-santoro","description":"Senior Software Engineer","image":"giuseppe-santoro.jpg","body":{"raw":"Giuseppe, a seasoned senior software engineer with 10 years of experience, is an integral member of the Observability department, specializing in Cloud Native Monitoring. With a robust background in cloud technology, Kubernetes, Golang, and Python, Giuseppe is dedicated to ongoing growth and improvement in the field. Giuseppe\'s commitment to excellence is underscored by his Certified Kubernetes Administrator credential, demonstrating his proficiency in orchestrating containerized applications in complex environments. Before joining Elastic, Giuseppe served as a senior data engineer at Playstation, where he honed his skills and contributed to the gaming industry\'s data landscape.\\n\\n","code":"var Component=(()=>{var l=Object.create;var o=Object.defineProperty;var g=Object.getOwnPropertyDescriptor;var m=Object.getOwnPropertyNames;var h=Object.getPrototypeOf,f=Object.prototype.hasOwnProperty;var x=(e,n)=>()=>(n||e((n={exports:{}}).exports,n),n.exports),b=(e,n)=>{for(var t in n)o(e,t,{get:n[t],enumerable:!0})},a=(e,n,t,s)=>{if(n&&typeof n==\\"object\\"||typeof n==\\"function\\")for(let i of m(n))!f.call(e,i)&&i!==t&&o(e,i,{get:()=>n[i],enumerable:!(s=g(n,i))||s.enumerable});return e};var y=(e,n,t)=>(t=e!=null?l(h(e)):{},a(n||!e||!e.__esModule?o(t,\\"default\\",{value:e,enumerable:!0}):t,e)),w=e=>a(o({},\\"__esModule\\",{value:!0}),e);var d=x((_,c)=>{c.exports=_jsx_runtime});var j={};b(j,{default:()=>u,frontmatter:()=>G});var r=y(d()),G={title:\\"Giuseppe Santoro\\",slug:\\"giuseppe-santoro\\",description:\\"Senior Software Engineer\\",image:\\"giuseppe-santoro.jpg\\"};function p(e){let n={p:\\"p\\",...e.components};return(0,r.jsx)(n.p,{children:\\"Giuseppe, a seasoned senior software engineer with 10 years of experience, is an integral member of the Observability department, specializing in Cloud Native Monitoring. With a robust background in cloud technology, Kubernetes, Golang, and Python, Giuseppe is dedicated to ongoing growth and improvement in the field. Giuseppe\'s commitment to excellence is underscored by his Certified Kubernetes Administrator credential, demonstrating his proficiency in orchestrating containerized applications in complex environments. 
Before joining Elastic, Giuseppe served as a senior data engineer at Playstation, where he honed his skills and contributed to the gaming industry\'s data landscape.\\"})}function u(e={}){let{wrapper:n}=e.components||{};return n?(0,r.jsx)(n,{...e,children:(0,r.jsx)(p,{...e})}):p(e)}return w(j);})();\\n;return Component;"},"_id":"authors/giuseppe-santoro.mdx","_raw":{"sourceFilePath":"authors/giuseppe-santoro.mdx","sourceFileName":"giuseppe-santoro.mdx","sourceFileDir":"authors","contentType":"mdx","flattenedPath":"authors/giuseppe-santoro"},"type":"Author","imageUrl":"/assets/images/authors/giuseppe-santoro.jpg","url":"/authors/giuseppe-santoro"},{"title":"Greg Kalapos","slug":"greg-kalapos","description":"","image":"greg-kalapos.jpg","body":{"raw":"\\n","code":"var Component=(()=>{var u=Object.create;var a=Object.defineProperty;var l=Object.getOwnPropertyDescriptor;var x=Object.getOwnPropertyNames;var f=Object.getPrototypeOf,_=Object.prototype.hasOwnProperty;var d=(t,e)=>()=>(e||t((e={exports:{}}).exports,e),e.exports),j=(t,e)=>{for(var n in e)a(t,n,{get:e[n],enumerable:!0})},g=(t,e,n,s)=>{if(e&&typeof e==\\"object\\"||typeof e==\\"function\\")for(let o of x(e))!_.call(t,o)&&o!==n&&a(t,o,{get:()=>e[o],enumerable:!(s=l(e,o))||s.enumerable});return t};var M=(t,e,n)=>(n=t!=null?u(f(t)):{},g(e||!t||!t.__esModule?a(n,\\"default\\",{value:t,enumerable:!0}):n,t)),k=t=>g(a({},\\"__esModule\\",{value:!0}),t);var c=d((X,i)=>{i.exports=_jsx_runtime});var D={};j(D,{default:()=>p,frontmatter:()=>C});var r=M(c()),C={title:\\"Greg Kalapos\\",slug:\\"greg-kalapos\\",description:\\"\\",image:\\"greg-kalapos.jpg\\"};function m(t){return(0,r.jsx)(r.Fragment,{})}function p(t={}){let{wrapper:e}=t.components||{};return e?(0,r.jsx)(e,{...t,children:(0,r.jsx)(m,{...t})}):m(t)}return k(D);})();\\n;return Component;"},"_id":"authors/greg-kalapos.mdx","_raw":{"sourceFilePath":"authors/greg-kalapos.mdx","sourceFileName":"greg-kalapos.mdx","sourceFileDir":"authors","contentType":"mdx","flattenedPath":"authors/greg-kalapos"},"type":"Author","imageUrl":"/assets/images/authors/greg-kalapos.jpg","url":"/authors/greg-kalapos"},{"title":"Hemant Malik","slug":"hemant-malik","description":"Senior Principal Solutions Architect at Elastic","image":"hemant-malik.jpg","body":{"raw":"Hemant Malik is a principal solutions architect for Elastic, based out of the Greater Seattle Area. Hemant is an Azure Certified Solutions Architect and has been working with users of Elastic since 2017. 
Before Elastic, Hemant worked at Oracle, where he focused on helping users integrate applications, services, data, and humans into their business processes.\\n\\n","code":"var Component=(()=>{var p=Object.create;var a=Object.defineProperty;var h=Object.getOwnPropertyDescriptor;var d=Object.getOwnPropertyNames;var f=Object.getPrototypeOf,x=Object.prototype.hasOwnProperty;var g=(e,t)=>()=>(t||e((t={exports:{}}).exports,t),t.exports),k=(e,t)=>{for(var n in t)a(e,n,{get:t[n],enumerable:!0})},o=(e,t,n,s)=>{if(t&&typeof t==\\"object\\"||typeof t==\\"function\\")for(let i of d(t))!x.call(e,i)&&i!==n&&a(e,i,{get:()=>t[i],enumerable:!(s=h(t,i))||s.enumerable});return e};var w=(e,t,n)=>(n=e!=null?p(f(e)):{},o(t||!e||!e.__esModule?a(n,\\"default\\",{value:e,enumerable:!0}):n,e)),M=e=>o(a({},\\"__esModule\\",{value:!0}),e);var l=g((E,c)=>{c.exports=_jsx_runtime});var j={};k(j,{default:()=>m,frontmatter:()=>_});var r=w(l()),_={title:\\"Hemant Malik\\",slug:\\"hemant-malik\\",description:\\"Senior Principal Solutions Architect at Elastic\\",image:\\"hemant-malik.jpg\\"};function u(e){let t={p:\\"p\\",...e.components};return(0,r.jsx)(t.p,{children:\\"Hemant Malik is a principal solutions architect for Elastic, based out of the Greater Seattle Area. Hemant is an Azure Certified Solutions Architect and has been working with users of Elastic since 2017. Before Elastic, Hemant worked at Oracle, where he focused on helping users integrate applications, services, data, and humans into their business processes.\\"})}function m(e={}){let{wrapper:t}=e.components||{};return t?(0,r.jsx)(t,{...e,children:(0,r.jsx)(u,{...e})}):u(e)}return M(j);})();\\n;return Component;"},"_id":"authors/hemant-malik.mdx","_raw":{"sourceFilePath":"authors/hemant-malik.mdx","sourceFileName":"hemant-malik.mdx","sourceFileDir":"authors","contentType":"mdx","flattenedPath":"authors/hemant-malik"},"type":"Author","imageUrl":"/assets/images/authors/hemant-malik.jpg","url":"/authors/hemant-malik"},{"title":"Ishleen Kaur","slug":"ishleen-kaur","description":"Principal Software Engineer","image":"ishleen-kaur.jpg","body":{"raw":"\\nIshleen is a Principal engineer on the Observability team. Her journey in the observability domain began as a natural progression of her interest in technology, data, and the interconnectedness of complex systems. In her free time, she enjoys traveling and inventing new games with her kid.\\n","code":"var Component=(()=>{var u=Object.create;var i=Object.defineProperty;var p=Object.getOwnPropertyDescriptor;var d=Object.getOwnPropertyNames;var g=Object.getPrototypeOf,f=Object.prototype.hasOwnProperty;var x=(e,n)=>()=>(n||e((n={exports:{}}).exports,n),n.exports),y=(e,n)=>{for(var t in n)i(e,t,{get:n[t],enumerable:!0})},s=(e,n,t,a)=>{if(n&&typeof n==\\"object\\"||typeof n==\\"function\\")for(let r of d(n))!f.call(e,r)&&r!==t&&i(e,r,{get:()=>n[r],enumerable:!(a=p(n,r))||a.enumerable});return e};var j=(e,n,t)=>(t=e!=null?u(g(e)):{},s(n||!e||!e.__esModule?i(t,\\"default\\",{value:e,enumerable:!0}):t,e)),b=e=>s(i({},\\"__esModule\\",{value:!0}),e);var l=x((k,c)=>{c.exports=_jsx_runtime});var v={};y(v,{default:()=>m,frontmatter:()=>_});var o=j(l()),_={title:\\"Ishleen Kaur\\",slug:\\"ishleen-kaur\\",description:\\"Principal Software Engineer\\",image:\\"ishleen-kaur.jpg\\"};function h(e){let n={p:\\"p\\",...e.components};return(0,o.jsx)(n.p,{children:\\"Ishleen is a Principal engineer on the Observability team. 
Her journey in the observability domain began as a natural progression of her interest in technology, data, and the interconnectedness of complex systems. In her free time, she enjoys traveling and inventing new games with her kid.\\"})}function m(e={}){let{wrapper:n}=e.components||{};return n?(0,o.jsx)(n,{...e,children:(0,o.jsx)(h,{...e})}):h(e)}return b(v);})();\\n;return Component;"},"_id":"authors/ishleen-kaur.mdx","_raw":{"sourceFilePath":"authors/ishleen-kaur.mdx","sourceFileName":"ishleen-kaur.mdx","sourceFileDir":"authors","contentType":"mdx","flattenedPath":"authors/ishleen-kaur"},"type":"Author","imageUrl":"/assets/images/authors/ishleen-kaur.jpg","url":"/authors/ishleen-kaur"},{"title":"Israel Ogbole","slug":"israel-ogbole","description":"","image":"","body":{"raw":"\\n","code":"var Component=(()=>{var x=Object.create;var a=Object.defineProperty;var g=Object.getOwnPropertyDescriptor;var f=Object.getOwnPropertyNames;var _=Object.getPrototypeOf,d=Object.prototype.hasOwnProperty;var p=(t,e)=>()=>(e||t((e={exports:{}}).exports,e),e.exports),j=(t,e)=>{for(var n in e)a(t,n,{get:e[n],enumerable:!0})},i=(t,e,n,s)=>{if(e&&typeof e==\\"object\\"||typeof e==\\"function\\")for(let o of f(e))!d.call(t,o)&&o!==n&&a(t,o,{get:()=>e[o],enumerable:!(s=g(e,o))||s.enumerable});return t};var M=(t,e,n)=>(n=t!=null?x(_(t)):{},i(e||!t||!t.__esModule?a(n,\\"default\\",{value:t,enumerable:!0}):n,t)),b=t=>i(a({},\\"__esModule\\",{value:!0}),t);var l=p((X,c)=>{c.exports=_jsx_runtime});var D={};j(D,{default:()=>u,frontmatter:()=>C});var r=M(l()),C={title:\\"Israel Ogbole\\",slug:\\"israel-ogbole\\",description:\\"\\",image:\\"\\"};function m(t){return(0,r.jsx)(r.Fragment,{})}function u(t={}){let{wrapper:e}=t.components||{};return e?(0,r.jsx)(e,{...t,children:(0,r.jsx)(m,{...t})}):m(t)}return b(D);})();\\n;return Component;"},"_id":"authors/israel-ogbole.mdx","_raw":{"sourceFilePath":"authors/israel-ogbole.mdx","sourceFileName":"israel-ogbole.mdx","sourceFileDir":"authors","contentType":"mdx","flattenedPath":"authors/israel-ogbole"},"type":"Author","imageUrl":"","url":"/authors/israel-ogbole"},{"title":"Jack Shirazi","slug":"jack-shirazi","description":"Principal Software Engineer","image":"jack-shirazi.jpg","body":{"raw":"Jack Shirazi works in the APM Java agent team at Elastic. He is the founder of JavaPerformanceTuning.com and author of Java Performance Tuning (O’Reilly) and has been an official Java Champion since 2005. Jack has worked at all levels and all stages of IT projects in several industries, including with real-time, low latency, and highly scaled applications. As well as authoring his popular book and contributing to several other books, Jack has published over 70 articles on Java performance for various sites and magazines. 
He has also published over 200 newsletters for JavaPerformanceTuning.com over 20 years in addition to publishing over 10,000 Java performance and memory related tips.\\n\\n","code":"var Component=(()=>{var u=Object.create;var i=Object.defineProperty;var m=Object.getOwnPropertyDescriptor;var p=Object.getOwnPropertyNames;var f=Object.getPrototypeOf,g=Object.prototype.hasOwnProperty;var v=(a,e)=>()=>(e||a((e={exports:{}}).exports,e),e.exports),J=(a,e)=>{for(var n in e)i(a,n,{get:e[n],enumerable:!0})},s=(a,e,n,t)=>{if(e&&typeof e==\\"object\\"||typeof e==\\"function\\")for(let o of p(e))!g.call(a,o)&&o!==n&&i(a,o,{get:()=>e[o],enumerable:!(t=m(e,o))||t.enumerable});return a};var k=(a,e,n)=>(n=a!=null?u(f(a)):{},s(e||!a||!a.__esModule?i(n,\\"default\\",{value:a,enumerable:!0}):n,a)),w=a=>s(i({},\\"__esModule\\",{value:!0}),a);var l=v((y,c)=>{c.exports=_jsx_runtime});var j={};J(j,{default:()=>d,frontmatter:()=>b});var r=k(l()),b={title:\\"Jack Shirazi\\",slug:\\"jack-shirazi\\",description:\\"Principal Software Engineer\\",image:\\"jack-shirazi.jpg\\"};function h(a){let e={p:\\"p\\",...a.components};return(0,r.jsx)(e.p,{children:\\"Jack Shirazi works in the APM Java agent team at Elastic. He is the founder of JavaPerformanceTuning.com and author of Java Performance Tuning (O\\\\u2019Reilly) and has been an official Java Champion since 2005. Jack has worked at all levels and all stages of IT projects in several industries, including with real-time, low latency, and highly scaled applications. As well as authoring his popular book and contributing to several other books, Jack has published over 70 articles on Java performance for various sites and magazines. He has also published over 200 newsletters for JavaPerformanceTuning.com over 20 years in addition to publishing over 10,000 Java performance and memory related tips.\\"})}function d(a={}){let{wrapper:e}=a.components||{};return e?(0,r.jsx)(e,{...a,children:(0,r.jsx)(h,{...a})}):h(a)}return w(j);})();\\n;return Component;"},"_id":"authors/jack-shirazi.mdx","_raw":{"sourceFilePath":"authors/jack-shirazi.mdx","sourceFileName":"jack-shirazi.mdx","sourceFileDir":"authors","contentType":"mdx","flattenedPath":"authors/jack-shirazi"},"type":"Author","imageUrl":"/assets/images/authors/jack-shirazi.jpg","url":"/authors/jack-shirazi"},{"title":"Jeff Vestal","slug":"jeff-vestal","description":"Principal Customer Enterprise Architect at Elastic","image":"jeff-vestal.jpg","body":{"raw":"With a rich background spanning over a decade in financial trading firms and extensive experience with Elasticsearch, he offers a unique blend of operational acumen, engineering skill, and machine learning expertise. 
As a Principal Customer Enterprise Architect, he excels in crafting innovative solutions, leveraging Elasticsearch\'s advanced search capabilities, machine learning features, and generative AI integrations, adeptly guiding users to transform complex data challenges into actionable insights.\\n","code":"var Component=(()=>{var p=Object.create;var a=Object.defineProperty;var d=Object.getOwnPropertyDescriptor;var u=Object.getOwnPropertyNames;var h=Object.getPrototypeOf,m=Object.prototype.hasOwnProperty;var x=(e,n)=>()=>(n||e((n={exports:{}}).exports,n),n.exports),v=(e,n)=>{for(var t in n)a(e,t,{get:n[t],enumerable:!0})},c=(e,n,t,s)=>{if(n&&typeof n==\\"object\\"||typeof n==\\"function\\")for(let i of u(n))!m.call(e,i)&&i!==t&&a(e,i,{get:()=>n[i],enumerable:!(s=d(n,i))||s.enumerable});return e};var j=(e,n,t)=>(t=e!=null?p(h(e)):{},c(n||!e||!e.__esModule?a(t,\\"default\\",{value:e,enumerable:!0}):t,e)),E=e=>c(a({},\\"__esModule\\",{value:!0}),e);var l=x((C,o)=>{o.exports=_jsx_runtime});var b={};v(b,{default:()=>g,frontmatter:()=>_});var r=j(l()),_={title:\\"Jeff Vestal\\",slug:\\"jeff-vestal\\",description:\\"Principal Customer Enterprise Architect at Elastic\\",image:\\"jeff-vestal.jpg\\"};function f(e){let n={p:\\"p\\",...e.components};return(0,r.jsx)(n.p,{children:\\"With a rich background spanning over a decade in financial trading firms and extensive experience with Elasticsearch, he offers a unique blend of operational acumen, engineering skill, and machine learning expertise. As a Principal Customer Enterprise Architect, he excels in crafting innovative solutions, leveraging Elasticsearch\'s advanced search capabilities, machine learning features, and generative AI integrations, adeptly guiding users to transform complex data challenges into actionable insights.\\"})}function g(e={}){let{wrapper:n}=e.components||{};return n?(0,r.jsx)(n,{...e,children:(0,r.jsx)(f,{...e})}):f(e)}return E(b);})();\\n;return Component;"},"_id":"authors/jeff-vestal.mdx","_raw":{"sourceFilePath":"authors/jeff-vestal.mdx","sourceFileName":"jeff-vestal.mdx","sourceFileDir":"authors","contentType":"mdx","flattenedPath":"authors/jeff-vestal"},"type":"Author","imageUrl":"/assets/images/authors/jeff-vestal.jpg","url":"/authors/jeff-vestal"},{"title":"Jenny Morris","slug":"jenny-morris","description":"Principal Solutions Architect","image":"jenny-morris.jpeg","body":{"raw":"As a software technologist with over 20 years of experience, Jenny is dedicated to developing effective go-to-market strategies that enable customers to leverage technology for their business goals. She loves working closely with customers, understanding their needs, and crafting solutions to help them succeed. 
With a strong background in application development, Jenny still finds coding to be a source of genuine creative satisfaction.\\n\\n","code":"var Component=(()=>{var g=Object.create;var i=Object.defineProperty;var m=Object.getOwnPropertyDescriptor;var p=Object.getOwnPropertyNames;var f=Object.getPrototypeOf,h=Object.prototype.hasOwnProperty;var y=(e,t)=>()=>(t||e((t={exports:{}}).exports,t),t.exports),x=(e,t)=>{for(var n in t)i(e,n,{get:t[n],enumerable:!0})},c=(e,t,n,s)=>{if(t&&typeof t==\\"object\\"||typeof t==\\"function\\")for(let o of p(t))!h.call(e,o)&&o!==n&&i(e,o,{get:()=>t[o],enumerable:!(s=m(t,o))||s.enumerable});return e};var v=(e,t,n)=>(n=e!=null?g(f(e)):{},c(t||!e||!e.__esModule?i(n,\\"default\\",{value:e,enumerable:!0}):n,e)),j=e=>c(i({},\\"__esModule\\",{value:!0}),e);var l=y((M,a)=>{a.exports=_jsx_runtime});var _={};x(_,{default:()=>u,frontmatter:()=>w});var r=v(l()),w={title:\\"Jenny Morris\\",slug:\\"jenny-morris\\",description:\\"Principal Solutions Architect\\",image:\\"jenny-morris.jpeg\\"};function d(e){let t={p:\\"p\\",...e.components};return(0,r.jsx)(t.p,{children:\\"As a software technologist with over 20 years of experience, Jenny is dedicated to developing effective go-to-market strategies that enable customers to leverage technology for their business goals. She loves working closely with customers, understanding their needs, and crafting solutions to help them succeed. With a strong background in application development, Jenny still finds coding to be a source of genuine creative satisfaction.\\"})}function u(e={}){let{wrapper:t}=e.components||{};return t?(0,r.jsx)(t,{...e,children:(0,r.jsx)(d,{...e})}):d(e)}return j(_);})();\\n;return Component;"},"_id":"authors/jenny-morris.mdx","_raw":{"sourceFilePath":"authors/jenny-morris.mdx","sourceFileName":"jenny-morris.mdx","sourceFileDir":"authors","contentType":"mdx","flattenedPath":"authors/jenny-morris"},"type":"Author","imageUrl":"/assets/images/authors/jenny-morris.jpeg","url":"/authors/jenny-morris"},{"title":"Joel H\xf6ner","slug":"joel-honer","description":"Senior software engineer","image":"","body":{"raw":"\\n","code":"var Component=(()=>{var f=Object.create;var a=Object.defineProperty;var l=Object.getOwnPropertyDescriptor;var g=Object.getOwnPropertyNames;var _=Object.getPrototypeOf,d=Object.prototype.hasOwnProperty;var j=(e,t)=>()=>(t||e((t={exports:{}}).exports,t),t.exports),p=(e,t)=>{for(var n in t)a(e,n,{get:t[n],enumerable:!0})},s=(e,t,n,i)=>{if(t&&typeof t==\\"object\\"||typeof t==\\"function\\")for(let o of g(t))!d.call(e,o)&&o!==n&&a(e,o,{get:()=>t[o],enumerable:!(i=l(t,o))||i.enumerable});return e};var M=(e,t,n)=>(n=e!=null?f(_(e)):{},s(t||!e||!e.__esModule?a(n,\\"default\\",{value:e,enumerable:!0}):n,e)),h=e=>s(a({},\\"__esModule\\",{value:!0}),e);var m=j((F,c)=>{c.exports=_jsx_runtime});var C={};p(C,{default:()=>x,frontmatter:()=>w});var r=M(m()),w={title:\\"Joel H\\\\xF6ner\\",slug:\\"joel-honer\\",description:\\"Senior software engineer\\",image:\\"\\"};function u(e){return(0,r.jsx)(r.Fragment,{})}function x(e={}){let{wrapper:t}=e.components||{};return t?(0,r.jsx)(t,{...e,children:(0,r.jsx)(u,{...e})}):u(e)}return h(C);})();\\n;return Component;"},"_id":"authors/joel-honer.mdx","_raw":{"sourceFilePath":"authors/joel-honer.mdx","sourceFileName":"joel-honer.mdx","sourceFileDir":"authors","contentType":"mdx","flattenedPath":"authors/joel-honer"},"type":"Author","imageUrl":"","url":"/authors/joel-honer"},{"title":"John Knoepfle","slug":"john-knoepfle","description":"Principal Solutions Architect at 
Elastic","image":"john-knoepfle.jpg","body":{"raw":"\\n","code":"var Component=(()=>{var f=Object.create;var i=Object.defineProperty;var m=Object.getOwnPropertyDescriptor;var x=Object.getOwnPropertyNames;var j=Object.getPrototypeOf,g=Object.prototype.hasOwnProperty;var h=(t,n)=>()=>(n||t((n={exports:{}}).exports,n),n.exports),_=(t,n)=>{for(var e in n)i(t,e,{get:n[e],enumerable:!0})},c=(t,n,e,a)=>{if(n&&typeof n==\\"object\\"||typeof n==\\"function\\")for(let r of x(n))!g.call(t,r)&&r!==e&&i(t,r,{get:()=>n[r],enumerable:!(a=m(n,r))||a.enumerable});return t};var d=(t,n,e)=>(e=t!=null?f(j(t)):{},c(n||!t||!t.__esModule?i(e,\\"default\\",{value:t,enumerable:!0}):e,t)),M=t=>c(i({},\\"__esModule\\",{value:!0}),t);var l=h((F,s)=>{s.exports=_jsx_runtime});var C={};_(C,{default:()=>u,frontmatter:()=>k});var o=d(l()),k={title:\\"John Knoepfle\\",slug:\\"john-knoepfle\\",description:\\"Principal Solutions Architect at Elastic\\",image:\\"john-knoepfle.jpg\\"};function p(t){return(0,o.jsx)(o.Fragment,{})}function u(t={}){let{wrapper:n}=t.components||{};return n?(0,o.jsx)(n,{...t,children:(0,o.jsx)(p,{...t})}):p(t)}return M(C);})();\\n;return Component;"},"_id":"authors/john-knoepfle.mdx","_raw":{"sourceFilePath":"authors/john-knoepfle.mdx","sourceFileName":"john-knoepfle.mdx","sourceFileDir":"authors","contentType":"mdx","flattenedPath":"authors/john-knoepfle"},"type":"Author","imageUrl":"/assets/images/authors/john-knoepfle.jpg","url":"/authors/john-knoepfle"},{"title":"Jonas Kunz","slug":"jonas-kunz","description":"Principal Engineer, APM Agents","image":"","body":{"raw":"\\n","code":"var Component=(()=>{var g=Object.create;var a=Object.defineProperty;var f=Object.getOwnPropertyDescriptor;var l=Object.getOwnPropertyNames;var p=Object.getPrototypeOf,_=Object.prototype.hasOwnProperty;var d=(n,t)=>()=>(t||n((t={exports:{}}).exports,t),t.exports),j=(n,t)=>{for(var e in t)a(n,e,{get:t[e],enumerable:!0})},s=(n,t,e,i)=>{if(t&&typeof t==\\"object\\"||typeof t==\\"function\\")for(let o of l(t))!_.call(n,o)&&o!==e&&a(n,o,{get:()=>t[o],enumerable:!(i=f(t,o))||i.enumerable});return n};var M=(n,t,e)=>(e=n!=null?g(p(n)):{},s(t||!n||!n.__esModule?a(e,\\"default\\",{value:n,enumerable:!0}):e,n)),z=n=>s(a({},\\"__esModule\\",{value:!0}),n);var c=d((F,u)=>{u.exports=_jsx_runtime});var C={};j(C,{default:()=>x,frontmatter:()=>A});var r=M(c()),A={title:\\"Jonas Kunz\\",slug:\\"jonas-kunz\\",description:\\"Principal Engineer, APM Agents\\",image:\\"\\"};function m(n){return(0,r.jsx)(r.Fragment,{})}function x(n={}){let{wrapper:t}=n.components||{};return t?(0,r.jsx)(t,{...n,children:(0,r.jsx)(m,{...n})}):m(n)}return z(C);})();\\n;return Component;"},"_id":"authors/jonas-kunz.mdx","_raw":{"sourceFilePath":"authors/jonas-kunz.mdx","sourceFileName":"jonas-kunz.mdx","sourceFileDir":"authors","contentType":"mdx","flattenedPath":"authors/jonas-kunz"},"type":"Author","imageUrl":"","url":"/authors/jonas-kunz"},{"title":"Jonathan Simon","slug":"jonathan-simon","description":"Sr. 
Product Marketing Engineer","image":"jonathan-simon.jpg","body":{"raw":"\\n","code":"var Component=(()=>{var x=Object.create;var a=Object.defineProperty;var j=Object.getOwnPropertyDescriptor;var d=Object.getOwnPropertyNames;var f=Object.getPrototypeOf,p=Object.prototype.hasOwnProperty;var _=(n,t)=>()=>(t||n((t={exports:{}}).exports,t),t.exports),h=(n,t)=>{for(var e in t)a(n,e,{get:t[e],enumerable:!0})},m=(n,t,e,i)=>{if(t&&typeof t==\\"object\\"||typeof t==\\"function\\")for(let r of d(t))!p.call(n,r)&&r!==e&&a(n,r,{get:()=>t[r],enumerable:!(i=j(t,r))||i.enumerable});return n};var l=(n,t,e)=>(e=n!=null?x(f(n)):{},m(t||!n||!n.__esModule?a(e,\\"default\\",{value:n,enumerable:!0}):e,n)),M=n=>m(a({},\\"__esModule\\",{value:!0}),n);var c=_((S,s)=>{s.exports=_jsx_runtime});var D={};h(D,{default:()=>g,frontmatter:()=>C});var o=l(c()),C={title:\\"Jonathan Simon\\",slug:\\"jonathan-simon\\",description:\\"Sr. Product Marketing Engineer\\",image:\\"jonathan-simon.jpg\\"};function u(n){return(0,o.jsx)(o.Fragment,{})}function g(n={}){let{wrapper:t}=n.components||{};return t?(0,o.jsx)(t,{...n,children:(0,o.jsx)(u,{...n})}):u(n)}return M(D);})();\\n;return Component;"},"_id":"authors/jonathan-simon.mdx","_raw":{"sourceFilePath":"authors/jonathan-simon.mdx","sourceFileName":"jonathan-simon.mdx","sourceFileDir":"authors","contentType":"mdx","flattenedPath":"authors/jonathan-simon"},"type":"Author","imageUrl":"/assets/images/authors/jonathan-simon.jpg","url":"/authors/jonathan-simon"},{"title":"Kaiyan White","slug":"kaiyan-white","description":"","image":"","body":{"raw":"","code":"var Component=(()=>{var f=Object.create;var o=Object.defineProperty;var _=Object.getOwnPropertyDescriptor;var d=Object.getOwnPropertyNames;var g=Object.getPrototypeOf,l=Object.prototype.hasOwnProperty;var p=(t,e)=>()=>(e||t((e={exports:{}}).exports,e),e.exports),h=(t,e)=>{for(var n in e)o(t,n,{get:e[n],enumerable:!0})},s=(t,e,n,i)=>{if(e&&typeof e==\\"object\\"||typeof e==\\"function\\")for(let a of d(e))!l.call(t,a)&&a!==n&&o(t,a,{get:()=>e[a],enumerable:!(i=_(e,a))||i.enumerable});return t};var j=(t,e,n)=>(n=t!=null?f(g(t)):{},s(e||!t||!t.__esModule?o(n,\\"default\\",{value:t,enumerable:!0}):n,t)),y=t=>s(o({},\\"__esModule\\",{value:!0}),t);var m=p((D,c)=>{c.exports=_jsx_runtime});var w={};h(w,{default:()=>x,frontmatter:()=>M});var r=j(m()),M={title:\\"Kaiyan White\\",slug:\\"kaiyan-white\\",description:\\"\\",image:\\"\\"};function u(t){return(0,r.jsx)(r.Fragment,{})}function x(t={}){let{wrapper:e}=t.components||{};return e?(0,r.jsx)(e,{...t,children:(0,r.jsx)(u,{...t})}):u(t)}return y(w);})();\\n;return Component;"},"_id":"authors/kaiyan-white.mdx","_raw":{"sourceFilePath":"authors/kaiyan-white.mdx","sourceFileName":"kaiyan-white.mdx","sourceFileDir":"authors","contentType":"mdx","flattenedPath":"authors/kaiyan-white"},"type":"Author","imageUrl":"","url":"/authors/kaiyan-white"},{"title":"Karthik Kalyanaraman","slug":"karthik-kalyanaraman","description":"Co-founder and CTO of Langtrace AI","body":{"raw":"\\n","code":"var Component=(()=>{var x=Object.create;var o=Object.defineProperty;var d=Object.getOwnPropertyDescriptor;var l=Object.getOwnPropertyNames;var _=Object.getPrototypeOf,g=Object.prototype.hasOwnProperty;var k=(t,n)=>()=>(n||t((n={exports:{}}).exports,n),n.exports),p=(t,n)=>{for(var a in n)o(t,a,{get:n[a],enumerable:!0})},c=(t,n,a,i)=>{if(n&&typeof n==\\"object\\"||typeof n==\\"function\\")for(let e of l(n))!g.call(t,e)&&e!==a&&o(t,e,{get:()=>n[e],enumerable:!(i=d(n,e))||i.enumerable});return t};var 
C=(t,n,a)=>(a=t!=null?x(_(t)):{},c(n||!t||!t.__esModule?o(a,\\"default\\",{value:t,enumerable:!0}):a,t)),h=t=>c(o({},\\"__esModule\\",{value:!0}),t);var s=k((D,m)=>{m.exports=_jsx_runtime});var y={};p(y,{default:()=>f,frontmatter:()=>j});var r=C(s()),j={title:\\"Karthik Kalyanaraman\\",slug:\\"karthik-kalyanaraman\\",description:\\"Co-founder and CTO of Langtrace AI\\"};function u(t){return(0,r.jsx)(r.Fragment,{})}function f(t={}){let{wrapper:n}=t.components||{};return n?(0,r.jsx)(n,{...t,children:(0,r.jsx)(u,{...t})}):u(t)}return h(y);})();\\n;return Component;"},"_id":"authors/karthik-kalyanaraman.mdx","_raw":{"sourceFilePath":"authors/karthik-kalyanaraman.mdx","sourceFileName":"karthik-kalyanaraman.mdx","sourceFileDir":"authors","contentType":"mdx","flattenedPath":"authors/karthik-kalyanaraman"},"type":"Author","imageUrl":"","url":"/authors/karthik-kalyanaraman"},{"title":"Katrin Freihofner","slug":"katrin-freihofner","description":"Product Manager","image":"","body":{"raw":"\\n","code":"var Component=(()=>{var x=Object.create;var a=Object.defineProperty;var d=Object.getOwnPropertyDescriptor;var g=Object.getOwnPropertyNames;var _=Object.getPrototypeOf,l=Object.prototype.hasOwnProperty;var p=(t,e)=>()=>(e||t((e={exports:{}}).exports,e),e.exports),M=(t,e)=>{for(var n in e)a(t,n,{get:e[n],enumerable:!0})},c=(t,e,n,i)=>{if(e&&typeof e==\\"object\\"||typeof e==\\"function\\")for(let o of g(e))!l.call(t,o)&&o!==n&&a(t,o,{get:()=>e[o],enumerable:!(i=d(e,o))||i.enumerable});return t};var h=(t,e,n)=>(n=t!=null?x(_(t)):{},c(e||!t||!t.__esModule?a(n,\\"default\\",{value:t,enumerable:!0}):n,t)),j=t=>c(a({},\\"__esModule\\",{value:!0}),t);var u=p((X,s)=>{s.exports=_jsx_runtime});var C={};M(C,{default:()=>m,frontmatter:()=>F});var r=h(u()),F={title:\\"Katrin Freihofner\\",slug:\\"katrin-freihofner\\",description:\\"Product Manager\\",image:\\"\\"};function f(t){return(0,r.jsx)(r.Fragment,{})}function m(t={}){let{wrapper:e}=t.components||{};return e?(0,r.jsx)(e,{...t,children:(0,r.jsx)(f,{...t})}):f(t)}return j(C);})();\\n;return Component;"},"_id":"authors/katrin-freihofner.mdx","_raw":{"sourceFilePath":"authors/katrin-freihofner.mdx","sourceFileName":"katrin-freihofner.mdx","sourceFileDir":"authors","contentType":"mdx","flattenedPath":"authors/katrin-freihofner"},"type":"Author","imageUrl":"","url":"/authors/katrin-freihofner"},{"title":"Lalit Satapathy","slug":"lalit-satapathy","description":"Senior Manager Software Engineering","image":"lalit-satapathy.jpeg","body":{"raw":"Lalit is a team lead in Elastic Observability. He is excited to dive deep into evolving domains of observability and learn new technologies. 
He has recently worked on Elastic\'s journey on time series database (TSDB) for metrics and OpenTelemetry adoption.\\n\\n","code":"var Component=(()=>{var m=Object.create;var i=Object.defineProperty;var u=Object.getOwnPropertyDescriptor;var y=Object.getOwnPropertyNames;var f=Object.getPrototypeOf,g=Object.prototype.hasOwnProperty;var x=(e,t)=>()=>(t||e((t={exports:{}}).exports,t),t.exports),h=(e,t)=>{for(var n in t)i(e,n,{get:t[n],enumerable:!0})},s=(e,t,n,r)=>{if(t&&typeof t==\\"object\\"||typeof t==\\"function\\")for(let a of y(t))!g.call(e,a)&&a!==n&&i(e,a,{get:()=>t[a],enumerable:!(r=u(t,a))||r.enumerable});return e};var b=(e,t,n)=>(n=e!=null?m(f(e)):{},s(t||!e||!e.__esModule?i(n,\\"default\\",{value:e,enumerable:!0}):n,e)),j=e=>s(i({},\\"__esModule\\",{value:!0}),e);var c=x((M,l)=>{l.exports=_jsx_runtime});var _={};h(_,{default:()=>p,frontmatter:()=>v});var o=b(c()),v={title:\\"Lalit Satapathy\\",slug:\\"lalit-satapathy\\",description:\\"Senior Manager Software Engineering\\",image:\\"lalit-satapathy.jpeg\\"};function d(e){let t={p:\\"p\\",...e.components};return(0,o.jsx)(t.p,{children:\\"Lalit is a team lead in Elastic Observability. He is excited to dive deep into evolving domains of observability and learn new technologies. He has recently worked on Elastic\'s journey on time series database (TSDB) for metrics and OpenTelemetry adoption.\\"})}function p(e={}){let{wrapper:t}=e.components||{};return t?(0,o.jsx)(t,{...e,children:(0,o.jsx)(d,{...e})}):d(e)}return j(_);})();\\n;return Component;"},"_id":"authors/lalit-satapathy.mdx","_raw":{"sourceFilePath":"authors/lalit-satapathy.mdx","sourceFileName":"lalit-satapathy.mdx","sourceFileDir":"authors","contentType":"mdx","flattenedPath":"authors/lalit-satapathy"},"type":"Author","imageUrl":"/assets/images/authors/lalit-satapathy.jpeg","url":"/authors/lalit-satapathy"},{"title":"Luca Wintergerst","slug":"luca-wintergerst","description":"Product Marketing Director at Elastic","image":"luca-wintergerst.jpg","body":{"raw":"\\n","code":"var Component=(()=>{var l=Object.create;var o=Object.defineProperty;var x=Object.getOwnPropertyDescriptor;var d=Object.getOwnPropertyNames;var f=Object.getPrototypeOf,p=Object.prototype.hasOwnProperty;var _=(t,e)=>()=>(e||t((e={exports:{}}).exports,e),e.exports),j=(t,e)=>{for(var r in e)o(t,r,{get:e[r],enumerable:!0})},c=(t,e,r,i)=>{if(e&&typeof e==\\"object\\"||typeof e==\\"function\\")for(let a of d(e))!p.call(t,a)&&a!==r&&o(t,a,{get:()=>e[a],enumerable:!(i=x(e,a))||i.enumerable});return t};var M=(t,e,r)=>(r=t!=null?l(f(t)):{},c(e||!t||!t.__esModule?o(r,\\"default\\",{value:t,enumerable:!0}):r,t)),w=t=>c(o({},\\"__esModule\\",{value:!0}),t);var u=_((L,s)=>{s.exports=_jsx_runtime});var C={};j(C,{default:()=>m,frontmatter:()=>D});var n=M(u()),D={title:\\"Luca Wintergerst\\",slug:\\"luca-wintergerst\\",description:\\"Product Marketing Director at Elastic\\",image:\\"luca-wintergerst.jpg\\"};function g(t){return(0,n.jsx)(n.Fragment,{})}function m(t={}){let{wrapper:e}=t.components||{};return e?(0,n.jsx)(e,{...t,children:(0,n.jsx)(g,{...t})}):g(t)}return w(C);})();\\n;return Component;"},"_id":"authors/luca-wintergerst.mdx","_raw":{"sourceFilePath":"authors/luca-wintergerst.mdx","sourceFileName":"luca-wintergerst.mdx","sourceFileDir":"authors","contentType":"mdx","flattenedPath":"authors/luca-wintergerst"},"type":"Author","imageUrl":"/assets/images/authors/luca-wintergerst.jpg","url":"/authors/luca-wintergerst"},{"title":"Martijn Laarman","slug":"martijn-laarman","description":"Principal Software Engineer at 
Elastic","image":"martijn-laarman.jpg","body":{"raw":"A .NET developer for nearly a decade and tinkering with the web since his teens, Martijn loves keyboard-driven creativity.\\n\\n","code":"var Component=(()=>{var p=Object.create;var a=Object.defineProperty;var j=Object.getOwnPropertyDescriptor;var u=Object.getOwnPropertyNames;var f=Object.getPrototypeOf,x=Object.prototype.hasOwnProperty;var g=(n,t)=>()=>(t||n((t={exports:{}}).exports,t),t.exports),h=(n,t)=>{for(var e in t)a(n,e,{get:t[e],enumerable:!0})},c=(n,t,e,o)=>{if(t&&typeof t==\\"object\\"||typeof t==\\"function\\")for(let r of u(t))!x.call(n,r)&&r!==e&&a(n,r,{get:()=>t[r],enumerable:!(o=j(t,r))||o.enumerable});return n};var M=(n,t,e)=>(e=n!=null?p(f(n)):{},c(t||!n||!n.__esModule?a(e,\\"default\\",{value:n,enumerable:!0}):e,n)),_=n=>c(a({},\\"__esModule\\",{value:!0}),n);var m=g((E,s)=>{s.exports=_jsx_runtime});var w={};h(w,{default:()=>d,frontmatter:()=>v});var i=M(m()),v={title:\\"Martijn Laarman\\",slug:\\"martijn-laarman\\",description:\\"Principal Software Engineer at Elastic\\",image:\\"martijn-laarman.jpg\\"};function l(n){let t={p:\\"p\\",...n.components};return(0,i.jsx)(t.p,{children:\\"A .NET developer for nearly a decade and tinkering with the web since his teens, Martijn loves keyboard-driven creativity.\\"})}function d(n={}){let{wrapper:t}=n.components||{};return t?(0,i.jsx)(t,{...n,children:(0,i.jsx)(l,{...n})}):l(n)}return _(w);})();\\n;return Component;"},"_id":"authors/martijn-laarman.mdx","_raw":{"sourceFilePath":"authors/martijn-laarman.mdx","sourceFileName":"martijn-laarman.mdx","sourceFileDir":"authors","contentType":"mdx","flattenedPath":"authors/martijn-laarman"},"type":"Author","imageUrl":"/assets/images/authors/martijn-laarman.jpg","url":"/authors/martijn-laarman"},{"title":"Maurizio Branca","slug":"maurizio-branca","description":"Senior Software Engineer","image":"maurizio-branca.jpg","body":{"raw":"\\nMaurizio Branca is a software engineer on the Observability team. He is working on cloud observability, focusing mostly on Azure and AWS. Maurizio is passionate about open source and loves automating all the things.\\n","code":"var Component=(()=>{var p=Object.create;var i=Object.defineProperty;var g=Object.getOwnPropertyDescriptor;var d=Object.getOwnPropertyNames;var f=Object.getPrototypeOf,b=Object.prototype.hasOwnProperty;var x=(n,t)=>()=>(t||n((t={exports:{}}).exports,t),t.exports),z=(n,t)=>{for(var e in t)i(n,e,{get:t[e],enumerable:!0})},s=(n,t,e,r)=>{if(t&&typeof t==\\"object\\"||typeof t==\\"function\\")for(let o of d(t))!b.call(n,o)&&o!==e&&i(n,o,{get:()=>t[o],enumerable:!(r=g(t,o))||r.enumerable});return n};var M=(n,t,e)=>(e=n!=null?p(f(n)):{},s(t||!n||!n.__esModule?i(e,\\"default\\",{value:n,enumerable:!0}):e,n)),h=n=>s(i({},\\"__esModule\\",{value:!0}),n);var u=x((y,c)=>{c.exports=_jsx_runtime});var j={};z(j,{default:()=>l,frontmatter:()=>_});var a=M(u()),_={title:\\"Maurizio Branca\\",slug:\\"maurizio-branca\\",description:\\"Senior Software Engineer\\",image:\\"maurizio-branca.jpg\\"};function m(n){let t={p:\\"p\\",...n.components};return(0,a.jsx)(t.p,{children:\\"Maurizio Branca is a software engineer on the Observability team. He is working on cloud observability, focusing mostly on Azure and AWS. 
Maurizio is passionate about open source and loves automating all the things.\\"})}function l(n={}){let{wrapper:t}=n.components||{};return t?(0,a.jsx)(t,{...n,children:(0,a.jsx)(m,{...n})}):m(n)}return h(j);})();\\n;return Component;"},"_id":"authors/maurizio-branca.mdx","_raw":{"sourceFilePath":"authors/maurizio-branca.mdx","sourceFileName":"maurizio-branca.mdx","sourceFileDir":"authors","contentType":"mdx","flattenedPath":"authors/maurizio-branca"},"type":"Author","imageUrl":"/assets/images/authors/maurizio-branca.jpg","url":"/authors/maurizio-branca"},{"title":"Michael Hyatt","slug":"michael-hyatt","description":"Principal Solutions Architect at Elastic","image":"michael-hyatt.jpg","body":{"raw":"Michael is a solutions architect with a background in Analytics, APIs and messaging. He is passionate about technology and travelling with his family.\\n\\n","code":"var Component=(()=>{var u=Object.create;var a=Object.defineProperty;var p=Object.getOwnPropertyDescriptor;var d=Object.getOwnPropertyNames;var g=Object.getPrototypeOf,y=Object.prototype.hasOwnProperty;var x=(t,n)=>()=>(n||t((n={exports:{}}).exports,n),n.exports),f=(t,n)=>{for(var e in n)a(t,e,{get:n[e],enumerable:!0})},s=(t,n,e,c)=>{if(n&&typeof n==\\"object\\"||typeof n==\\"function\\")for(let i of d(n))!y.call(t,i)&&i!==e&&a(t,i,{get:()=>n[i],enumerable:!(c=p(n,i))||c.enumerable});return t};var M=(t,n,e)=>(e=t!=null?u(g(t)):{},s(n||!t||!t.__esModule?a(e,\\"default\\",{value:t,enumerable:!0}):e,t)),_=t=>s(a({},\\"__esModule\\",{value:!0}),t);var l=x((b,r)=>{r.exports=_jsx_runtime});var w={};f(w,{default:()=>m,frontmatter:()=>j});var o=M(l()),j={title:\\"Michael Hyatt\\",slug:\\"michael-hyatt\\",description:\\"Principal Solutions Architect at Elastic\\",image:\\"michael-hyatt.jpg\\"};function h(t){let n={p:\\"p\\",...t.components};return(0,o.jsx)(n.p,{children:\\"Michael is a solutions architect with a background in Analytics, APIs and messaging. 
He is passionate about technology and travelling with his family.\\"})}function m(t={}){let{wrapper:n}=t.components||{};return n?(0,o.jsx)(n,{...t,children:(0,o.jsx)(h,{...t})}):h(t)}return _(w);})();\\n;return Component;"},"_id":"authors/michael-hyatt.mdx","_raw":{"sourceFilePath":"authors/michael-hyatt.mdx","sourceFileName":"michael-hyatt.mdx","sourceFileDir":"authors","contentType":"mdx","flattenedPath":"authors/michael-hyatt"},"type":"Author","imageUrl":"/assets/images/authors/michael-hyatt.jpg","url":"/authors/michael-hyatt"},{"title":"Miguel Luna","slug":"miguel-luna","description":"Principal Product Manager Observability","image":null,"body":{"raw":"\\n","code":"var Component=(()=>{var g=Object.create;var i=Object.defineProperty;var x=Object.getOwnPropertyDescriptor;var d=Object.getOwnPropertyNames;var f=Object.getPrototypeOf,p=Object.prototype.hasOwnProperty;var M=(t,n)=>()=>(n||t((n={exports:{}}).exports,n),n.exports),_=(t,n)=>{for(var e in n)i(t,e,{get:n[e],enumerable:!0})},u=(t,n,e,o)=>{if(n&&typeof n==\\"object\\"||typeof n==\\"function\\")for(let a of d(n))!p.call(t,a)&&a!==e&&i(t,a,{get:()=>n[a],enumerable:!(o=x(n,a))||o.enumerable});return t};var j=(t,n,e)=>(e=t!=null?g(f(t)):{},u(n||!t||!t.__esModule?i(e,\\"default\\",{value:t,enumerable:!0}):e,t)),b=t=>u(i({},\\"__esModule\\",{value:!0}),t);var c=M((F,l)=>{l.exports=_jsx_runtime});var C={};_(C,{default:()=>m,frontmatter:()=>y});var r=j(c()),y={title:\\"Miguel Luna\\",slug:\\"miguel-luna\\",description:\\"Principal Product Manager Observability\\",image:null};function s(t){return(0,r.jsx)(r.Fragment,{})}function m(t={}){let{wrapper:n}=t.components||{};return n?(0,r.jsx)(n,{...t,children:(0,r.jsx)(s,{...t})}):s(t)}return b(C);})();\\n;return Component;"},"_id":"authors/miguel-luna.mdx","_raw":{"sourceFilePath":"authors/miguel-luna.mdx","sourceFileName":"miguel-luna.mdx","sourceFileDir":"authors","contentType":"mdx","flattenedPath":"authors/miguel-luna"},"type":"Author","imageUrl":"","url":"/authors/miguel-luna"},{"title":"Mike Birnstiehl","slug":"mike-birnstiehl","description":"","image":"","body":{"raw":"","code":"var Component=(()=>{var l=Object.create;var o=Object.defineProperty;var f=Object.getOwnPropertyDescriptor;var _=Object.getOwnPropertyNames;var d=Object.getPrototypeOf,g=Object.prototype.hasOwnProperty;var p=(t,e)=>()=>(e||t((e={exports:{}}).exports,e),e.exports),M=(t,e)=>{for(var n in e)o(t,n,{get:e[n],enumerable:!0})},a=(t,e,n,s)=>{if(e&&typeof e==\\"object\\"||typeof e==\\"function\\")for(let i of _(e))!g.call(t,i)&&i!==n&&o(t,i,{get:()=>e[i],enumerable:!(s=f(e,i))||s.enumerable});return t};var h=(t,e,n)=>(n=t!=null?l(d(t)):{},a(e||!t||!t.__esModule?o(n,\\"default\\",{value:t,enumerable:!0}):n,t)),j=t=>a(o({},\\"__esModule\\",{value:!0}),t);var c=p((F,m)=>{m.exports=_jsx_runtime});var C={};M(C,{default:()=>x,frontmatter:()=>k});var r=h(c()),k={title:\\"Mike Birnstiehl\\",slug:\\"mike-birnstiehl\\",description:\\"\\",image:\\"\\"};function u(t){return(0,r.jsx)(r.Fragment,{})}function x(t={}){let{wrapper:e}=t.components||{};return e?(0,r.jsx)(e,{...t,children:(0,r.jsx)(u,{...t})}):u(t)}return j(C);})();\\n;return Component;"},"_id":"authors/mike-birnstiehl.mdx","_raw":{"sourceFilePath":"authors/mike-birnstiehl.mdx","sourceFileName":"mike-birnstiehl.mdx","sourceFileDir":"authors","contentType":"mdx","flattenedPath":"authors/mike-birnstiehl"},"type":"Author","imageUrl":"","url":"/authors/mike-birnstiehl"},{"title":"Mirko Bez","slug":"mirko-bez","description":"Senior Technical 
Consultant","image":"","body":{"raw":"\\n","code":"var Component=(()=>{var l=Object.create;var i=Object.defineProperty;var f=Object.getOwnPropertyDescriptor;var _=Object.getOwnPropertyNames;var d=Object.getPrototypeOf,g=Object.prototype.hasOwnProperty;var p=(t,e)=>()=>(e||t((e={exports:{}}).exports,e),e.exports),M=(t,e)=>{for(var n in e)i(t,n,{get:e[n],enumerable:!0})},c=(t,e,n,a)=>{if(e&&typeof e==\\"object\\"||typeof e==\\"function\\")for(let o of _(e))!g.call(t,o)&&o!==n&&i(t,o,{get:()=>e[o],enumerable:!(a=f(e,o))||a.enumerable});return t};var j=(t,e,n)=>(n=t!=null?l(d(t)):{},c(e||!t||!t.__esModule?i(n,\\"default\\",{value:t,enumerable:!0}):n,t)),C=t=>c(i({},\\"__esModule\\",{value:!0}),t);var m=p((D,s)=>{s.exports=_jsx_runtime});var k={};M(k,{default:()=>x,frontmatter:()=>h});var r=j(m()),h={title:\\"Mirko Bez\\",slug:\\"mirko-bez\\",description:\\"Senior Technical Consultant\\",image:\\"\\"};function u(t){return(0,r.jsx)(r.Fragment,{})}function x(t={}){let{wrapper:e}=t.components||{};return e?(0,r.jsx)(e,{...t,children:(0,r.jsx)(u,{...t})}):u(t)}return C(k);})();\\n;return Component;"},"_id":"authors/mirko-bez.mdx","_raw":{"sourceFilePath":"authors/mirko-bez.mdx","sourceFileName":"mirko-bez.mdx","sourceFileDir":"authors","contentType":"mdx","flattenedPath":"authors/mirko-bez"},"type":"Author","imageUrl":"","url":"/authors/mirko-bez"},{"title":"Muthukumar Paramasivam","slug":"muthukumar-paramasivam","description":"Senior Software Engineer","image":"","body":{"raw":"\\n","code":"var Component=(()=>{var f=Object.create;var o=Object.defineProperty;var g=Object.getOwnPropertyDescriptor;var p=Object.getOwnPropertyNames;var _=Object.getPrototypeOf,d=Object.prototype.hasOwnProperty;var l=(t,e)=>()=>(e||t((e={exports:{}}).exports,e),e.exports),M=(t,e)=>{for(var r in e)o(t,r,{get:e[r],enumerable:!0})},u=(t,e,r,m)=>{if(e&&typeof e==\\"object\\"||typeof e==\\"function\\")for(let a of p(e))!d.call(t,a)&&a!==r&&o(t,a,{get:()=>e[a],enumerable:!(m=g(e,a))||m.enumerable});return t};var h=(t,e,r)=>(r=t!=null?f(_(t)):{},u(e||!t||!t.__esModule?o(r,\\"default\\",{value:t,enumerable:!0}):r,t)),j=t=>u(o({},\\"__esModule\\",{value:!0}),t);var s=l((C,i)=>{i.exports=_jsx_runtime});var v={};M(v,{default:()=>x,frontmatter:()=>k});var n=h(s()),k={title:\\"Muthukumar Paramasivam\\",slug:\\"muthukumar-paramasivam\\",description:\\"Senior Software Engineer\\",image:\\"\\"};function c(t){return(0,n.jsx)(n.Fragment,{})}function x(t={}){let{wrapper:e}=t.components||{};return e?(0,n.jsx)(e,{...t,children:(0,n.jsx)(c,{...t})}):c(t)}return j(v);})();\\n;return Component;"},"_id":"authors/muthukumar-paramasivam.mdx","_raw":{"sourceFilePath":"authors/muthukumar-paramasivam.mdx","sourceFileName":"muthukumar-paramasivam.mdx","sourceFileDir":"authors","contentType":"mdx","flattenedPath":"authors/muthukumar-paramasivam"},"type":"Author","imageUrl":"","url":"/authors/muthukumar-paramasivam"},{"title":"Mykola Harmash","slug":"mykola-harmash","description":"","image":"","body":{"raw":"","code":"var Component=(()=>{var l=Object.create;var o=Object.defineProperty;var f=Object.getOwnPropertyDescriptor;var _=Object.getOwnPropertyNames;var d=Object.getPrototypeOf,g=Object.prototype.hasOwnProperty;var h=(t,e)=>()=>(e||t((e={exports:{}}).exports,e),e.exports),p=(t,e)=>{for(var n in e)o(t,n,{get:e[n],enumerable:!0})},s=(t,e,n,m)=>{if(e&&typeof e==\\"object\\"||typeof e==\\"function\\")for(let a of _(e))!g.call(t,a)&&a!==n&&o(t,a,{get:()=>e[a],enumerable:!(m=f(e,a))||m.enumerable});return t};var 
M=(t,e,n)=>(n=t!=null?l(d(t)):{},s(e||!t||!t.__esModule?o(n,\\"default\\",{value:t,enumerable:!0}):n,t)),j=t=>s(o({},\\"__esModule\\",{value:!0}),t);var c=h((D,i)=>{i.exports=_jsx_runtime});var k={};p(k,{default:()=>x,frontmatter:()=>y});var r=M(c()),y={title:\\"Mykola Harmash\\",slug:\\"mykola-harmash\\",description:\\"\\",image:\\"\\"};function u(t){return(0,r.jsx)(r.Fragment,{})}function x(t={}){let{wrapper:e}=t.components||{};return e?(0,r.jsx)(e,{...t,children:(0,r.jsx)(u,{...t})}):u(t)}return j(k);})();\\n;return Component;"},"_id":"authors/mykola-harmash.mdx","_raw":{"sourceFilePath":"authors/mykola-harmash.mdx","sourceFileName":"mykola-harmash.mdx","sourceFileDir":"authors","contentType":"mdx","flattenedPath":"authors/mykola-harmash"},"type":"Author","imageUrl":"","url":"/authors/mykola-harmash"},{"title":"Nathan Smith","slug":"nathan-smith","description":"Senior Manager, Software Engineering","image":"","body":{"raw":"\\n","code":"var Component=(()=>{var x=Object.create;var o=Object.defineProperty;var f=Object.getOwnPropertyDescriptor;var h=Object.getOwnPropertyNames;var _=Object.getPrototypeOf,d=Object.prototype.hasOwnProperty;var l=(t,n)=>()=>(n||t((n={exports:{}}).exports,n),n.exports),p=(t,n)=>{for(var e in n)o(t,e,{get:n[e],enumerable:!0})},m=(t,n,e,i)=>{if(n&&typeof n==\\"object\\"||typeof n==\\"function\\")for(let a of h(n))!d.call(t,a)&&a!==e&&o(t,a,{get:()=>n[a],enumerable:!(i=f(n,a))||i.enumerable});return t};var M=(t,n,e)=>(e=t!=null?x(_(t)):{},m(n||!t||!t.__esModule?o(e,\\"default\\",{value:t,enumerable:!0}):e,t)),j=t=>m(o({},\\"__esModule\\",{value:!0}),t);var c=l((D,s)=>{s.exports=_jsx_runtime});var w={};p(w,{default:()=>g,frontmatter:()=>S});var r=M(c()),S={title:\\"Nathan Smith\\",slug:\\"nathan-smith\\",description:\\"Senior Manager, Software Engineering\\",image:\\"\\"};function u(t){return(0,r.jsx)(r.Fragment,{})}function g(t={}){let{wrapper:n}=t.components||{};return n?(0,r.jsx)(n,{...t,children:(0,r.jsx)(u,{...t})}):u(t)}return j(w);})();\\n;return Component;"},"_id":"authors/nathan-smith.mdx","_raw":{"sourceFilePath":"authors/nathan-smith.mdx","sourceFileName":"nathan-smith.mdx","sourceFileDir":"authors","contentType":"mdx","flattenedPath":"authors/nathan-smith"},"type":"Author","imageUrl":"","url":"/authors/nathan-smith"},{"title":"Nicolas Ruflin","slug":"nicolas-ruflin","description":"Principal Software Engineer II at Elastic","image":"nicolas-ruflin.jpg","body":{"raw":"Nicolas Ruflin is a software engineer on the Observability team. He is passionate about automating everything through software and embraces the constancy of change in the field. 
When not pondering engineering problems, he enjoys doing all kinds of sports, particularly handball.\\n\\n","code":"var Component=(()=>{var p=Object.create;var i=Object.defineProperty;var g=Object.getOwnPropertyDescriptor;var h=Object.getOwnPropertyNames;var m=Object.getPrototypeOf,d=Object.prototype.hasOwnProperty;var x=(n,e)=>()=>(e||n((e={exports:{}}).exports,e),e.exports),b=(n,e)=>{for(var t in e)i(n,t,{get:e[t],enumerable:!0})},s=(n,e,t,r)=>{if(e&&typeof e==\\"object\\"||typeof e==\\"function\\")for(let o of h(e))!d.call(n,o)&&o!==t&&i(n,o,{get:()=>e[o],enumerable:!(r=g(e,o))||r.enumerable});return n};var y=(n,e,t)=>(t=n!=null?p(m(n)):{},s(e||!n||!n.__esModule?i(t,\\"default\\",{value:n,enumerable:!0}):t,n)),j=n=>s(i({},\\"__esModule\\",{value:!0}),n);var c=x((v,l)=>{l.exports=_jsx_runtime});var w={};b(w,{default:()=>f,frontmatter:()=>_});var a=y(c()),_={title:\\"Nicolas Ruflin\\",slug:\\"nicolas-ruflin\\",description:\\"Principal Software Engineer II at Elastic\\",image:\\"nicolas-ruflin.jpg\\"};function u(n){let e={p:\\"p\\",...n.components};return(0,a.jsx)(e.p,{children:\\"Nicolas Ruflin is a software engineer on the Observability team. He is passionate about automating everything through software and embraces the constancy of change in the field. When not pondering engineering problems, he enjoys doing all kinds of sports, particularly handball.\\"})}function f(n={}){let{wrapper:e}=n.components||{};return e?(0,a.jsx)(e,{...n,children:(0,a.jsx)(u,{...n})}):u(n)}return j(w);})();\\n;return Component;"},"_id":"authors/nicolas-ruflin.mdx","_raw":{"sourceFilePath":"authors/nicolas-ruflin.mdx","sourceFileName":"nicolas-ruflin.mdx","sourceFileDir":"authors","contentType":"mdx","flattenedPath":"authors/nicolas-ruflin"},"type":"Author","imageUrl":"/assets/images/authors/nicolas-ruflin.jpg","url":"/authors/nicolas-ruflin"},{"title":"Pawel Filipczak","slug":"pawel-filipczak","description":"Principal Engineer, APM Agents","image":"pawel-filipczak.jpg","body":{"raw":"\\n","code":"var Component=(()=>{var u=Object.create;var a=Object.defineProperty;var f=Object.getOwnPropertyDescriptor;var g=Object.getOwnPropertyNames;var x=Object.getPrototypeOf,_=Object.prototype.hasOwnProperty;var d=(t,e)=>()=>(e||t((e={exports:{}}).exports,e),e.exports),j=(t,e)=>{for(var n in e)a(t,n,{get:e[n],enumerable:!0})},c=(t,e,n,o)=>{if(e&&typeof e==\\"object\\"||typeof e==\\"function\\")for(let i of g(e))!_.call(t,i)&&i!==n&&a(t,i,{get:()=>e[i],enumerable:!(o=f(e,i))||o.enumerable});return t};var w=(t,e,n)=>(n=t!=null?u(x(t)):{},c(e||!t||!t.__esModule?a(n,\\"default\\",{value:t,enumerable:!0}):n,t)),M=t=>c(a({},\\"__esModule\\",{value:!0}),t);var p=d((P,l)=>{l.exports=_jsx_runtime});var z={};j(z,{default:()=>m,frontmatter:()=>k});var r=w(p()),k={title:\\"Pawel Filipczak\\",slug:\\"pawel-filipczak\\",description:\\"Principal Engineer, APM Agents\\",image:\\"pawel-filipczak.jpg\\"};function s(t){return(0,r.jsx)(r.Fragment,{})}function m(t={}){let{wrapper:e}=t.components||{};return e?(0,r.jsx)(e,{...t,children:(0,r.jsx)(s,{...t})}):s(t)}return M(z);})();\\n;return Component;"},"_id":"authors/pawel-filipczak.mdx","_raw":{"sourceFilePath":"authors/pawel-filipczak.mdx","sourceFileName":"pawel-filipczak.mdx","sourceFileDir":"authors","contentType":"mdx","flattenedPath":"authors/pawel-filipczak"},"type":"Author","imageUrl":"/assets/images/authors/pawel-filipczak.jpg","url":"/authors/pawel-filipczak"},{"title":"Peter Titov","slug":"peter-titov","description":"Global Solutions Architect at 
Elastic","image":"peter-titov.jpeg","body":{"raw":"\\n","code":"var Component=(()=>{var p=Object.create;var i=Object.defineProperty;var x=Object.getOwnPropertyDescriptor;var f=Object.getOwnPropertyNames;var g=Object.getPrototypeOf,_=Object.prototype.hasOwnProperty;var d=(t,e)=>()=>(e||t((e={exports:{}}).exports,e),e.exports),j=(t,e)=>{for(var n in e)i(t,n,{get:e[n],enumerable:!0})},c=(t,e,n,a)=>{if(e&&typeof e==\\"object\\"||typeof e==\\"function\\")for(let o of f(e))!_.call(t,o)&&o!==n&&i(t,o,{get:()=>e[o],enumerable:!(a=x(e,o))||a.enumerable});return t};var v=(t,e,n)=>(n=t!=null?p(g(t)):{},c(e||!t||!t.__esModule?i(n,\\"default\\",{value:t,enumerable:!0}):n,t)),M=t=>c(i({},\\"__esModule\\",{value:!0}),t);var u=d((F,s)=>{s.exports=_jsx_runtime});var C={};j(C,{default:()=>m,frontmatter:()=>h});var r=v(u()),h={title:\\"Peter Titov\\",slug:\\"peter-titov\\",description:\\"Global Solutions Architect at Elastic\\",image:\\"peter-titov.jpeg\\"};function l(t){return(0,r.jsx)(r.Fragment,{})}function m(t={}){let{wrapper:e}=t.components||{};return e?(0,r.jsx)(e,{...t,children:(0,r.jsx)(l,{...t})}):l(t)}return M(C);})();\\n;return Component;"},"_id":"authors/peter-titov.mdx","_raw":{"sourceFilePath":"authors/peter-titov.mdx","sourceFileName":"peter-titov.mdx","sourceFileDir":"authors","contentType":"mdx","flattenedPath":"authors/peter-titov"},"type":"Author","imageUrl":"/assets/images/authors/peter-titov.jpeg","url":"/authors/peter-titov"},{"title":"Philipp Kahr","slug":"philipp-kahr","description":"Principal Consulting Architect","image":"philipp-kahr.jpg","body":{"raw":"Philipp is a consultant on the services EMEA team working with different customers that spawn from small to large multinational companies. Prior to Elastic, he was a infrastructure and cloud architect at T-System. In his spare time, he likes to find curious ways to analyse and visualise any form of data.\\n\\n","code":"var Component=(()=>{var h=Object.create;var a=Object.defineProperty;var m=Object.getOwnPropertyDescriptor;var f=Object.getOwnPropertyNames;var d=Object.getPrototypeOf,x=Object.prototype.hasOwnProperty;var g=(t,n)=>()=>(n||t((n={exports:{}}).exports,n),n.exports),w=(t,n)=>{for(var e in n)a(t,e,{get:n[e],enumerable:!0})},s=(t,n,e,r)=>{if(n&&typeof n==\\"object\\"||typeof n==\\"function\\")for(let i of f(n))!x.call(t,i)&&i!==e&&a(t,i,{get:()=>n[i],enumerable:!(r=m(n,i))||r.enumerable});return t};var y=(t,n,e)=>(e=t!=null?h(d(t)):{},s(n||!t||!t.__esModule?a(e,\\"default\\",{value:t,enumerable:!0}):e,t)),_=t=>s(a({},\\"__esModule\\",{value:!0}),t);var l=g((P,c)=>{c.exports=_jsx_runtime});var k={};w(k,{default:()=>u,frontmatter:()=>j});var o=y(l()),j={title:\\"Philipp Kahr\\",slug:\\"philipp-kahr\\",description:\\"Principal Consulting Architect\\",image:\\"philipp-kahr.jpg\\"};function p(t){let n={p:\\"p\\",...t.components};return(0,o.jsx)(n.p,{children:\\"Philipp is a consultant on the services EMEA team working with different customers that spawn from small to large multinational companies. Prior to Elastic, he was a infrastructure and cloud architect at T-System. 
In his spare time, he likes to find curious ways to analyse and visualise any form of data.\\"})}function u(t={}){let{wrapper:n}=t.components||{};return n?(0,o.jsx)(n,{...t,children:(0,o.jsx)(p,{...t})}):p(t)}return _(k);})();\\n;return Component;"},"_id":"authors/philipp-kahr.mdx","_raw":{"sourceFilePath":"authors/philipp-kahr.mdx","sourceFileName":"philipp-kahr.mdx","sourceFileDir":"authors","contentType":"mdx","flattenedPath":"authors/philipp-kahr"},"type":"Author","imageUrl":"/assets/images/authors/philipp-kahr.jpg","url":"/authors/philipp-kahr"},{"title":"Rema Subramanian","slug":"rema-s","description":"Sr Customer Architect","image":"rema-s.jpeg","body":{"raw":"Rema enjoys everything architecture, engineering and data. In the past, she has implemented several healthcare protocols for provider and payor products. Along the way, she has worked in the engineering, infrastructure, security, and DevOps teams. Now she loves to work with customers, imparting all that learning and helping them solve cool data use cases.\\n\\n","code":"var Component=(()=>{var p=Object.create;var o=Object.defineProperty;var u=Object.getOwnPropertyDescriptor;var d=Object.getOwnPropertyNames;var g=Object.getPrototypeOf,f=Object.prototype.hasOwnProperty;var x=(e,t)=>()=>(t||e((t={exports:{}}).exports,t),t.exports),v=(e,t)=>{for(var n in t)o(e,n,{get:t[n],enumerable:!0})},i=(e,t,n,s)=>{if(t&&typeof t==\\"object\\"||typeof t==\\"function\\")for(let r of d(t))!f.call(e,r)&&r!==n&&o(e,r,{get:()=>t[r],enumerable:!(s=u(t,r))||s.enumerable});return e};var w=(e,t,n)=>(n=e!=null?p(g(e)):{},i(t||!e||!e.__esModule?o(n,\\"default\\",{value:e,enumerable:!0}):n,e)),y=e=>i(o({},\\"__esModule\\",{value:!0}),e);var m=x((D,c)=>{c.exports=_jsx_runtime});var _={};v(_,{default:()=>l,frontmatter:()=>j});var a=w(m()),j={title:\\"Rema Subramanian\\",slug:\\"rema-s\\",description:\\"Sr Customer Architect\\",image:\\"rema-s.jpeg\\"};function h(e){let t={p:\\"p\\",...e.components};return(0,a.jsx)(t.p,{children:\\"Rema enjoys everything architecture, engineering and data. In the past, she has implemented several healthcare protocols for provider and payor products. Along the way, she has worked in the engineering, infrastructure, security, and DevOps teams. 
Now she loves to work with customers, imparting all that learning and helping them solve cool data use cases.\\"})}function l(e={}){let{wrapper:t}=e.components||{};return t?(0,a.jsx)(t,{...e,children:(0,a.jsx)(h,{...e})}):h(e)}return y(_);})();\\n;return Component;"},"_id":"authors/rema-s.mdx","_raw":{"sourceFilePath":"authors/rema-s.mdx","sourceFileName":"rema-s.mdx","sourceFileDir":"authors","contentType":"mdx","flattenedPath":"authors/rema-s"},"type":"Author","imageUrl":"/assets/images/authors/rema-s.jpeg","url":"/authors/rema-s"},{"title":"Riccardo Magliocchetti","slug":"riccardo-magliocchetti","description":"","image":"","body":{"raw":"\\n","code":"var Component=(()=>{var d=Object.create;var c=Object.defineProperty;var g=Object.getOwnPropertyDescriptor;var l=Object.getOwnPropertyNames;var f=Object.getPrototypeOf,_=Object.prototype.hasOwnProperty;var p=(t,e)=>()=>(e||t((e={exports:{}}).exports,e),e.exports),M=(t,e)=>{for(var n in e)c(t,n,{get:e[n],enumerable:!0})},a=(t,e,n,i)=>{if(e&&typeof e==\\"object\\"||typeof e==\\"function\\")for(let o of l(e))!_.call(t,o)&&o!==n&&c(t,o,{get:()=>e[o],enumerable:!(i=g(e,o))||i.enumerable});return t};var h=(t,e,n)=>(n=t!=null?d(f(t)):{},a(e||!t||!t.__esModule?c(n,\\"default\\",{value:t,enumerable:!0}):n,t)),j=t=>a(c({},\\"__esModule\\",{value:!0}),t);var s=p((X,m)=>{m.exports=_jsx_runtime});var D={};M(D,{default:()=>x,frontmatter:()=>C});var r=h(s()),C={title:\\"Riccardo Magliocchetti\\",slug:\\"riccardo-magliocchetti\\",description:\\"\\",image:\\"\\"};function u(t){return(0,r.jsx)(r.Fragment,{})}function x(t={}){let{wrapper:e}=t.components||{};return e?(0,r.jsx)(e,{...t,children:(0,r.jsx)(u,{...t})}):u(t)}return j(D);})();\\n;return Component;"},"_id":"authors/riccardo-magliocchetti.mdx","_raw":{"sourceFilePath":"authors/riccardo-magliocchetti.mdx","sourceFileName":"riccardo-magliocchetti.mdx","sourceFileDir":"authors","contentType":"mdx","flattenedPath":"authors/riccardo-magliocchetti"},"type":"Author","imageUrl":"","url":"/authors/riccardo-magliocchetti"},{"title":"Roger Coll","slug":"roger-coll","description":"Senior Software Engineer, Observability and OpenTelemetry at Elastic","image":"roger-coll.jpg","body":{"raw":"\\n","code":"var Component=(()=>{var u=Object.create;var a=Object.defineProperty;var x=Object.getOwnPropertyDescriptor;var f=Object.getOwnPropertyNames;var p=Object.getPrototypeOf,d=Object.prototype.hasOwnProperty;var _=(e,t)=>()=>(t||e((t={exports:{}}).exports,t),t.exports),j=(e,t)=>{for(var r in t)a(e,r,{get:t[r],enumerable:!0})},l=(e,t,r,i)=>{if(t&&typeof t==\\"object\\"||typeof t==\\"function\\")for(let o of f(t))!d.call(e,o)&&o!==r&&a(e,o,{get:()=>t[o],enumerable:!(i=x(t,o))||i.enumerable});return e};var y=(e,t,r)=>(r=e!=null?u(p(e)):{},l(t||!e||!e.__esModule?a(r,\\"default\\",{value:e,enumerable:!0}):r,e)),C=e=>l(a({},\\"__esModule\\",{value:!0}),e);var s=_((D,c)=>{c.exports=_jsx_runtime});var b={};j(b,{default:()=>m,frontmatter:()=>M});var n=y(s()),M={title:\\"Roger Coll\\",slug:\\"roger-coll\\",description:\\"Senior Software Engineer, Observability and OpenTelemetry at Elastic\\",image:\\"roger-coll.jpg\\"};function g(e){return(0,n.jsx)(n.Fragment,{})}function m(e={}){let{wrapper:t}=e.components||{};return t?(0,n.jsx)(t,{...e,children:(0,n.jsx)(g,{...e})}):g(e)}return C(b);})();\\n;return 
Component;"},"_id":"authors/roger-coll.mdx","_raw":{"sourceFilePath":"authors/roger-coll.mdx","sourceFileName":"roger-coll.mdx","sourceFileDir":"authors","contentType":"mdx","flattenedPath":"authors/roger-coll"},"type":"Author","imageUrl":"/assets/images/authors/roger-coll.jpg","url":"/authors/roger-coll"},{"title":"Shaunak Kashyap","slug":"shaunak-kashyap","description":"Manager, Fleet Platform - Control Plane at Elastic","image":"shaunak-kashyap.jpg","body":{"raw":"\\n","code":"var Component=(()=>{var p=Object.create;var o=Object.defineProperty;var h=Object.getOwnPropertyDescriptor;var x=Object.getOwnPropertyNames;var f=Object.getPrototypeOf,g=Object.prototype.hasOwnProperty;var k=(t,a)=>()=>(a||t((a={exports:{}}).exports,a),a.exports),_=(t,a)=>{for(var n in a)o(t,n,{get:a[n],enumerable:!0})},u=(t,a,n,s)=>{if(a&&typeof a==\\"object\\"||typeof a==\\"function\\")for(let r of x(a))!g.call(t,r)&&r!==n&&o(t,r,{get:()=>a[r],enumerable:!(s=h(a,r))||s.enumerable});return t};var d=(t,a,n)=>(n=t!=null?p(f(t)):{},u(a||!t||!t.__esModule?o(n,\\"default\\",{value:t,enumerable:!0}):n,t)),j=t=>u(o({},\\"__esModule\\",{value:!0}),t);var c=k((F,i)=>{i.exports=_jsx_runtime});var M={};_(M,{default:()=>m,frontmatter:()=>y});var e=d(c()),y={title:\\"Shaunak Kashyap\\",slug:\\"shaunak-kashyap\\",description:\\"Manager, Fleet Platform - Control Plane at Elastic\\",image:\\"shaunak-kashyap.jpg\\"};function l(t){return(0,e.jsx)(e.Fragment,{})}function m(t={}){let{wrapper:a}=t.components||{};return a?(0,e.jsx)(a,{...t,children:(0,e.jsx)(l,{...t})}):l(t)}return j(M);})();\\n;return Component;"},"_id":"authors/shaunak-kashyap.mdx","_raw":{"sourceFilePath":"authors/shaunak-kashyap.mdx","sourceFileName":"shaunak-kashyap.mdx","sourceFileDir":"authors","contentType":"mdx","flattenedPath":"authors/shaunak-kashyap"},"type":"Author","imageUrl":"/assets/images/authors/shaunak-kashyap.jpg","url":"/authors/shaunak-kashyap"},{"title":"Stephen Brown","slug":"stephen-brown","description":"Sr. Principal Customer Architect at Elastic","image":"stephen-brown.jpg","body":{"raw":"Stephen is a Sr. Principal Customer Enterprise Architect at Elastic. He loves solving \\"Data Puzzles\\" in his spare time between making and drinking esspresso.\\n","code":"var Component=(()=>{var u=Object.create;var i=Object.defineProperty;var h=Object.getOwnPropertyDescriptor;var d=Object.getOwnPropertyNames;var x=Object.getPrototypeOf,g=Object.prototype.hasOwnProperty;var f=(t,e)=>()=>(e||t((e={exports:{}}).exports,e),e.exports),w=(t,e)=>{for(var n in e)i(t,n,{get:e[n],enumerable:!0})},c=(t,e,n,o)=>{if(e&&typeof e==\\"object\\"||typeof e==\\"function\\")for(let r of d(e))!g.call(t,r)&&r!==n&&i(t,r,{get:()=>e[r],enumerable:!(o=h(e,r))||o.enumerable});return t};var _=(t,e,n)=>(n=t!=null?u(x(t)):{},c(e||!t||!t.__esModule?i(n,\\"default\\",{value:t,enumerable:!0}):n,t)),j=t=>c(i({},\\"__esModule\\",{value:!0}),t);var p=f((D,a)=>{a.exports=_jsx_runtime});var S={};w(S,{default:()=>m,frontmatter:()=>C});var s=_(p()),C={title:\\"Stephen Brown\\",slug:\\"stephen-brown\\",description:\\"Sr. Principal Customer Architect at Elastic\\",image:\\"stephen-brown.jpg\\"};function l(t){let e={p:\\"p\\",...t.components};return(0,s.jsx)(e.p,{children:\'Stephen is a Sr. Principal Customer Enterprise Architect at Elastic. 
He loves solving \\"Data Puzzles\\" in his spare time between making and drinking espresso.\'})}function m(t={}){let{wrapper:e}=t.components||{};return e?(0,s.jsx)(e,{...t,children:(0,s.jsx)(l,{...t})}):l(t)}return j(S);})();\\n;return Component;"},"_id":"authors/stephen-brown.mdx","_raw":{"sourceFilePath":"authors/stephen-brown.mdx","sourceFileName":"stephen-brown.mdx","sourceFileDir":"authors","contentType":"mdx","flattenedPath":"authors/stephen-brown"},"type":"Author","imageUrl":"/assets/images/authors/stephen-brown.jpg","url":"/authors/stephen-brown"},{"title":"Steve Gordon","slug":"steve-gordon","description":"Senior Software Engineer","image":"","body":{"raw":"\\n","code":"var Component=(()=>{var d=Object.create;var a=Object.defineProperty;var f=Object.getOwnPropertyDescriptor;var g=Object.getOwnPropertyNames;var _=Object.getPrototypeOf,l=Object.prototype.hasOwnProperty;var p=(t,e)=>()=>(e||t((e={exports:{}}).exports,e),e.exports),j=(t,e)=>{for(var n in e)a(t,n,{get:e[n],enumerable:!0})},s=(t,e,n,i)=>{if(e&&typeof e==\\"object\\"||typeof e==\\"function\\")for(let o of g(e))!l.call(t,o)&&o!==n&&a(t,o,{get:()=>e[o],enumerable:!(i=f(e,o))||i.enumerable});return t};var M=(t,e,n)=>(n=t!=null?d(_(t)):{},s(e||!t||!t.__esModule?a(n,\\"default\\",{value:t,enumerable:!0}):n,t)),S=t=>s(a({},\\"__esModule\\",{value:!0}),t);var m=p((D,c)=>{c.exports=_jsx_runtime});var w={};j(w,{default:()=>x,frontmatter:()=>v});var r=M(m()),v={title:\\"Steve Gordon\\",slug:\\"steve-gordon\\",description:\\"Senior Software Engineer\\",image:\\"\\"};function u(t){return(0,r.jsx)(r.Fragment,{})}function x(t={}){let{wrapper:e}=t.components||{};return e?(0,r.jsx)(e,{...t,children:(0,r.jsx)(u,{...t})}):u(t)}return S(w);})();\\n;return Component;"},"_id":"authors/steve-gordon.mdx","_raw":{"sourceFilePath":"authors/steve-gordon.mdx","sourceFileName":"steve-gordon.mdx","sourceFileDir":"authors","contentType":"mdx","flattenedPath":"authors/steve-gordon"},"type":"Author","imageUrl":"","url":"/authors/steve-gordon"},{"title":"Subhrata Kulshrestha","slug":"subhrata-kulshrestha","description":"Principal Product Manager I","image":"","body":{"raw":"\\n","code":"var Component=(()=>{var l=Object.create;var o=Object.defineProperty;var x=Object.getOwnPropertyDescriptor;var d=Object.getOwnPropertyNames;var f=Object.getPrototypeOf,g=Object.prototype.hasOwnProperty;var p=(t,r)=>()=>(r||t((r={exports:{}}).exports,r),r.exports),_=(t,r)=>{for(var e in r)o(t,e,{get:r[e],enumerable:!0})},u=(t,r,e,s)=>{if(r&&typeof r==\\"object\\"||typeof r==\\"function\\")for(let a of d(r))!g.call(t,a)&&a!==e&&o(t,a,{get:()=>r[a],enumerable:!(s=x(r,a))||s.enumerable});return t};var M=(t,r,e)=>(e=t!=null?l(f(t)):{},u(r||!t||!t.__esModule?o(e,\\"default\\",{value:t,enumerable:!0}):e,t)),j=t=>u(o({},\\"__esModule\\",{value:!0}),t);var c=p((F,i)=>{i.exports=_jsx_runtime});var C={};_(C,{default:()=>h,frontmatter:()=>b});var n=M(c()),b={title:\\"Subhrata Kulshrestha\\",slug:\\"subhrata-kulshrestha\\",description:\\"Principal Product Manager I\\",image:\\"\\"};function m(t){return(0,n.jsx)(n.Fragment,{})}function h(t={}){let{wrapper:r}=t.components||{};return r?(0,n.jsx)(r,{...t,children:(0,n.jsx)(m,{...t})}):m(t)}return j(C);})();\\n;return 
Component;"},"_id":"authors/subhrata-kulshrestha.mdx","_raw":{"sourceFilePath":"authors/subhrata-kulshrestha.mdx","sourceFileName":"subhrata-kulshrestha.mdx","sourceFileDir":"authors","contentType":"mdx","flattenedPath":"authors/subhrata-kulshrestha"},"type":"Author","imageUrl":"","url":"/authors/subhrata-kulshrestha"},{"title":"Sylvain Juge","slug":"sylvain-juge","description":"Principal Engineer, APM Agents","image":"sylvain-juge.jpg","body":{"raw":"\\n","code":"var Component=(()=>{var m=Object.create;var a=Object.defineProperty;var x=Object.getOwnPropertyDescriptor;var j=Object.getOwnPropertyNames;var p=Object.getPrototypeOf,f=Object.prototype.hasOwnProperty;var _=(t,n)=>()=>(n||t((n={exports:{}}).exports,n),n.exports),d=(t,n)=>{for(var e in n)a(t,e,{get:n[e],enumerable:!0})},s=(t,n,e,o)=>{if(n&&typeof n==\\"object\\"||typeof n==\\"function\\")for(let i of j(n))!f.call(t,i)&&i!==e&&a(t,i,{get:()=>n[i],enumerable:!(o=x(n,i))||o.enumerable});return t};var y=(t,n,e)=>(e=t!=null?m(p(t)):{},s(n||!t||!t.__esModule?a(e,\\"default\\",{value:t,enumerable:!0}):e,t)),M=t=>s(a({},\\"__esModule\\",{value:!0}),t);var g=_((D,u)=>{u.exports=_jsx_runtime});var A={};d(A,{default:()=>l,frontmatter:()=>v});var r=y(g()),v={title:\\"Sylvain Juge\\",slug:\\"sylvain-juge\\",description:\\"Principal Engineer, APM Agents\\",image:\\"sylvain-juge.jpg\\"};function c(t){return(0,r.jsx)(r.Fragment,{})}function l(t={}){let{wrapper:n}=t.components||{};return n?(0,r.jsx)(n,{...t,children:(0,r.jsx)(c,{...t})}):c(t)}return M(A);})();\\n;return Component;"},"_id":"authors/sylvain-juge.mdx","_raw":{"sourceFilePath":"authors/sylvain-juge.mdx","sourceFileName":"sylvain-juge.mdx","sourceFileDir":"authors","contentType":"mdx","flattenedPath":"authors/sylvain-juge"},"type":"Author","imageUrl":"/assets/images/authors/sylvain-juge.jpg","url":"/authors/sylvain-juge"},{"title":"Taha Derouiche","slug":"taha-derouiche","description":"Principal Consulting Architect","image":"","body":{"raw":"\\n","code":"var Component=(()=>{var h=Object.create;var a=Object.defineProperty;var l=Object.getOwnPropertyDescriptor;var d=Object.getOwnPropertyNames;var f=Object.getPrototypeOf,g=Object.prototype.hasOwnProperty;var p=(t,e)=>()=>(e||t((e={exports:{}}).exports,e),e.exports),_=(t,e)=>{for(var n in e)a(t,n,{get:e[n],enumerable:!0})},c=(t,e,n,i)=>{if(e&&typeof e==\\"object\\"||typeof e==\\"function\\")for(let o of d(e))!g.call(t,o)&&o!==n&&a(t,o,{get:()=>e[o],enumerable:!(i=l(e,o))||i.enumerable});return t};var j=(t,e,n)=>(n=t!=null?h(f(t)):{},c(e||!t||!t.__esModule?a(n,\\"default\\",{value:t,enumerable:!0}):n,t)),C=t=>c(a({},\\"__esModule\\",{value:!0}),t);var s=p((X,u)=>{u.exports=_jsx_runtime});var M={};_(M,{default:()=>x,frontmatter:()=>D});var r=j(s()),D={title:\\"Taha Derouiche\\",slug:\\"taha-derouiche\\",description:\\"Principal Consulting Architect\\",image:\\"\\"};function m(t){return(0,r.jsx)(r.Fragment,{})}function x(t={}){let{wrapper:e}=t.components||{};return e?(0,r.jsx)(e,{...t,children:(0,r.jsx)(m,{...t})}):m(t)}return C(M);})();\\n;return Component;"},"_id":"authors/taha-derouiche.mdx","_raw":{"sourceFilePath":"authors/taha-derouiche.mdx","sourceFileName":"taha-derouiche.mdx","sourceFileDir":"authors","contentType":"mdx","flattenedPath":"authors/taha-derouiche"},"type":"Author","imageUrl":"","url":"/authors/taha-derouiche"},{"title":"Tamara Dancheva","slug":"tamara-dancheva","description":"Software Engineer II at Elastic","image":"","body":{"raw":"","code":"var Component=(()=>{var f=Object.create;var o=Object.defineProperty;var 
d=Object.getOwnPropertyDescriptor;var g=Object.getOwnPropertyNames;var l=Object.getPrototypeOf,_=Object.prototype.hasOwnProperty;var p=(t,e)=>()=>(e||t((e={exports:{}}).exports,e),e.exports),h=(t,e)=>{for(var n in e)o(t,n,{get:e[n],enumerable:!0})},i=(t,e,n,c)=>{if(e&&typeof e==\\"object\\"||typeof e==\\"function\\")for(let r of g(e))!_.call(t,r)&&r!==n&&o(t,r,{get:()=>e[r],enumerable:!(c=d(e,r))||c.enumerable});return t};var j=(t,e,n)=>(n=t!=null?f(l(t)):{},i(e||!t||!t.__esModule?o(n,\\"default\\",{value:t,enumerable:!0}):n,t)),D=t=>i(o({},\\"__esModule\\",{value:!0}),t);var s=p((C,m)=>{m.exports=_jsx_runtime});var v={};h(v,{default:()=>x,frontmatter:()=>M});var a=j(s()),M={title:\\"Tamara Dancheva\\",slug:\\"tamara-dancheva\\",description:\\"Software Engineer II at Elastic\\",image:\\"\\"};function u(t){return(0,a.jsx)(a.Fragment,{})}function x(t={}){let{wrapper:e}=t.components||{};return e?(0,a.jsx)(e,{...t,children:(0,a.jsx)(u,{...t})}):u(t)}return D(v);})();\\n;return Component;"},"_id":"authors/tamara-dancheva.mdx","_raw":{"sourceFilePath":"authors/tamara-dancheva.mdx","sourceFileName":"tamara-dancheva.mdx","sourceFileDir":"authors","contentType":"mdx","flattenedPath":"authors/tamara-dancheva"},"type":"Author","imageUrl":"","url":"/authors/tamara-dancheva"},{"title":"Tim R\xfchsen","slug":"tim-ruhsen","description":"Senior Software Engineer","image":"","body":{"raw":"\\n","code":"var Component=(()=>{var f=Object.create;var i=Object.defineProperty;var g=Object.getOwnPropertyDescriptor;var _=Object.getOwnPropertyNames;var d=Object.getPrototypeOf,l=Object.prototype.hasOwnProperty;var p=(t,e)=>()=>(e||t((e={exports:{}}).exports,e),e.exports),h=(t,e)=>{for(var n in e)i(t,n,{get:e[n],enumerable:!0})},s=(t,e,n,a)=>{if(e&&typeof e==\\"object\\"||typeof e==\\"function\\")for(let o of _(e))!l.call(t,o)&&o!==n&&i(t,o,{get:()=>e[o],enumerable:!(a=g(e,o))||a.enumerable});return t};var j=(t,e,n)=>(n=t!=null?f(d(t)):{},s(e||!t||!t.__esModule?i(n,\\"default\\",{value:t,enumerable:!0}):n,t)),M=t=>s(i({},\\"__esModule\\",{value:!0}),t);var u=p((F,m)=>{m.exports=_jsx_runtime});var C={};h(C,{default:()=>x,frontmatter:()=>w});var r=j(u()),w={title:\\"Tim R\\\\xFChsen\\",slug:\\"tim-ruhsen\\",description:\\"Senior Software Engineer\\",image:\\"\\"};function c(t){return(0,r.jsx)(r.Fragment,{})}function x(t={}){let{wrapper:e}=t.components||{};return e?(0,r.jsx)(e,{...t,children:(0,r.jsx)(c,{...t})}):c(t)}return M(C);})();\\n;return Component;"},"_id":"authors/tim-ruhsen.mdx","_raw":{"sourceFilePath":"authors/tim-ruhsen.mdx","sourceFileName":"tim-ruhsen.mdx","sourceFileDir":"authors","contentType":"mdx","flattenedPath":"authors/tim-ruhsen"},"type":"Author","imageUrl":"","url":"/authors/tim-ruhsen"},{"title":"Tom Grabowski","slug":"tom-grabowski","description":"Principal Product Manager at Elastic","image":"tom-grabowski.jpg","body":{"raw":"\\n","code":"var Component=(()=>{var x=Object.create;var a=Object.defineProperty;var l=Object.getOwnPropertyDescriptor;var p=Object.getOwnPropertyNames;var d=Object.getPrototypeOf,f=Object.prototype.hasOwnProperty;var _=(t,r)=>()=>(r||t((r={exports:{}}).exports,r),r.exports),j=(t,r)=>{for(var n in r)a(t,n,{get:r[n],enumerable:!0})},s=(t,r,n,i)=>{if(r&&typeof r==\\"object\\"||typeof r==\\"function\\")for(let o of p(r))!f.call(t,o)&&o!==n&&a(t,o,{get:()=>r[o],enumerable:!(i=l(r,o))||i.enumerable});return t};var w=(t,r,n)=>(n=t!=null?x(d(t)):{},s(r||!t||!t.__esModule?a(n,\\"default\\",{value:t,enumerable:!0}):n,t)),M=t=>s(a({},\\"__esModule\\",{value:!0}),t);var 
m=_((D,c)=>{c.exports=_jsx_runtime});var k={};j(k,{default:()=>g,frontmatter:()=>b});var e=w(m()),b={title:\\"Tom Grabowski\\",slug:\\"tom-grabowski\\",description:\\"Principal Product Manager at Elastic\\",image:\\"tom-grabowski.jpg\\"};function u(t){return(0,e.jsx)(e.Fragment,{})}function g(t={}){let{wrapper:r}=t.components||{};return r?(0,e.jsx)(r,{...t,children:(0,e.jsx)(u,{...t})}):u(t)}return M(k);})();\\n;return Component;"},"_id":"authors/tom-grabowski.mdx","_raw":{"sourceFilePath":"authors/tom-grabowski.mdx","sourceFileName":"tom-grabowski.mdx","sourceFileDir":"authors","contentType":"mdx","flattenedPath":"authors/tom-grabowski"},"type":"Author","imageUrl":"/assets/images/authors/tom-grabowski.jpg","url":"/authors/tom-grabowski"},{"title":"Trent Mick","slug":"trent-mick","description":"Principal Software Engineer","image":"","body":{"raw":"\\n","code":"var Component=(()=>{var f=Object.create;var i=Object.defineProperty;var g=Object.getOwnPropertyDescriptor;var l=Object.getOwnPropertyNames;var p=Object.getPrototypeOf,_=Object.prototype.hasOwnProperty;var d=(t,e)=>()=>(e||t((e={exports:{}}).exports,e),e.exports),M=(t,e)=>{for(var n in e)i(t,n,{get:e[n],enumerable:!0})},c=(t,e,n,a)=>{if(e&&typeof e==\\"object\\"||typeof e==\\"function\\")for(let o of l(e))!_.call(t,o)&&o!==n&&i(t,o,{get:()=>e[o],enumerable:!(a=g(e,o))||a.enumerable});return t};var j=(t,e,n)=>(n=t!=null?f(p(t)):{},c(e||!t||!t.__esModule?i(n,\\"default\\",{value:t,enumerable:!0}):n,t)),k=t=>c(i({},\\"__esModule\\",{value:!0}),t);var s=d((F,m)=>{m.exports=_jsx_runtime});var C={};M(C,{default:()=>x,frontmatter:()=>w});var r=j(s()),w={title:\\"Trent Mick\\",slug:\\"trent-mick\\",description:\\"Principal Software Engineer\\",image:\\"\\"};function u(t){return(0,r.jsx)(r.Fragment,{})}function x(t={}){let{wrapper:e}=t.components||{};return e?(0,r.jsx)(e,{...t,children:(0,r.jsx)(u,{...t})}):u(t)}return k(C);})();\\n;return Component;"},"_id":"authors/trent-mick.mdx","_raw":{"sourceFilePath":"authors/trent-mick.mdx","sourceFileName":"trent-mick.mdx","sourceFileDir":"authors","contentType":"mdx","flattenedPath":"authors/trent-mick"},"type":"Author","imageUrl":"","url":"/authors/trent-mick"},{"title":"Ty Bekiares","slug":"ty-bekiares","description":"Principal Solutions Architect at Elastic","image":"ty-bekiares.jpg","body":{"raw":"Ty Bekiares is a Principal Solutions Architect with Elastic. 
Ty approaches observability from a 20+ year background of architecting and developing production software.\\n\\n","code":"var Component=(()=>{var m=Object.create;var o=Object.defineProperty;var d=Object.getOwnPropertyDescriptor;var f=Object.getOwnPropertyNames;var y=Object.getPrototypeOf,g=Object.prototype.hasOwnProperty;var h=(t,e)=>()=>(e||t((e={exports:{}}).exports,e),e.exports),x=(t,e)=>{for(var n in e)o(t,n,{get:e[n],enumerable:!0})},c=(t,e,n,a)=>{if(e&&typeof e==\\"object\\"||typeof e==\\"function\\")for(let i of f(e))!g.call(t,i)&&i!==n&&o(t,i,{get:()=>e[i],enumerable:!(a=d(e,i))||a.enumerable});return t};var b=(t,e,n)=>(n=t!=null?m(y(t)):{},c(e||!t||!t.__esModule?o(n,\\"default\\",{value:t,enumerable:!0}):n,t)),k=t=>c(o({},\\"__esModule\\",{value:!0}),t);var l=h((M,s)=>{s.exports=_jsx_runtime});var j={};x(j,{default:()=>p,frontmatter:()=>_});var r=b(l()),_={title:\\"Ty Bekiares\\",slug:\\"ty-bekiares\\",description:\\"Principal Solutions Architect at Elastic\\",image:\\"ty-bekiares.jpg\\"};function u(t){let e={p:\\"p\\",...t.components};return(0,r.jsx)(e.p,{children:\\"Ty Bekiares is a Principal Solutions Architect with Elastic. Ty approaches observability from a 20+ year background of architecting and developing production software.\\"})}function p(t={}){let{wrapper:e}=t.components||{};return e?(0,r.jsx)(e,{...t,children:(0,r.jsx)(u,{...t})}):u(t)}return k(j);})();\\n;return Component;"},"_id":"authors/ty-bekiares.mdx","_raw":{"sourceFilePath":"authors/ty-bekiares.mdx","sourceFileName":"ty-bekiares.mdx","sourceFileDir":"authors","contentType":"mdx","flattenedPath":"authors/ty-bekiares"},"type":"Author","imageUrl":"/assets/images/authors/ty-bekiares.jpg","url":"/authors/ty-bekiares"},{"title":"Udayasimha Theepireddy (Uday)","slug":"udayasimha-theepireddy-uday","description":"Senior Principal Solution Architect at Elastic","image":"udayasimha-theepireddy-uday.jpg","body":{"raw":"\\n","code":"var Component=(()=>{var y=Object.create;var i=Object.defineProperty;var p=Object.getOwnPropertyDescriptor;var h=Object.getOwnPropertyNames;var l=Object.getPrototypeOf,x=Object.prototype.hasOwnProperty;var f=(t,e)=>()=>(e||t((e={exports:{}}).exports,e),e.exports),g=(t,e)=>{for(var a in e)i(t,a,{get:e[a],enumerable:!0})},d=(t,e,a,o)=>{if(e&&typeof e==\\"object\\"||typeof e==\\"function\\")for(let r of h(e))!x.call(t,r)&&r!==a&&i(t,r,{get:()=>e[r],enumerable:!(o=p(e,r))||o.enumerable});return t};var _=(t,e,a)=>(a=t!=null?y(l(t)):{},d(e||!t||!t.__esModule?i(a,\\"default\\",{value:t,enumerable:!0}):a,t)),j=t=>d(i({},\\"__esModule\\",{value:!0}),t);var u=f((F,s)=>{s.exports=_jsx_runtime});var C={};g(C,{default:()=>m,frontmatter:()=>M});var n=_(u()),M={title:\\"Udayasimha Theepireddy (Uday)\\",slug:\\"udayasimha-theepireddy-uday\\",description:\\"Senior Principal Solution Architect at Elastic\\",image:\\"udayasimha-theepireddy-uday.jpg\\"};function c(t){return(0,n.jsx)(n.Fragment,{})}function m(t={}){let{wrapper:e}=t.components||{};return e?(0,n.jsx)(e,{...t,children:(0,n.jsx)(c,{...t})}):c(t)}return j(C);})();\\n;return Component;"},"_id":"authors/udayasimha-theepireddy-uday.mdx","_raw":{"sourceFilePath":"authors/udayasimha-theepireddy-uday.mdx","sourceFileName":"udayasimha-theepireddy-uday.mdx","sourceFileDir":"authors","contentType":"mdx","flattenedPath":"authors/udayasimha-theepireddy-uday"},"type":"Author","imageUrl":"/assets/images/authors/udayasimha-theepireddy-uday.jpg","url":"/authors/udayasimha-theepireddy-uday"},{"title":"Ugo 
Sangiorgi","slug":"ugo-sangiorgi","description":"Principal Competitive Intelligence Manager at Elastic","image":"ugo-sangiorgi.png","body":{"raw":"\\n","code":"var Component=(()=>{var l=Object.create;var o=Object.defineProperty;var p=Object.getOwnPropertyDescriptor;var x=Object.getOwnPropertyNames;var f=Object.getPrototypeOf,_=Object.prototype.hasOwnProperty;var d=(t,n)=>()=>(n||t((n={exports:{}}).exports,n),n.exports),M=(t,n)=>{for(var e in n)o(t,e,{get:n[e],enumerable:!0})},g=(t,n,e,a)=>{if(n&&typeof n==\\"object\\"||typeof n==\\"function\\")for(let i of x(n))!_.call(t,i)&&i!==e&&o(t,i,{get:()=>n[i],enumerable:!(a=p(n,i))||a.enumerable});return t};var j=(t,n,e)=>(e=t!=null?l(f(t)):{},g(n||!t||!t.__esModule?o(e,\\"default\\",{value:t,enumerable:!0}):e,t)),C=t=>g(o({},\\"__esModule\\",{value:!0}),t);var c=d((h,s)=>{s.exports=_jsx_runtime});var F={};M(F,{default:()=>m,frontmatter:()=>D});var r=j(c()),D={title:\\"Ugo Sangiorgi\\",slug:\\"ugo-sangiorgi\\",description:\\"Principal Competitive Intelligence Manager at Elastic\\",image:\\"ugo-sangiorgi.png\\"};function u(t){return(0,r.jsx)(r.Fragment,{})}function m(t={}){let{wrapper:n}=t.components||{};return n?(0,r.jsx)(n,{...t,children:(0,r.jsx)(u,{...t})}):u(t)}return C(F);})();\\n;return Component;"},"_id":"authors/ugo-sangiorgi.mdx","_raw":{"sourceFilePath":"authors/ugo-sangiorgi.mdx","sourceFileName":"ugo-sangiorgi.mdx","sourceFileDir":"authors","contentType":"mdx","flattenedPath":"authors/ugo-sangiorgi"},"type":"Author","imageUrl":"/assets/images/authors/ugo-sangiorgi.png","url":"/authors/ugo-sangiorgi"},{"title":"Vinay Chandrasekhar","slug":"vinay-chandrasekhar","description":"Director, Product Management at Elastic","image":"vinay-chandrasekhar.jpg","body":{"raw":"\\n","code":"var Component=(()=>{var h=Object.create;var o=Object.defineProperty;var x=Object.getOwnPropertyDescriptor;var g=Object.getOwnPropertyNames;var f=Object.getPrototypeOf,l=Object.prototype.hasOwnProperty;var p=(t,n)=>()=>(n||t((n={exports:{}}).exports,n),n.exports),_=(t,n)=>{for(var a in n)o(t,a,{get:n[a],enumerable:!0})},c=(t,n,a,i)=>{if(n&&typeof n==\\"object\\"||typeof n==\\"function\\")for(let r of g(n))!l.call(t,r)&&r!==a&&o(t,r,{get:()=>n[r],enumerable:!(i=x(n,r))||i.enumerable});return t};var j=(t,n,a)=>(a=t!=null?h(f(t)):{},c(n||!t||!t.__esModule?o(a,\\"default\\",{value:t,enumerable:!0}):a,t)),y=t=>c(o({},\\"__esModule\\",{value:!0}),t);var m=p((D,s)=>{s.exports=_jsx_runtime});var k={};_(k,{default:()=>d,frontmatter:()=>M});var e=j(m()),M={title:\\"Vinay Chandrasekhar\\",slug:\\"vinay-chandrasekhar\\",description:\\"Director, Product Management at Elastic\\",image:\\"vinay-chandrasekhar.jpg\\"};function u(t){return(0,e.jsx)(e.Fragment,{})}function d(t={}){let{wrapper:n}=t.components||{};return n?(0,e.jsx)(n,{...t,children:(0,e.jsx)(u,{...t})}):u(t)}return y(k);})();\\n;return Component;"},"_id":"authors/vinay-chandrasekhar.mdx","_raw":{"sourceFilePath":"authors/vinay-chandrasekhar.mdx","sourceFileName":"vinay-chandrasekhar.mdx","sourceFileDir":"authors","contentType":"mdx","flattenedPath":"authors/vinay-chandrasekhar"},"type":"Author","imageUrl":"/assets/images/authors/vinay-chandrasekhar.jpg","url":"/authors/vinay-chandrasekhar"},{"title":"Vincent du Sordet","slug":"vincent-dusordet","description":"Education Architect","image":"vincent-dusordet.png","body":{"raw":"","code":"var Component=(()=>{var x=Object.create;var c=Object.defineProperty;var f=Object.getOwnPropertyDescriptor;var g=Object.getOwnPropertyNames;var 
p=Object.getPrototypeOf,_=Object.prototype.hasOwnProperty;var l=(t,n)=>()=>(n||t((n={exports:{}}).exports,n),n.exports),j=(t,n)=>{for(var e in n)c(t,e,{get:n[e],enumerable:!0})},u=(t,n,e,i)=>{if(n&&typeof n==\\"object\\"||typeof n==\\"function\\")for(let o of g(n))!_.call(t,o)&&o!==e&&c(t,o,{get:()=>n[o],enumerable:!(i=f(n,o))||i.enumerable});return t};var M=(t,n,e)=>(e=t!=null?x(p(t)):{},u(n||!t||!t.__esModule?c(e,\\"default\\",{value:t,enumerable:!0}):e,t)),h=t=>u(c({},\\"__esModule\\",{value:!0}),t);var d=l((F,a)=>{a.exports=_jsx_runtime});var C={};j(C,{default:()=>m,frontmatter:()=>v});var r=M(d()),v={title:\\"Vincent du Sordet\\",slug:\\"vincent-dusordet\\",description:\\"Education Architect\\",image:\\"vincent-dusordet.png\\"};function s(t){return(0,r.jsx)(r.Fragment,{})}function m(t={}){let{wrapper:n}=t.components||{};return n?(0,r.jsx)(n,{...t,children:(0,r.jsx)(s,{...t})}):s(t)}return h(C);})();\\n;return Component;"},"_id":"authors/vincent.dusordet.mdx","_raw":{"sourceFilePath":"authors/vincent.dusordet.mdx","sourceFileName":"vincent.dusordet.mdx","sourceFileDir":"authors","contentType":"mdx","flattenedPath":"authors/vincent.dusordet"},"type":"Author","imageUrl":"/assets/images/authors/vincent-dusordet.png","url":"/authors/vincent-dusordet"},{"title":"Yemi Adejumobi","slug":"yemi-adejumobi","description":"Product Lead at Langtrace AI","body":{"raw":"\\n","code":"var Component=(()=>{var x=Object.create;var a=Object.defineProperty;var f=Object.getOwnPropertyDescriptor;var j=Object.getOwnPropertyNames;var _=Object.getPrototypeOf,g=Object.prototype.hasOwnProperty;var l=(t,e)=>()=>(e||t((e={exports:{}}).exports,e),e.exports),p=(t,e)=>{for(var n in e)a(t,n,{get:e[n],enumerable:!0})},m=(t,e,n,i)=>{if(e&&typeof e==\\"object\\"||typeof e==\\"function\\")for(let o of j(e))!g.call(t,o)&&o!==n&&a(t,o,{get:()=>e[o],enumerable:!(i=f(e,o))||i.enumerable});return t};var L=(t,e,n)=>(n=t!=null?x(_(t)):{},m(e||!t||!t.__esModule?a(n,\\"default\\",{value:t,enumerable:!0}):n,t)),M=t=>m(a({},\\"__esModule\\",{value:!0}),t);var c=l((C,u)=>{u.exports=_jsx_runtime});var y={};p(y,{default:()=>d,frontmatter:()=>b});var r=L(c()),b={title:\\"Yemi Adejumobi\\",slug:\\"yemi-adejumobi\\",description:\\"Product Lead at Langtrace AI\\"};function s(t){return(0,r.jsx)(r.Fragment,{})}function d(t={}){let{wrapper:e}=t.components||{};return e?(0,r.jsx)(e,{...t,children:(0,r.jsx)(s,{...t})}):s(t)}return M(y);})();\\n;return Component;"},"_id":"authors/yemi-adejumobi.mdx","_raw":{"sourceFilePath":"authors/yemi-adejumobi.mdx","sourceFileName":"yemi-adejumobi.mdx","sourceFileDir":"authors","contentType":"mdx","flattenedPath":"authors/yemi-adejumobi"},"type":"Author","imageUrl":"","url":"/authors/yemi-adejumobi"},{"title":"Yngrid Coello","slug":"yngrid-coello","description":"Senior Software Engineer","image":"","body":{"raw":"\\n","code":"var Component=(()=>{var g=Object.create;var i=Object.defineProperty;var x=Object.getOwnPropertyDescriptor;var d=Object.getOwnPropertyNames;var f=Object.getPrototypeOf,_=Object.prototype.hasOwnProperty;var p=(t,e)=>()=>(e||t((e={exports:{}}).exports,e),e.exports),j=(t,e)=>{for(var n in e)i(t,n,{get:e[n],enumerable:!0})},c=(t,e,n,a)=>{if(e&&typeof e==\\"object\\"||typeof e==\\"function\\")for(let o of d(e))!_.call(t,o)&&o!==n&&i(t,o,{get:()=>e[o],enumerable:!(a=x(e,o))||a.enumerable});return t};var C=(t,e,n)=>(n=t!=null?g(f(t)):{},c(e||!t||!t.__esModule?i(n,\\"default\\",{value:t,enumerable:!0}):n,t)),M=t=>c(i({},\\"__esModule\\",{value:!0}),t);var l=p((F,s)=>{s.exports=_jsx_runtime});var 
y={};j(y,{default:()=>u,frontmatter:()=>w});var r=C(l()),w={title:\\"Yngrid Coello\\",slug:\\"yngrid-coello\\",description:\\"Senior Software Engineer\\",image:\\"\\"};function m(t){return(0,r.jsx)(r.Fragment,{})}function u(t={}){let{wrapper:e}=t.components||{};return e?(0,r.jsx)(e,{...t,children:(0,r.jsx)(m,{...t})}):m(t)}return M(y);})();\\n;return Component;"},"_id":"authors/yngrid-coello.mdx","_raw":{"sourceFilePath":"authors/yngrid-coello.mdx","sourceFileName":"yngrid-coello.mdx","sourceFileDir":"authors","contentType":"mdx","flattenedPath":"authors/yngrid-coello"},"type":"Author","imageUrl":"","url":"/authors/yngrid-coello"}]'),a=JSON.parse('[{"title":"AI Assistant","slug":"ai-assistant","body":{"raw":"","code":"var Component=(()=>{var f=Object.create;var a=Object.defineProperty;var _=Object.getOwnPropertyDescriptor;var l=Object.getOwnPropertyNames;var d=Object.getPrototypeOf,g=Object.prototype.hasOwnProperty;var j=(t,n)=>()=>(n||t((n={exports:{}}).exports,n),n.exports),p=(t,n)=>{for(var e in n)a(t,e,{get:n[e],enumerable:!0})},i=(t,n,e,o)=>{if(n&&typeof n==\\"object\\"||typeof n==\\"function\\")for(let s of l(n))!g.call(t,s)&&s!==e&&a(t,s,{get:()=>n[s],enumerable:!(o=_(n,s))||o.enumerable});return t};var M=(t,n,e)=>(e=t!=null?f(d(t)):{},i(n||!t||!t.__esModule?a(e,\\"default\\",{value:t,enumerable:!0}):e,t)),A=t=>i(a({},\\"__esModule\\",{value:!0}),t);var c=j((X,u)=>{u.exports=_jsx_runtime});var D={};p(D,{default:()=>x,frontmatter:()=>C});var r=M(c()),C={title:\\"AI Assistant\\",slug:\\"ai-assistant\\"};function m(t){return(0,r.jsx)(r.Fragment,{})}function x(t={}){let{wrapper:n}=t.components||{};return n?(0,r.jsx)(n,{...t,children:(0,r.jsx)(m,{...t})}):m(t)}return A(D);})();\\n;return Component;"},"_id":"tags/ai-assistant.mdx","_raw":{"sourceFilePath":"tags/ai-assistant.mdx","sourceFileName":"ai-assistant.mdx","sourceFileDir":"tags","contentType":"mdx","flattenedPath":"tags/ai-assistant"},"type":"Tag","url":"/tags/ai-assistant"},{"title":"AIOps","slug":"aiops","body":{"raw":"","code":"var Component=(()=>{var f=Object.create;var a=Object.defineProperty;var p=Object.getOwnPropertyDescriptor;var _=Object.getOwnPropertyNames;var l=Object.getPrototypeOf,d=Object.prototype.hasOwnProperty;var g=(t,n)=>()=>(n||t((n={exports:{}}).exports,n),n.exports),j=(t,n)=>{for(var e in n)a(t,e,{get:n[e],enumerable:!0})},u=(t,n,e,s)=>{if(n&&typeof n==\\"object\\"||typeof n==\\"function\\")for(let o of _(n))!d.call(t,o)&&o!==e&&a(t,o,{get:()=>n[o],enumerable:!(s=p(n,o))||s.enumerable});return t};var M=(t,n,e)=>(e=t!=null?f(l(t)):{},u(n||!t||!t.__esModule?a(e,\\"default\\",{value:t,enumerable:!0}):e,t)),C=t=>u(a({},\\"__esModule\\",{value:!0}),t);var i=g((h,c)=>{c.exports=_jsx_runtime});var F={};j(F,{default:()=>x,frontmatter:()=>D});var r=M(i()),D={title:\\"AIOps\\",slug:\\"aiops\\"};function m(t){return(0,r.jsx)(r.Fragment,{})}function x(t={}){let{wrapper:n}=t.components||{};return n?(0,r.jsx)(n,{...t,children:(0,r.jsx)(m,{...t})}):m(t)}return C(F);})();\\n;return Component;"},"_id":"tags/aiops.mdx","_raw":{"sourceFilePath":"tags/aiops.mdx","sourceFileName":"aiops.mdx","sourceFileDir":"tags","contentType":"mdx","flattenedPath":"tags/aiops"},"type":"Tag","url":"/tags/aiops"},{"title":"AKS","slug":"aks","body":{"raw":"","code":"var Component=(()=>{var f=Object.create;var a=Object.defineProperty;var _=Object.getOwnPropertyDescriptor;var l=Object.getOwnPropertyNames;var d=Object.getPrototypeOf,g=Object.prototype.hasOwnProperty;var j=(t,n)=>()=>(n||t((n={exports:{}}).exports,n),n.exports),p=(t,n)=>{for(var e in 
n)a(t,e,{get:n[e],enumerable:!0})},u=(t,n,e,s)=>{if(n&&typeof n==\\"object\\"||typeof n==\\"function\\")for(let o of l(n))!g.call(t,o)&&o!==e&&a(t,o,{get:()=>n[o],enumerable:!(s=_(n,o))||s.enumerable});return t};var M=(t,n,e)=>(e=t!=null?f(d(t)):{},u(n||!t||!t.__esModule?a(e,\\"default\\",{value:t,enumerable:!0}):e,t)),C=t=>u(a({},\\"__esModule\\",{value:!0}),t);var m=j((h,c)=>{c.exports=_jsx_runtime});var F={};p(F,{default:()=>i,frontmatter:()=>D});var r=M(m()),D={title:\\"AKS\\",slug:\\"aks\\"};function x(t){return(0,r.jsx)(r.Fragment,{})}function i(t={}){let{wrapper:n}=t.components||{};return n?(0,r.jsx)(n,{...t,children:(0,r.jsx)(x,{...t})}):x(t)}return C(F);})();\\n;return Component;"},"_id":"tags/aks.mdx","_raw":{"sourceFilePath":"tags/aks.mdx","sourceFileName":"aks.mdx","sourceFileDir":"tags","contentType":"mdx","flattenedPath":"tags/aks"},"type":"Tag","url":"/tags/aks"},{"title":"Amazon ECS","slug":"amazon-ecs","body":{"raw":"","code":"var Component=(()=>{var f=Object.create;var a=Object.defineProperty;var _=Object.getOwnPropertyDescriptor;var l=Object.getOwnPropertyNames;var d=Object.getPrototypeOf,g=Object.prototype.hasOwnProperty;var j=(t,n)=>()=>(n||t((n={exports:{}}).exports,n),n.exports),p=(t,n)=>{for(var e in n)a(t,e,{get:n[e],enumerable:!0})},s=(t,n,e,m)=>{if(n&&typeof n==\\"object\\"||typeof n==\\"function\\")for(let o of l(n))!g.call(t,o)&&o!==e&&a(t,o,{get:()=>n[o],enumerable:!(m=_(n,o))||m.enumerable});return t};var C=(t,n,e)=>(e=t!=null?f(d(t)):{},s(n||!t||!t.__esModule?a(e,\\"default\\",{value:t,enumerable:!0}):e,t)),M=t=>s(a({},\\"__esModule\\",{value:!0}),t);var u=j((X,c)=>{c.exports=_jsx_runtime});var D={};p(D,{default:()=>i,frontmatter:()=>z});var r=C(u()),z={title:\\"Amazon ECS\\",slug:\\"amazon-ecs\\"};function x(t){return(0,r.jsx)(r.Fragment,{})}function i(t={}){let{wrapper:n}=t.components||{};return n?(0,r.jsx)(n,{...t,children:(0,r.jsx)(x,{...t})}):x(t)}return M(D);})();\\n;return Component;"},"_id":"tags/amazon-ecs.mdx","_raw":{"sourceFilePath":"tags/amazon-ecs.mdx","sourceFileName":"amazon-ecs.mdx","sourceFileDir":"tags","contentType":"mdx","flattenedPath":"tags/amazon-ecs"},"type":"Tag","url":"/tags/amazon-ecs"},{"title":"Amazon EMR","slug":"amazon-emr","body":{"raw":"","code":"var Component=(()=>{var f=Object.create;var a=Object.defineProperty;var _=Object.getOwnPropertyDescriptor;var l=Object.getOwnPropertyNames;var M=Object.getPrototypeOf,d=Object.prototype.hasOwnProperty;var g=(t,n)=>()=>(n||t((n={exports:{}}).exports,n),n.exports),j=(t,n)=>{for(var e in n)a(t,e,{get:n[e],enumerable:!0})},s=(t,n,e,m)=>{if(n&&typeof n==\\"object\\"||typeof n==\\"function\\")for(let o of l(n))!d.call(t,o)&&o!==e&&a(t,o,{get:()=>n[o],enumerable:!(m=_(n,o))||m.enumerable});return t};var p=(t,n,e)=>(e=t!=null?f(M(t)):{},s(n||!t||!t.__esModule?a(e,\\"default\\",{value:t,enumerable:!0}):e,t)),z=t=>s(a({},\\"__esModule\\",{value:!0}),t);var c=g((X,u)=>{u.exports=_jsx_runtime});var D={};j(D,{default:()=>i,frontmatter:()=>C});var r=p(c()),C={title:\\"Amazon EMR\\",slug:\\"amazon-emr\\"};function x(t){return(0,r.jsx)(r.Fragment,{})}function i(t={}){let{wrapper:n}=t.components||{};return n?(0,r.jsx)(n,{...t,children:(0,r.jsx)(x,{...t})}):x(t)}return z(D);})();\\n;return Component;"},"_id":"tags/amazon-emr.mdx","_raw":{"sourceFilePath":"tags/amazon-emr.mdx","sourceFileName":"amazon-emr.mdx","sourceFileDir":"tags","contentType":"mdx","flattenedPath":"tags/amazon-emr"},"type":"Tag","url":"/tags/amazon-emr"},{"title":"APM","slug":"apm","body":{"raw":"","code":"var Component=(()=>{var 
f=Object.create;var a=Object.defineProperty;var _=Object.getOwnPropertyDescriptor;var l=Object.getOwnPropertyNames;var p=Object.getPrototypeOf,M=Object.prototype.hasOwnProperty;var d=(t,n)=>()=>(n||t((n={exports:{}}).exports,n),n.exports),g=(t,n)=>{for(var e in n)a(t,e,{get:n[e],enumerable:!0})},s=(t,n,e,m)=>{if(n&&typeof n==\\"object\\"||typeof n==\\"function\\")for(let o of l(n))!M.call(t,o)&&o!==e&&a(t,o,{get:()=>n[o],enumerable:!(m=_(n,o))||m.enumerable});return t};var j=(t,n,e)=>(e=t!=null?f(p(t)):{},s(n||!t||!t.__esModule?a(e,\\"default\\",{value:t,enumerable:!0}):e,t)),C=t=>s(a({},\\"__esModule\\",{value:!0}),t);var c=d((h,u)=>{u.exports=_jsx_runtime});var F={};g(F,{default:()=>i,frontmatter:()=>D});var r=j(c()),D={title:\\"APM\\",slug:\\"apm\\"};function x(t){return(0,r.jsx)(r.Fragment,{})}function i(t={}){let{wrapper:n}=t.components||{};return n?(0,r.jsx)(n,{...t,children:(0,r.jsx)(x,{...t})}):x(t)}return C(F);})();\\n;return Component;"},"_id":"tags/apm.mdx","_raw":{"sourceFilePath":"tags/apm.mdx","sourceFileName":"apm.mdx","sourceFileDir":"tags","contentType":"mdx","flattenedPath":"tags/apm"},"type":"Tag","url":"/tags/apm"},{"title":"AWS API Gateway","slug":"aws-api-gateway","body":{"raw":"","code":"var Component=(()=>{var f=Object.create;var o=Object.defineProperty;var _=Object.getOwnPropertyDescriptor;var g=Object.getOwnPropertyNames;var l=Object.getPrototypeOf,p=Object.prototype.hasOwnProperty;var w=(t,e)=>()=>(e||t((e={exports:{}}).exports,e),e.exports),d=(t,e)=>{for(var n in e)o(t,n,{get:e[n],enumerable:!0})},u=(t,e,n,s)=>{if(e&&typeof e==\\"object\\"||typeof e==\\"function\\")for(let a of g(e))!p.call(t,a)&&a!==n&&o(t,a,{get:()=>e[a],enumerable:!(s=_(e,a))||s.enumerable});return t};var j=(t,e,n)=>(n=t!=null?f(l(t)):{},u(e||!t||!t.__esModule?o(n,\\"default\\",{value:t,enumerable:!0}):n,t)),y=t=>u(o({},\\"__esModule\\",{value:!0}),t);var i=w((D,c)=>{c.exports=_jsx_runtime});var A={};d(A,{default:()=>x,frontmatter:()=>M});var r=j(i()),M={title:\\"AWS API Gateway\\",slug:\\"aws-api-gateway\\"};function m(t){return(0,r.jsx)(r.Fragment,{})}function x(t={}){let{wrapper:e}=t.components||{};return e?(0,r.jsx)(e,{...t,children:(0,r.jsx)(m,{...t})}):m(t)}return y(A);})();\\n;return Component;"},"_id":"tags/aws-api-gateway.mdx","_raw":{"sourceFilePath":"tags/aws-api-gateway.mdx","sourceFileName":"aws-api-gateway.mdx","sourceFileDir":"tags","contentType":"mdx","flattenedPath":"tags/aws-api-gateway"},"type":"Tag","url":"/tags/aws-api-gateway"},{"title":"AWS App Runner","slug":"aws-app-runner","body":{"raw":"","code":"var Component=(()=>{var i=Object.create;var a=Object.defineProperty;var f=Object.getOwnPropertyDescriptor;var _=Object.getOwnPropertyNames;var l=Object.getPrototypeOf,d=Object.prototype.hasOwnProperty;var g=(t,n)=>()=>(n||t((n={exports:{}}).exports,n),n.exports),j=(t,n)=>{for(var e in n)a(t,e,{get:n[e],enumerable:!0})},s=(t,n,e,u)=>{if(n&&typeof n==\\"object\\"||typeof n==\\"function\\")for(let o of _(n))!d.call(t,o)&&o!==e&&a(t,o,{get:()=>n[o],enumerable:!(u=f(n,o))||u.enumerable});return t};var M=(t,n,e)=>(e=t!=null?i(l(t)):{},s(n||!t||!t.__esModule?a(e,\\"default\\",{value:t,enumerable:!0}):e,t)),w=t=>s(a({},\\"__esModule\\",{value:!0}),t);var m=g((F,c)=>{c.exports=_jsx_runtime});var C={};j(C,{default:()=>x,frontmatter:()=>A});var r=M(m()),A={title:\\"AWS App Runner\\",slug:\\"aws-app-runner\\"};function p(t){return(0,r.jsx)(r.Fragment,{})}function x(t={}){let{wrapper:n}=t.components||{};return n?(0,r.jsx)(n,{...t,children:(0,r.jsx)(p,{...t})}):p(t)}return 
w(C);})();\\n;return Component;"},"_id":"tags/aws-app-runner.mdx","_raw":{"sourceFilePath":"tags/aws-app-runner.mdx","sourceFileName":"aws-app-runner.mdx","sourceFileDir":"tags","contentType":"mdx","flattenedPath":"tags/aws-app-runner"},"type":"Tag","url":"/tags/aws-app-runner"},{"title":"AWS Bedrock","slug":"aws-bedrock","body":{"raw":"","code":"var Component=(()=>{var d=Object.create;var a=Object.defineProperty;var f=Object.getOwnPropertyDescriptor;var _=Object.getOwnPropertyNames;var l=Object.getPrototypeOf,g=Object.prototype.hasOwnProperty;var j=(t,e)=>()=>(e||t((e={exports:{}}).exports,e),e.exports),p=(t,e)=>{for(var n in e)a(t,n,{get:e[n],enumerable:!0})},s=(t,e,n,c)=>{if(e&&typeof e==\\"object\\"||typeof e==\\"function\\")for(let o of _(e))!g.call(t,o)&&o!==n&&a(t,o,{get:()=>e[o],enumerable:!(c=f(e,o))||c.enumerable});return t};var M=(t,e,n)=>(n=t!=null?d(l(t)):{},s(e||!t||!t.__esModule?a(n,\\"default\\",{value:t,enumerable:!0}):n,t)),k=t=>s(a({},\\"__esModule\\",{value:!0}),t);var m=j((F,u)=>{u.exports=_jsx_runtime});var C={};p(C,{default:()=>i,frontmatter:()=>w});var r=M(m()),w={title:\\"AWS Bedrock\\",slug:\\"aws-bedrock\\"};function x(t){return(0,r.jsx)(r.Fragment,{})}function i(t={}){let{wrapper:e}=t.components||{};return e?(0,r.jsx)(e,{...t,children:(0,r.jsx)(x,{...t})}):x(t)}return k(C);})();\\n;return Component;"},"_id":"tags/aws-bedrock.mdx","_raw":{"sourceFilePath":"tags/aws-bedrock.mdx","sourceFileName":"aws-bedrock.mdx","sourceFileDir":"tags","contentType":"mdx","flattenedPath":"tags/aws-bedrock"},"type":"Tag","url":"/tags/aws-bedrock"},{"title":"AWS Fargate","slug":"aws-fargate","body":{"raw":"","code":"var Component=(()=>{var i=Object.create;var o=Object.defineProperty;var g=Object.getOwnPropertyDescriptor;var _=Object.getOwnPropertyNames;var l=Object.getPrototypeOf,d=Object.prototype.hasOwnProperty;var j=(t,e)=>()=>(e||t((e={exports:{}}).exports,e),e.exports),p=(t,e)=>{for(var n in e)o(t,n,{get:e[n],enumerable:!0})},u=(t,e,n,s)=>{if(e&&typeof e==\\"object\\"||typeof e==\\"function\\")for(let a of _(e))!d.call(t,a)&&a!==n&&o(t,a,{get:()=>e[a],enumerable:!(s=g(e,a))||s.enumerable});return t};var F=(t,e,n)=>(n=t!=null?i(l(t)):{},u(e||!t||!t.__esModule?o(n,\\"default\\",{value:t,enumerable:!0}):n,t)),M=t=>u(o({},\\"__esModule\\",{value:!0}),t);var m=j((X,c)=>{c.exports=_jsx_runtime});var C={};p(C,{default:()=>f,frontmatter:()=>w});var r=F(m()),w={title:\\"AWS Fargate\\",slug:\\"aws-fargate\\"};function x(t){return(0,r.jsx)(r.Fragment,{})}function f(t={}){let{wrapper:e}=t.components||{};return e?(0,r.jsx)(e,{...t,children:(0,r.jsx)(x,{...t})}):x(t)}return M(C);})();\\n;return Component;"},"_id":"tags/aws-fargate.mdx","_raw":{"sourceFilePath":"tags/aws-fargate.mdx","sourceFileName":"aws-fargate.mdx","sourceFileDir":"tags","contentType":"mdx","flattenedPath":"tags/aws-fargate"},"type":"Tag","url":"/tags/aws-fargate"},{"title":"AWS Kinesis Data Firehose","slug":"aws-kinesis-data-firehose","body":{"raw":"","code":"var Component=(()=>{var f=Object.create;var s=Object.defineProperty;var _=Object.getOwnPropertyDescriptor;var d=Object.getOwnPropertyNames;var l=Object.getPrototypeOf,g=Object.prototype.hasOwnProperty;var h=(t,e)=>()=>(e||t((e={exports:{}}).exports,e),e.exports),j=(t,e)=>{for(var n in e)s(t,n,{get:e[n],enumerable:!0})},i=(t,e,n,a)=>{if(e&&typeof e==\\"object\\"||typeof e==\\"function\\")for(let o of d(e))!g.call(t,o)&&o!==n&&s(t,o,{get:()=>e[o],enumerable:!(a=_(e,o))||a.enumerable});return t};var 
p=(t,e,n)=>(n=t!=null?f(l(t)):{},i(e||!t||!t.__esModule?s(n,\\"default\\",{value:t,enumerable:!0}):n,t)),D=t=>i(s({},\\"__esModule\\",{value:!0}),t);var c=h((C,u)=>{u.exports=_jsx_runtime});var M={};j(M,{default:()=>x,frontmatter:()=>F});var r=p(c()),F={title:\\"AWS Kinesis Data Firehose\\",slug:\\"aws-kinesis-data-firehose\\"};function m(t){return(0,r.jsx)(r.Fragment,{})}function x(t={}){let{wrapper:e}=t.components||{};return e?(0,r.jsx)(e,{...t,children:(0,r.jsx)(m,{...t})}):m(t)}return D(M);})();\\n;return Component;"},"_id":"tags/aws-kinesis-data-firehose.mdx","_raw":{"sourceFilePath":"tags/aws-kinesis-data-firehose.mdx","sourceFileName":"aws-kinesis-data-firehose.mdx","sourceFileDir":"tags","contentType":"mdx","flattenedPath":"tags/aws-kinesis-data-firehose"},"type":"Tag","url":"/tags/aws-kinesis-data-firehose"},{"title":"AWS VPC Flow Logs","slug":"aws-vpc-flow-logs","body":{"raw":"","code":"var Component=(()=>{var f=Object.create;var s=Object.defineProperty;var i=Object.getOwnPropertyDescriptor;var g=Object.getOwnPropertyNames;var _=Object.getPrototypeOf,p=Object.prototype.hasOwnProperty;var w=(t,n)=>()=>(n||t((n={exports:{}}).exports,n),n.exports),d=(t,n)=>{for(var e in n)s(t,e,{get:n[e],enumerable:!0})},c=(t,n,e,a)=>{if(n&&typeof n==\\"object\\"||typeof n==\\"function\\")for(let r of g(n))!p.call(t,r)&&r!==e&&s(t,r,{get:()=>n[r],enumerable:!(a=i(n,r))||a.enumerable});return t};var j=(t,n,e)=>(e=t!=null?f(_(t)):{},c(n||!t||!t.__esModule?s(e,\\"default\\",{value:t,enumerable:!0}):e,t)),C=t=>c(s({},\\"__esModule\\",{value:!0}),t);var l=w((L,u)=>{u.exports=_jsx_runtime});var M={};d(M,{default:()=>x,frontmatter:()=>F});var o=j(l()),F={title:\\"AWS VPC Flow Logs\\",slug:\\"aws-vpc-flow-logs\\"};function m(t){return(0,o.jsx)(o.Fragment,{})}function x(t={}){let{wrapper:n}=t.components||{};return n?(0,o.jsx)(n,{...t,children:(0,o.jsx)(m,{...t})}):m(t)}return C(M);})();\\n;return Component;"},"_id":"tags/aws-vpc-flow-logs.mdx","_raw":{"sourceFilePath":"tags/aws-vpc-flow-logs.mdx","sourceFileName":"aws-vpc-flow-logs.mdx","sourceFileDir":"tags","contentType":"mdx","flattenedPath":"tags/aws-vpc-flow-logs"},"type":"Tag","url":"/tags/aws-vpc-flow-logs"},{"title":"AWS VPC Flow","slug":"aws-vpc-flow","body":{"raw":"","code":"var Component=(()=>{var i=Object.create;var a=Object.defineProperty;var l=Object.getOwnPropertyDescriptor;var _=Object.getOwnPropertyNames;var p=Object.getPrototypeOf,w=Object.prototype.hasOwnProperty;var d=(t,n)=>()=>(n||t((n={exports:{}}).exports,n),n.exports),g=(t,n)=>{for(var e in n)a(t,e,{get:n[e],enumerable:!0})},c=(t,n,e,s)=>{if(n&&typeof n==\\"object\\"||typeof n==\\"function\\")for(let o of _(n))!w.call(t,o)&&o!==e&&a(t,o,{get:()=>n[o],enumerable:!(s=l(n,o))||s.enumerable});return t};var j=(t,n,e)=>(e=t!=null?i(p(t)):{},c(n||!t||!t.__esModule?a(e,\\"default\\",{value:t,enumerable:!0}):e,t)),C=t=>c(a({},\\"__esModule\\",{value:!0}),t);var m=d((X,u)=>{u.exports=_jsx_runtime});var M={};g(M,{default:()=>f,frontmatter:()=>F});var r=j(m()),F={title:\\"AWS VPC Flow\\",slug:\\"aws-vpc-flow\\"};function x(t){return(0,r.jsx)(r.Fragment,{})}function f(t={}){let{wrapper:n}=t.components||{};return n?(0,r.jsx)(n,{...t,children:(0,r.jsx)(x,{...t})}):x(t)}return C(M);})();\\n;return Component;"},"_id":"tags/aws-vpc-flow.mdx","_raw":{"sourceFilePath":"tags/aws-vpc-flow.mdx","sourceFileName":"aws-vpc-flow.mdx","sourceFileDir":"tags","contentType":"mdx","flattenedPath":"tags/aws-vpc-flow"},"type":"Tag","url":"/tags/aws-vpc-flow"},{"title":"AWS","slug":"aws","body":{"raw":"","code":"var 
Component=(()=>{var f=Object.create;var a=Object.defineProperty;var _=Object.getOwnPropertyDescriptor;var l=Object.getOwnPropertyNames;var d=Object.getPrototypeOf,g=Object.prototype.hasOwnProperty;var j=(t,n)=>()=>(n||t((n={exports:{}}).exports,n),n.exports),p=(t,n)=>{for(var e in n)a(t,e,{get:n[e],enumerable:!0})},u=(t,n,e,s)=>{if(n&&typeof n==\\"object\\"||typeof n==\\"function\\")for(let o of l(n))!g.call(t,o)&&o!==e&&a(t,o,{get:()=>n[o],enumerable:!(s=_(n,o))||s.enumerable});return t};var M=(t,n,e)=>(e=t!=null?f(d(t)):{},u(n||!t||!t.__esModule?a(e,\\"default\\",{value:t,enumerable:!0}):e,t)),w=t=>u(a({},\\"__esModule\\",{value:!0}),t);var m=j((X,c)=>{c.exports=_jsx_runtime});var D={};p(D,{default:()=>i,frontmatter:()=>C});var r=M(m()),C={title:\\"AWS\\",slug:\\"aws\\"};function x(t){return(0,r.jsx)(r.Fragment,{})}function i(t={}){let{wrapper:n}=t.components||{};return n?(0,r.jsx)(n,{...t,children:(0,r.jsx)(x,{...t})}):x(t)}return w(D);})();\\n;return Component;"},"_id":"tags/aws.mdx","_raw":{"sourceFilePath":"tags/aws.mdx","sourceFileName":"aws.mdx","sourceFileDir":"tags","contentType":"mdx","flattenedPath":"tags/aws"},"type":"Tag","url":"/tags/aws"},{"title":"Azure Container Apps","slug":"azure-container-apps","body":{"raw":"","code":"var Component=(()=>{var x=Object.create;var a=Object.defineProperty;var f=Object.getOwnPropertyDescriptor;var _=Object.getOwnPropertyNames;var l=Object.getPrototypeOf,d=Object.prototype.hasOwnProperty;var g=(t,n)=>()=>(n||t((n={exports:{}}).exports,n),n.exports),j=(t,n)=>{for(var e in n)a(t,e,{get:n[e],enumerable:!0})},u=(t,n,e,s)=>{if(n&&typeof n==\\"object\\"||typeof n==\\"function\\")for(let o of _(n))!d.call(t,o)&&o!==e&&a(t,o,{get:()=>n[o],enumerable:!(s=f(n,o))||s.enumerable});return t};var C=(t,n,e)=>(e=t!=null?x(l(t)):{},u(n||!t||!t.__esModule?a(e,\\"default\\",{value:t,enumerable:!0}):e,t)),M=t=>u(a({},\\"__esModule\\",{value:!0}),t);var i=g((F,c)=>{c.exports=_jsx_runtime});var A={};j(A,{default:()=>p,frontmatter:()=>z});var r=C(i()),z={title:\\"Azure Container Apps\\",slug:\\"azure-container-apps\\"};function m(t){return(0,r.jsx)(r.Fragment,{})}function p(t={}){let{wrapper:n}=t.components||{};return n?(0,r.jsx)(n,{...t,children:(0,r.jsx)(m,{...t})}):m(t)}return M(A);})();\\n;return Component;"},"_id":"tags/azure-container-apps.mdx","_raw":{"sourceFilePath":"tags/azure-container-apps.mdx","sourceFileName":"azure-container-apps.mdx","sourceFileDir":"tags","contentType":"mdx","flattenedPath":"tags/azure-container-apps"},"type":"Tag","url":"/tags/azure-container-apps"},{"title":"Azure OpenAI","slug":"azure-openai","body":{"raw":"","code":"var Component=(()=>{var f=Object.create;var a=Object.defineProperty;var p=Object.getOwnPropertyDescriptor;var _=Object.getOwnPropertyNames;var l=Object.getPrototypeOf,d=Object.prototype.hasOwnProperty;var g=(t,e)=>()=>(e||t((e={exports:{}}).exports,e),e.exports),j=(t,e)=>{for(var n in e)a(t,n,{get:e[n],enumerable:!0})},s=(t,e,n,u)=>{if(e&&typeof e==\\"object\\"||typeof e==\\"function\\")for(let o of _(e))!d.call(t,o)&&o!==n&&a(t,o,{get:()=>e[o],enumerable:!(u=p(e,o))||u.enumerable});return t};var M=(t,e,n)=>(n=t!=null?f(l(t)):{},s(e||!t||!t.__esModule?a(n,\\"default\\",{value:t,enumerable:!0}):n,t)),z=t=>s(a({},\\"__esModule\\",{value:!0}),t);var i=g((F,c)=>{c.exports=_jsx_runtime});var C={};j(C,{default:()=>x,frontmatter:()=>A});var r=M(i()),A={title:\\"Azure OpenAI\\",slug:\\"azure-openai\\"};function m(t){return(0,r.jsx)(r.Fragment,{})}function x(t={}){let{wrapper:e}=t.components||{};return 
Component;"},"_id":"tags/universal-profiling.mdx","_raw":{"sourceFilePath":"tags/universal-profiling.mdx","sourceFileName":"universal-profiling.mdx","sourceFileDir":"tags","contentType":"mdx","flattenedPath":"tags/universal-profiling"},"type":"Tag","url":"/tags/universal-profiling"}]'),s=JSON.parse('[{"title":"3 models for logging with OpenTelemetry and Elastic","slug":"3-models-logging-opentelemetry","date":"2023-06-27","description":"Because OpenTelemetry increases usage of tracing and metrics with developers, logging continues to provide flexible, application-specific, and event-driven data. Explore OpenTelemetry logging and how it provides guidance on the available approaches.","image":"log_infrastructure_apm_synthetics-monitoring.jpeg","author":[{"slug":"ty-bekiares","type":"Author","_raw":{}}],"tags":[{"slug":"opentelemetry","type":"Tag","_raw":{}},{"slug":"log-analytics","type":"Tag","_raw":{}}],"body":{"raw":"\\nArguably, [OpenTelemetry](https://www.elastic.co/blog/opentelemetry-observability) exists to (greatly) increase usage of tracing and metrics among developers. That said, logging will continue to play a critical role in providing flexible, application-specific, event-driven data. Further, OpenTelemetry has the potential to bring added value to existing application logging flows:\\n\\n1. Common metadata across tracing, metrics, and logging to facilitate contextual correlation, including metadata passed between services as part of REST or RPC APIs; this is a critical element of service observability in the age of distributed, horizontally scaled systems\\n\\n2. An optional unified data path for tracing, metrics, and logging to facilitate common tooling and signal routing to your observability backend\\n\\nAdoption of metrics and tracing among developers to date has been relatively small. Further, the number of proprietary vendors and APIs (compared to adoption rate) is relatively large. As such, OpenTelemetry took a greenfield approach to developing new, vendor-agnostic APIs for tracing and metrics. In contrast, most developers have nearly 100% log coverage across their services. Moreover, logging is largely supported by a small number of vendor-agnostic, open-source logging libraries and associated APIs (e.g., [Logback](https://logback.qos.ch) and [ILogger](https://learn.microsoft.com/en-us/dotnet/api/microsoft.extensions.logging.ilogger)). As such, [OpenTelemetry’s approach to logging](https://opentelemetry.io/docs/specs/otel/logs/#introduction) meets developers where they already are using hooks into existing, popular logging frameworks. In this way, developers can add OpenTelemetry as a log signal output without otherwise altering their code and investment in logging as an observability signal.\\n\\nNotably, logging is the least mature of OTel supported observability signals. 
Depending on your service’s [language](https://opentelemetry.io/docs/instrumentation/#status-and-releases), and your appetite for adventure, there exist several options for exporting logs from your services and applications and marrying them together in your observability backend.\\n\\nThe intent of this article is to explore the current state of the art of [OpenTelemetry logging](https://www.elastic.co/blog/introduction-apm-tracing-logging-customer-experience) and to provide guidance on the available approaches with the following tenets in mind:\\n\\n- Correlation of service logs with OTel-generated tracing where applicable\\n- Proper capture of exceptions\\n- Common context across tracing, metrics, and logging\\n- Support for [slf4j key-value pairs](https://www.slf4j.org/manual.html#fluent) (“structured logging”)\\n- Automatic attachment of metadata carried between services via [OTel baggage](https://opentelemetry.io/docs/concepts/signals/baggage/)\\n- Use of an Elastic\xae Observability backend\\n- Consistent data fidelity in Elastic regardless of the approach taken\\n\\n## OpenTelemetry logging models\\n\\nThree models currently exist for getting your application or service logs to Elastic with correlation to OTel tracing and baggage:\\n\\n1. Output logs from your service (alongside traces and metrics) using an embedded [OpenTelemetry Instrumentation library](https://opentelemetry.io/docs/instrumentation/#status-and-releases) to Elastic via the OTLP protocol\\n\\n2. Write logs from your service to a file scraped by the [OpenTelemetry Collector](https://opentelemetry.io/docs/collector/), which then forwards to Elastic via the OTLP protocol\\n\\n3. Write logs from your service to a file scraped by [Elastic Agent](https://www.elastic.co/elastic-agent) (or [Filebeat](https://www.elastic.co/beats/filebeat)), which then forwards to Elastic via an Elastic-defined protocol\\n\\nNote that (1), in contrast to (2) and (3), does not involve writing service logs to a file prior to ingestion into Elastic.\\n\\n## Logging vs. span events\\n\\nIt is worth noting that most APM systems, including OpenTelemetry, include provisions for [span events](https://opentelemetry.io/docs/instrumentation/ruby/manual/#add-span-events). Like log statements, span events contain arbitrary, textual data. Additionally, span events automatically carry any custom attributes (e.g., a “user ID”) applied to the parent span, which can help with correlation and context. In this regard, it may be advantageous to translate some existing log statements (inside spans) to span events. As the name implies, of course, span events can only be emitted from within a span and thus are not intended to be a general-purpose replacement for logging.\\n\\nUnlike logging, span events do not pass through existing logging frameworks and therefore cannot (practically) be written to a log file. Further, span events are technically emitted as part of trace data and follow the same data path and signal routing as other trace data.
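\\n\\nTo make the distinction concrete, here is a minimal sketch of emitting a span event using the OpenTelemetry Python API (the demos in this article are Java, but the API shape is similar across languages; the tracer name, event name, and attributes below are invented for illustration):\\n\\n```python\\nfrom opentelemetry import trace\\n\\n# Acquire a tracer; the instrumentation name is illustrative only\\ntracer = trace.get_tracer(\\"checkout-service\\")\\n\\nwith tracer.start_as_current_span(\\"process-order\\") as span:\\n    # The event automatically inherits the TraceID/SpanID of the enclosing span\\n    span.add_event(\\"cache.miss\\", attributes={\\"user.id\\": \\"user-123\\"})\\n```\\n\\nUnlike a log statement, this event lives (and is exported) with the span that emitted it.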
\\n\\n## Polyfill appender\\n\\nSome of the demos make use of a custom Logback [“Polyfill appender”](https://github.com/ty-elastic/otel-logging/blob/main/java-otel-log/src/main/java/com/tb93/otel/batteries/PolyfillAppender.java) (inspired by OTel’s [Logback MDC](https://github.com/open-telemetry/opentelemetry-java-instrumentation/tree/main/instrumentation/logback/logback-mdc-1.0/library)), which provides support for attaching [slf4j key-value pairs](https://www.slf4j.org/manual.html#fluent) to log messages for models (2) and (3).\\n\\n## Elastic Common Schema\\n\\nFor log messages to exhibit full fidelity within Elastic, they eventually need to be formatted in accordance with the [Elastic Common Schema](https://www.elastic.co/guide/en/ecs/current/ecs-reference.html) (ECS). In models (1) and (2), log messages remain formatted in OTel log semantics until ingested by the Elastic APM Server. The Elastic APM Server then translates OTel log semantics to ECS. In model (3), ECS is applied at the source.\\n\\nNotably, OpenTelemetry recently [adopted the Elastic Common Schema](https://www.elastic.co/blog/ecs-elastic-common-schema-otel-opentelemetry-announcement) as its standard for semantic conventions going forward! As such, it is anticipated that current OTel log semantics will be updated to align with ECS.\\n\\n## Getting started\\n\\nThe included demos center around a “POJO” (no assumed framework) Java project. Java is arguably the most mature of OTel-supported languages, particularly with respect to logging options. Notably, this singular Java project was designed to support the three models of logging discussed here. In practice, you would only implement one of these models (and corresponding project dependencies).\\n\\nThe demos assume you have a working [Docker](https://www.docker.com/) environment and an [Elastic Cloud](https://www.elastic.co/cloud/) instance.\\n\\n1. Run `git clone https://github.com/ty-elastic/otel-logging`\\n\\n2. Create a .env file at the root of otel-logging with the following (appropriately filled-in) environment variables:\\n\\n```bash\\n# the service name\\nOTEL_SERVICE_NAME=app4\\n\\n# Filebeat vars\\nELASTIC_CLOUD_ID=(see https://www.elastic.co/guide/en/beats/metricbeat/current/configure-cloud-id.html)\\nELASTIC_CLOUD_AUTH=(see https://www.elastic.co/guide/en/beats/metricbeat/current/configure-cloud-id.html)\\n\\n# apm vars\\nELASTIC_APM_SERVER_ENDPOINT=(address of your Elastic Cloud APM server... e.g., https://xyz123.apm.us-central1.gcp.cloud.es.io:443)\\nELASTIC_APM_SERVER_SECRET=(see https://www.elastic.co/guide/en/apm/guide/current/secret-token.html)\\n```\\n\\n3. Start up the demo with the desired model:\\n\\n- If you want to demo logging via the OTel APM Agent, run `MODE=apm docker-compose up`\\n- If you want to demo logging via the OTel filelogreceiver, run `MODE=filelogreceiver docker-compose up`\\n- If you want to demo logging via Elastic Filebeat, run `MODE=filebeat docker-compose up`\\n\\n4. Validate incoming span and correlated log data in your Elastic Cloud instance
\\n\\n## Model 1: Logging via OpenTelemetry instrumentation\\n\\nThis model aligns with the long-term goals of OpenTelemetry: [integrated tracing, metrics, and logging (with common attributes) from your services](https://opentelemetry.io/docs/specs/otel/logs/#opentelemetry-solution) via the [OpenTelemetry Instrumentation libraries](https://opentelemetry.io/docs/instrumentation/#status-and-releases), without dependency on log files and scrapers.\\n\\nIn this model, your service generates log statements as it always has, using popular logging libraries (e.g., [Logback](https://logback.qos.ch) for Java). OTel provides a “Southbound hook” to Logback via the OTel [Logback Appender](https://github.com/open-telemetry/opentelemetry-java-instrumentation/tree/main/instrumentation/logback/logback-appender-1.0/library), which injects ServiceName, SpanID, TraceID, slf4j key-value pairs, and OTel baggage into log records and passes the composed records to the co-resident OpenTelemetry Instrumentation library. We further employ a [custom LogRecordProcessor](https://github.com/ty-elastic/otel-logging/blob/main/java-otel-log/src/main/java/com/tb93/otel/batteries/AddBaggageLogProcessor.java) to add baggage to the log record as attributes.\\n\\nThe OTel instrumentation library then formats the log statements per the [OTel logging spec](https://opentelemetry.io/docs/specs/otel/logs/data-model/) and ships them via OTLP to either an OTel Collector for further routing and enrichment or directly to Elastic.\\n\\nNotably, as language support improves, this model can and will be supported by runtime agent binding with auto-instrumentation where available (e.g., no code changes required for runtime languages).\\n\\nOne distinguishing advantage of this model, beyond the simplicity it affords, is the ability to more easily tie together attributes and tracing metadata directly with log statements. This inherently makes logging more useful in the context of other OTel-supported observability signals.
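\\n\\nThe demo here is Java, but to give a feel for the shape of Model 1 in another OTel-supported language, below is a rough Python sketch wired directly to the OpenTelemetry logs SDK. Treat it as a sketch only: the logs API is still experimental, and the underscore-prefixed module paths shown reflect SDK releases current at the time of writing and may change:\\n\\n```python\\nimport logging\\n\\nfrom opentelemetry.exporter.otlp.proto.grpc._log_exporter import OTLPLogExporter\\nfrom opentelemetry.sdk._logs import LoggerProvider, LoggingHandler\\nfrom opentelemetry.sdk._logs.export import BatchLogRecordProcessor\\n\\n# Route stdlib logging records through the OTel SDK and out via OTLP\\nprovider = LoggerProvider()\\nprovider.add_log_record_processor(BatchLogRecordProcessor(OTLPLogExporter()))\\nlogging.getLogger().addHandler(LoggingHandler(logger_provider=provider))\\n\\n# Records emitted inside a span carry its SpanID/TraceID as attributes\\nlogging.getLogger(__name__).info(\\"order processed\\")\\n```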
\\n\\n### Architecture\\n\\n![model 1 architecture](/assets/images/3-models-logging-opentelemetry/elastic-blog-model-1-architecture.png)\\n\\nAlthough not explicitly pictured, an [OpenTelemetry Collector](https://opentelemetry.io/docs/collector/) can be inserted between the service and Elastic to facilitate additional enrichment and/or signal routing or duplication across observability backends.\\n\\n### Pros\\n\\n- Simplified signal architecture and fewer “moving parts” (no files, disk utilization, or file rotation concerns)\\n- Aligns with long-term OTel vision\\n- Log statements can be (easily) decorated with OTel metadata\\n- No polyfill adapter required to support structured logging with slf4j\\n- No additional collectors/agents required\\n- Conversion to ECS happens within Elastic, keeping log data vendor-agnostic until ingestion\\n- Common wireline protocol (OTLP) across tracing, metrics, and logs\\n\\n### Cons\\n\\n- Not available (yet) in many OTel-supported languages\\n- No intermediate log file for ad-hoc, on-node debugging\\n- Immature (alpha/experimental)\\n- Unknown “glare” conditions, which could result in loss of log data if the service exits prematurely or if the backend is unable to accept log data for an extended period of time\\n\\n### Demo\\n\\n`MODE=apm docker-compose up`\\n\\n## Model 2: Logging via the OpenTelemetry Collector\\n\\nGiven the cons of Model 1, it may be advantageous to consider a model that continues to leverage an actual log file intermediary between your services and your observability backend. Such a model is possible using an [OpenTelemetry Collector](https://opentelemetry.io/docs/collector/) collocated with your services (e.g., on the same host), running the [filelogreceiver](https://github.com/open-telemetry/opentelemetry-collector-contrib/blob/main/receiver/filelogreceiver/README.md) to scrape service log files.\\n\\nIn this model, your service generates log statements as it always has, using popular logging libraries (e.g., [Logback](https://logback.qos.ch) for Java). OTel provides an MDC Appender for Logback ([Logback MDC](https://github.com/open-telemetry/opentelemetry-java-instrumentation/tree/main/instrumentation/logback/logback-mdc-1.0/library)), which adds SpanID, TraceID, and Baggage to the [Logback MDC context](https://logback.qos.ch/manual/mdc.html).\\n\\nNotably, no log record structure is assumed by the OTel filelogreceiver. In the example provided, we employ the [logstash-logback-encoder](https://github.com/logfellow/logstash-logback-encoder) to JSON-encode log messages. The logstash-logback-encoder will read the OTel SpanID, TraceID, and Baggage off the MDC context and encode it into the JSON structure. Notably, logstash-logback-encoder doesn’t explicitly support [slf4j key-value pairs](https://www.slf4j.org/manual.html#fluent). It does, however, support [Logback structured arguments](https://github.com/logfellow/logstash-logback-encoder#event-specific-custom-fields), and thus I use the [Polyfill Appender](https://github.com/ty-elastic/otel-logging/blob/main/java-otel-log/src/main/java/com/tb93/otel/batteries/PolyfillAppender.java) to convert slf4j key-value pairs to Logback structured arguments.\\n\\nFrom there, we write the log lines to a log file. If you are using Kubernetes or other container orchestration in your environment, you would more typically write to stdout (console) and let the orchestration log driver write to and manage log files.
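\\n\\nlogstash-logback-encoder is Java-specific, but the idea is not; as a purely illustrative sketch in Python (the field names are invented, and whatever you emit must match the field mapping you configure in the filelogreceiver), a formatter that stamps each JSON log line with the active OTel trace context might look like this:\\n\\n```python\\nimport json\\nimport logging\\n\\nfrom opentelemetry import trace\\n\\n# Illustrative only: emit one JSON object per line, tagged with trace context\\nclass JsonWithTraceContext(logging.Formatter):\\n    def format(self, record):\\n        ctx = trace.get_current_span().get_span_context()\\n        return json.dumps({\\n            \\"message\\": record.getMessage(),\\n            \\"log.level\\": record.levelname,\\n            \\"trace.id\\": format(ctx.trace_id, \\"032x\\"),  # all zeros if no active span\\n            \\"span.id\\": format(ctx.span_id, \\"016x\\"),\\n        })\\n\\nhandler = logging.FileHandler(\\"service.log\\")  # the file the collector scrapes\\nhandler.setFormatter(JsonWithTraceContext())\\nlogging.getLogger().addHandler(handler)\\n```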
\\n\\nWe then [configure](https://github.com/ty-elastic/otel-logging/blob/main/collector/filelogreceiver.yml) the OTel Collector to scrape this log file (using the filelogreceiver). Because no assumptions are made about the format of the log lines, you need to [explicitly map fields](https://github.com/open-telemetry/opentelemetry-collector-contrib/blob/main/pkg/stanza/docs/types/parsers.md#parsers) from your log schema to the OTel log schema.\\n\\nFrom there, the OTel Collector batches and ships the formatted log lines via OTLP to Elastic.\\n\\n### Architecture\\n\\n![model 2 architecture](/assets/images/3-models-logging-opentelemetry/elastic-blog-model-2-architecture.png)\\n\\n### Pros\\n\\n- Easy to debug (you can manually read the intermediate log file)\\n- Inherent file-based FIFO buffer\\n- Less susceptible to “glare” conditions when a service prematurely exits\\n- Conversion to ECS happens within Elastic, keeping log data vendor-agnostic until ingestion\\n- Common wireline protocol (OTLP) across tracing, metrics, and logs\\n\\n### Cons\\n\\n- All the headaches of file-based logging (rotation, disk overflow)\\n- Beta quality and not yet proven in the field\\n- No support for slf4j key-value pairs\\n\\n### Demo\\n\\n`MODE=filelogreceiver docker-compose up`\\n\\n## Model 3: Logging via Elastic Agent (or Filebeat)\\n\\nAlthough the second model described affords some resilience as a function of the backing file, the OTel Collector filelogreceiver module is still decidedly [“beta”](https://github.com/open-telemetry/opentelemetry-collector-contrib/tree/main/receiver/filelogreceiver) in quality. Because of the importance of logs as a debugging tool, today I generally recommend that customers continue to import logs into Elastic using the field-proven [Elastic Agent](https://www.elastic.co/elastic-agent) or [Filebeat](https://www.elastic.co/beats/filebeat) scrapers. Elastic Agent and Filebeat have many years of field maturity under their collective belt. Further, it is often advantageous to deploy Elastic Agent anyway to capture the multitude of signals outside the purview of OpenTelemetry (e.g., deep Kubernetes and host metrics, security, etc.).\\n\\nIn this model, your service generates log statements as it always has, using popular logging libraries (e.g., [Logback](https://logback.qos.ch) for Java). As with model 2, we employ OTel’s [Logback MDC](https://github.com/open-telemetry/opentelemetry-java-instrumentation/tree/main/instrumentation/logback/logback-mdc-1.0/library) to add SpanID, TraceID, and Baggage to the [Logback MDC context](https://logback.qos.ch/manual/mdc.html).\\n\\nFrom there, we employ the [Elastic ECS Encoder](https://www.elastic.co/guide/en/ecs-logging/java/current/setup.html) to encode log statements compliant with the Elastic Common Schema. The Elastic ECS Encoder will read the OTel SpanID, TraceID, and Baggage off the MDC context and encode it into the JSON structure. Similar to model 2, the Elastic ECS Encoder doesn’t support slf4j key-value pairs. Curiously, the Elastic ECS encoder also doesn’t appear to support Logback structured arguments. Thus, within the Polyfill Appender, I add slf4j key-value pairs as MDC context. This is less than ideal, however, since MDC forces all values to be strings.\\n\\nFrom there, we write the log lines to a log file. 
If you are using Kubernetes or other container orchestration in your environment, you would more typically write to stdout (console) and let the orchestration log driver write to and manage log files.\\n\\nWe then configure Elastic Agent or Filebeat to scrape the log file. Notably, the Elastic ECS Encoder does not currently translate incoming OTel SpanID and TraceID variables on the MDC. Thus, we need to perform manual translation of these variables in the [Filebeat (or Elastic Agent) configuration](https://github.com/ty-elastic/otel-logging/blob/main/filebeat.yml) to map them to their ECS equivalent.\\n\\n### Architecture\\n\\n![model 3 architecture](/assets/images/3-models-logging-opentelemetry/elastic-blog-model-3-architecture.png)\\n\\n### Pros\\n\\n- Robust and field-proven\\n- Easy to debug (you can manually read the intermediate log file)\\n- Inherent file-based FIFO buffer\\n- Less susceptible to “glare” conditions when a service prematurely exits\\n- Native ECS format for easy manipulation in Elastic\\n- Fleet-managed via Elastic Agent\\n\\n### Cons\\n\\n- All the headaches of file-based logging (rotation, disk overflow)\\n- No support for slf4j key-value pairs or Logback structured arguments\\n- Requires translation of OTel SpanID and TraceID in the Filebeat config\\n- Disparate data paths for logs versus tracing and metrics\\n- Vendor-specific logging format\\n\\n### Demo\\n\\n`MODE=filebeat docker-compose up`\\n\\n## Recommendations\\n\\nFor most customers, I currently recommend Model 3 — namely, write logs in ECS format (with OTel SpanID, TraceID, and Baggage metadata) and collect them with an Elastic Agent installed on the node hosting the application or service. Elastic Agent (or Filebeat) today provides the most field-proven and robust means of capturing log files from applications and services with OpenTelemetry context.\\n\\nFurther, you can leverage this same Elastic Agent instance (ideally running in your [Kubernetes daemonset](https://www.elastic.co/guide/en/fleet/current/running-on-kubernetes-managed-by-fleet.html)) to collect rich and robust metrics and logs from [Kubernetes](https://docs.elastic.co/en/integrations/kubernetes) and many other supported services via [Elastic Integrations](https://www.elastic.co/integrations/data-integrations). Finally, Elastic Agent facilitates remote management via [Fleet](https://www.elastic.co/guide/en/fleet/current/fleet-overview.html), avoiding bespoke configuration files.\\n\\nAlternatively, for customers who either wish to keep their nodes vendor-neutral or use a consolidated signal routing system, I recommend Model 2, wherein an OpenTelemetry Collector is used to scrape service log files. While workable and practiced by some early adopters in the field today, this model inherently carries some risk given the current beta nature of the OpenTelemetry filelogreceiver.\\n\\nI generally do not recommend Model 1 given its limited language support, experimental/alpha status (the API could change), and current potential for data loss. That said, in time, with more language support and more thought to resilient designs, it has clear advantages both with regard to simplicity and richness of metadata.\\n\\n## Extracting more value from your logs\\n\\nIn contrast to tracing and metrics, most organizations have nearly 100% log coverage over their applications and services. This is an ideal beachhead upon which to build an application observability system. 
On the other hand, logs are notoriously noisy and unstructured; this is only amplified with the scale enabled by the hyperscalers and Kubernetes. Collecting log lines reliably is the easy part; making them useful at today’s scale is hard.\\n\\nGiven that logs are arguably the most challenging observability signal from which to extract value at scale, one should ideally give thoughtful consideration to a vendor’s support for logging in the context of other observability signals. Can they handle surges in log rates because of unexpected scale or an error or test scenario? Do they have the machine learning tool set to automatically recognize patterns in log lines, sort them into categories, and identify true anomalies? Can they provide cost-effective online searchability of logs over months or years without manual rehydration? Do they provide the tools to extract and analyze business KPIs buried in logs?\\n\\nAs an ardent and early supporter of OpenTelemetry, Elastic, of course, [natively ingests OTel traces, metrics, and logs](https://www.elastic.co/guide/en/apm/guide/current/open-telemetry.html). And just like all logs coming into our system, logs coming from OTel-equipped sources avail themselves of our [mature tooling and next-gen AI Ops technologies](https://www.elastic.co/observability/log-monitoring) to enable you to extract their full value. Interested? [Reach out to our pre-sales team](https://www.elastic.co/contact?storm=global-header-en) to get started building with Elastic!\\n\\n_The release and timing of any features or functionality described in this post remain at Elastic\'s sole discretion. Any features or functionality not currently available may not be delivered on time or at all._\\n"}
,"_id":"articles/3-models-logging-opentelemetry-elastic.mdx","_raw":{"sourceFilePath":"articles/3-models-logging-opentelemetry-elastic.mdx","sourceFileName":"3-models-logging-opentelemetry-elastic.mdx","sourceFileDir":"articles","contentType":"mdx","flattenedPath":"articles/3-models-logging-opentelemetry-elastic"},"type":"Article","imageUrl":"/assets/images/3-models-logging-opentelemetry/log_infrastructure_apm_synthetics-monitoring.jpeg","readingTime":"14 min read","url":"/3-models-logging-opentelemetry","headings":[{"level":2,"title":"OpenTelemetry logging models","href":"#opentelemetry-logging-models"},{"level":2,"title":"Logging vs. span events","href":"#logging-vs-span-events"},{"level":2,"title":"Polyfill appender","href":"#polyfill-appender"},{"level":2,"title":"Elastic Common Schema","href":"#elastic-common-schema"},{"level":2,"title":"Getting started","href":"#getting-started"},{"level":2,"title":"Model 1: Logging via OpenTelemetry instrumentation","href":"#model-1-logging-via-opentelemetry-instrumentation"},{"level":3,"title":"Architecture","href":"#architecture"},{"level":3,"title":"Pros","href":"#pros"},{"level":3,"title":"Cons","href":"#cons"},{"level":3,"title":"Demo","href":"#demo"},{"level":2,"title":"Model 2: Logging via the OpenTelemetry Collector","href":"#model-2-logging-via-the-opentelemetry-collector"},{"level":3,"title":"Architecture","href":"#architecture-1"},{"level":3,"title":"Pros","href":"#pros-1"},{"level":3,"title":"Cons","href":"#cons-1"},{"level":3,"title":"Demo","href":"#demo-1"},{"level":2,"title":"Model 3: Logging via Elastic Agent (or Filebeat)","href":"#model-3-logging-via-elastic-agent-or-filebeat"},{"level":3,"title":"Architecture","href":"#architecture-2"},{"level":3,"title":"Pros","href":"#pros-2"},{"level":3,"title":"Cons","href":"#cons-2"},{"level":3,"title":"Demo","href":"#demo-2"},{"level":2,"title":"Recommendations","href":"#recommendations"},{"level":2,"title":"Extracting more value from your logs","href":"#extracting-more-value-from-your-logs"}]},{"title":"Adding free and open Elastic APM as part of your Elastic Observability deployment","slug":"free-open-elastic-apm-observability-deployment","date":"2024-02-28","description":"Learn how to gather application trace data and store it alongside the logs and metrics from your applications and infrastructure with Elastic Observability and Elastic APM.","image":"blog-thumb-release-apm.png","author":[{"slug":"david-hope","type":"Author","_raw":{}}],"tags":[{"slug":"apm","type":"Tag","_raw":{}},{"slug":"opentelemetry","type":"Tag","_raw":{}}],"body":{"raw":"\\nIn a recent post, we showed you [how to get started with the free and open tier of Elastic Observability](https://www.elastic.co/blog/getting-started-with-free-and-open-elastic-observability). 
Below, we\'ll walk through what you need to do to expand your deployment so you can start gathering metrics from application performance monitoring (APM) or \"tracing\" data in your observability cluster, for free.\\n\\n## What is APM?\\n\\nApplication performance monitoring lets you see where your applications spend their time, what they are doing, what other applications or services they are calling, and what errors or exceptions they are encountering.\\n\\n![](/assets/images/free-open-elastic-apm-observability-deployment/screenshot-serverless-distributed-trace.png)\\n\\nIn addition, APM lets you see history and trends for key performance indicators, such as latency and throughput, as well as transaction and dependency information:\\n\\n![](/assets/images/free-open-elastic-apm-observability-deployment/ruby-overview.png)\\n\\nWhether you\'re setting up alerts for SLA breaches, trying to gauge the impact of your latest release, or deciding where to make the next improvement, APM can help with your root-cause analysis, improve your users\' experience, and drive your mean time to resolution (MTTR) toward zero.\\n\\n## Logical architecture\\n\\nElastic APM relies on the APM Integration inside Elastic Agent, which forwards application trace and metric data from applications instrumented with APM agents to an Elastic Observability cluster. Elastic APM supports multiple agent flavors:\\n\\n- Native Elastic APM Agents, available for [multiple languages](https://www.elastic.co/guide/en/apm/agent/index.html), including Java, .NET, Go, Ruby, Python, Node.js, PHP, and client-side JavaScript\\n- Code instrumented with [OpenTelemetry](https://www.elastic.co/guide/en/apm/get-started/current/open-telemetry-elastic.html)\\n- Code instrumented with [OpenTracing](https://www.elastic.co/guide/en/apm/get-started/current/opentracing.html)\\n- Code instrumented with [Jaeger](https://www.elastic.co/guide/en/apm/server/current/jaeger.html)\\n\\n![](/assets/images/free-open-elastic-apm-observability-deployment/blog-elastic-observability-instrumented-services.png)\\n\\nIn this blog, we\'ll provide a quick example of how to instrument code with the native Elastic APM Python agent, but the overall steps are similar for other languages.\\n\\nPlease note that there is a strong distinction between the **Elastic APM Agent** and the **Elastic Agent**. These are very different components, as you can see in the diagram above, so it\'s important not to confuse them.\\n\\n## Install the Elastic Agent\\n\\nThe first step is to install the Elastic Agent. You either need Fleet [installed first](https://www.elastic.co/guide/en/fleet/current/add-a-fleet-server.html), or you can install the Elastic Agent standalone. Install the Elastic Agent on a host of your choosing by [following this guide](https://www.elastic.co/guide/en/fleet/master/elastic-agent-installation.html). This will give you an APM Integration endpoint you can hit. Note that this step is not necessary in Elastic Cloud, as we host the APM Integration for you. Check that Elastic Agent is up by running:\\n\\n```bash\\ncurl :8200\\n```\\n\\n## Instrumenting sample code with an Elastic APM agent\\n\\nThe instructions for the various language agents differ based on the programming language, but at a high level they have a similar flow. 
First, you add the dependency for the agent in the language\'s native spec, then you configure the agent to let it know how to find the APM Integration.\\n\\nYou can try out any flavor you\'d like, but I am going to walk through the Python instructions using this Python example that [I created](https://github.com/davidgeorgehope/PythonElasticAPMExample).\\n\\n### Get the sample code (or use your own)\\n\\nTo get started, I clone the GitHub repository, then change to the directory:\\n\\n```bash\\ngit clone https://github.com/davidgeorgehope/PythonElasticAPMExample\\ncd PythonElasticAPMExample\\n```\\n\\n### How to add the dependency\\n\\nAdding the Elastic APM dependency is simple — check the app.py file from [the GitHub repo](https://github.com/davidgeorgehope/PythonElasticAPMExample/blob/main/app.py) and you will notice the following lines of code.\\n\\n```python\\nimport os\\n\\nimport elasticapm\\nfrom elasticapm import Client\\nfrom flask import Flask\\n\\napp = Flask(__name__)\\napp.config[\\"ELASTIC_APM\\"] = {\\n    \\"SERVICE_NAME\\": os.environ.get(\\"APM_SERVICE_NAME\\", \\"flask-app\\"),\\n    \\"SECRET_TOKEN\\": os.environ.get(\\"APM_SECRET_TOKEN\\", \\"\\"),\\n    \\"SERVER_URL\\": os.environ.get(\\"APM_SERVER_URL\\", \\"https://localhost:8200\\"),\\n}\\nelasticapm.instrumentation.control.instrument()\\nclient = Client(app.config[\\"ELASTIC_APM\\"])\\n```\\n\\nThe Python library for Flask is capable of auto-detecting transactions, but you can also start transactions in code, as we have done in this example:\\n\\n```python\\n@app.route(\\"/\\")\\ndef hello():\\n    client.begin_transaction(\'demo-transaction\')\\n    client.end_transaction(\'demo-transaction\', \'success\')\\n    return \\"Hello, World!\\"  # a Flask view must return a response\\n```\\n\\n### Configure the agent\\n\\nThe agent needs to send application trace data to the APM Integration, and for this to work the integration endpoint has to be reachable. I configured the Elastic Agent to listen on my local host\'s IP, so anything in my subnet can send data to it. As you can see from the code below, we use docker-compose.yml to pass in the config via environment variables. Please edit these variables for your own Elastic installation.\\n\\n```yaml\\n# docker-compose.yml\\nversion: \\"3.9\\"\\nservices:\\n  flask_app:\\n    build: .\\n    ports:\\n      - \\"5001:5001\\"\\n    environment:\\n      - PORT=5001\\n      - APM_SERVICE_NAME=flask-app\\n      - APM_SECRET_TOKEN=your_secret_token\\n      - APM_SERVER_URL=https://host.docker.internal:8200\\n```\\n\\nSome commentary on the above:\\n\\n- **service_name:** If you leave this out it will just default to the application\'s name, but you can override that here.\\n- **secret_token:** [Secret tokens](https://www.elastic.co/guide/en/apm/server/current/secret-token.html) allow you to authorize requests to the APM Server, but they require that the APM Server is set up with SSL/TLS and that a secret token has been set up. We\'re not using HTTPS between the agents and the APM Server, so we\'ll comment this one out.\\n- **server_url:** This is how the agent can reach the APM Integration inside Elastic Agent. Replace this with the name or IP of your host running Elastic Agent.\\n\\nNow that the Elastic APM side of the configuration is done, we simply follow the steps from the [README](https://github.com/davidgeorgehope/PythonElasticAPMExample/blob/main/README.md) to start up.\\n\\n```bash\\ndocker-compose up --build -d\\n```\\n\\nThe build step will take several minutes.\\n\\nYou can navigate to the running sample application by visiting https://localhost:5001. There\'s not a lot to the sample, but it does generate some APM data. 
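Before adding load, you can optionally confirm that events are reaching the APM Integration by driving the agent\'s client API directly from a Python shell (a minimal sketch, not part of the sample repo; the service name and server URL below are assumptions for a local, non-TLS Elastic Agent):\\n\\n```python\\n# Hypothetical smoke test: send a single transaction to the APM Integration.\\nimport elasticapm\\n\\nclient = elasticapm.Client(\\n    service_name=\\"flask-app-smoke-test\\",  # assumed name; shows up as its own service\\n    server_url=\\"http://localhost:8200\\",  # assumption: local Elastic Agent endpoint\\n)\\nclient.begin_transaction(\\"smoke-test\\")\\nclient.end_transaction(\\"smoke-test\\", \\"success\\")\\nclient.close()  # flush queued events before the interpreter exits\\n```\\n\\nIf the endpoint is unreachable, the agent typically logs a transport error in the background rather than raising in your code, so watch the shell output.\\n\\n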
To generate a bit of a load, you can reload the page a few times or run a quick little script:\\n\\n```bash\\n#!/bin/bash\\n# load_test.sh\\nurl=\\"https://localhost:5001\\"\\nfor i in {1..1000}\\ndo\\n  curl -s -o /dev/null \\"$url\\"\\n  sleep 1\\ndone\\n```\\n\\nThis will just reload the page every second.\\n\\nBack in Kibana, navigate to the APM app (hamburger icon, then select **APM**) and you should see our new flask-app service (I let mine run so it shows a bit more history):\\n\\n![](/assets/images/free-open-elastic-apm-observability-deployment/blog-elastic-observability-services.png)\\n\\nThe Service Overview page provides an at-a-glance summary of the health of a service in one place. If you\'re a developer or an SRE, this is the page that will help you answer questions like:\\n\\n- How did a new deployment impact performance?\\n- What are the top impacted transactions?\\n- How does performance correlate with underlying infrastructure?\\n\\nThis view provides a list of all of the applications that have sent application trace data to Elastic APM in the specified period of time (in this case, the last 15 minutes). There are also sparklines showing mini graphs of latency, throughput, and error rate. Clicking on **flask-app** takes us to the **service overview** page, which shows the various transactions within the service (recall that my script is hitting the / endpoint, as seen in the **Transactions** section). We get bigger graphs for **Latency**, **Throughput**, **Errors**, and **Error Rates**.\\n\\n![](/assets/images/free-open-elastic-apm-observability-deployment/blog-elastic-observability-flask-app.png)\\n\\nWhen you\'re instrumenting real applications, under real load, you\'ll see a lot more connectivity (and errors!).\\n\\nClicking on a transaction in the transaction view (in this case, our sample app\'s demo-transaction), we can see exactly what operations were called:\\n\\n![](/assets/images/free-open-elastic-apm-observability-deployment/blog-elastic-observability-flask-app-demo-transaction.png)\\n\\nThis includes detailed information about calls to external services, such as database queries:\\n\\n![](/assets/images/free-open-elastic-apm-observability-deployment/blog-elastic-observability-span-details.png)\\n\\n## What\'s next?\\n\\nNow that you\'ve got your Elastic Observability cluster up and running and collecting out-of-the-box application trace data, explore the public APIs for the languages that your applications are using, which allow you to take your APM data to the next level. The APIs allow you to add custom metadata, define business transactions, create custom spans, and more. 
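For example, with the Python agent most of this public API hangs off the top-level elasticapm module (a short sketch based on the agent\'s documented API; the span name, label key, and lookup data below are illustrative, so verify the calls against the Python agent reference linked below):\\n\\n```python\\nimport elasticapm\\n\\nPRICES = {\\"sku-1\\": 9.99}  # illustrative data for the example\\n\\n@elasticapm.capture_span(\\"price-lookup\\")  # records a custom span inside the current transaction\\ndef lookup_price(item_id):\\n    elasticapm.label(item_id=item_id)  # attaches custom metadata to the current transaction\\n    return PRICES.get(item_id)\\n```\\n\\n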
You can find the public API specs for the various APM agents (such as [Java](https://www.elastic.co/guide/en/apm/agent/java/current/public-api.html), [Ruby](https://www.elastic.co/guide/en/apm/agent/ruby/current/api.html), [Python](https://www.elastic.co/guide/en/apm/agent/python/current/index.html), and more) on the APM agent [documentation pages](https://www.elastic.co/guide/en/apm/agent/index.html).\\n\\nIf you\'d like to learn more about Elastic APM, check out [our webinar on Elastic APM in the shift to cloud native](https://www.elastic.co/webinars/introduction-to-elastic-apm-in-the-shift-to-cloud-native) to see other ways that Elastic APM can help you in your ecosystem.\\n\\nIf you decide that you\'d rather have us host your observability cluster, you can sign up for a free trial of the [Elasticsearch Service on Elastic Cloud](https://www.elastic.co/cloud/) and change your agents to point to your new cluster.\\n\\n_Originally published May 5, 2021; updated April 6, 2023._\\n","code":"var Component=(()=>{var p=Object.create;var o=Object.defineProperty;var u=Object.getOwnPropertyDescriptor;var g=Object.getOwnPropertyNames;var m=Object.getPrototypeOf,w=Object.prototype.hasOwnProperty;var y=(n,e)=>()=>(e||n((e={exports:{}}).exports,e),e.exports),f=(n,e)=>{for(var a in e)o(n,a,{get:e[a],enumerable:!0})},l=(n,e,a,r)=>{if(e&&typeof e==\\"object\\"||typeof e==\\"function\\")for(let i of g(e))!w.call(n,i)&&i!==a&&o(n,i,{get:()=>e[i],enumerable:!(r=u(e,i))||r.enumerable});return n};var b=(n,e,a)=>(a=n!=null?p(m(n)):{},l(e||!n||!n.__esModule?o(a,\\"default\\",{value:n,enumerable:!0}):a,n)),v=n=>l(o({},\\"__esModule\\",{value:!0}),n);var h=y((M,s)=>{s.exports=_jsx_runtime});var A={};f(A,{default:()=>d,frontmatter:()=>E});var t=b(h()),E={title:\\"Adding free and open Elastic APM as part of your Elastic Observability deployment\\",slug:\\"free-open-elastic-apm-observability-deployment\\",date:\\"2024-02-28\\",description:\\"Learn how to gather application trace data and store it alongside the logs and metrics from your applications and infrastructure with Elastic Observability and Elastic APM.\\",author:[{slug:\\"david-hope\\"}],image:\\"blog-thumb-release-apm.png\\",tags:[{slug:\\"apm\\"},{slug:\\"opentelemetry\\"}]};function c(n){let e={a:\\"a\\",code:\\"code\\",em:\\"em\\",h2:\\"h2\\",h3:\\"h3\\",img:\\"img\\",li:\\"li\\",p:\\"p\\",pre:\\"pre\\",strong:\\"strong\\",ul:\\"ul\\",...n.components};return(0,t.jsxs)(t.Fragment,{children:[(0,t.jsxs)(e.p,{children:[\\"In a recent post, we showed you \\",(0,t.jsx)(e.a,{href:\\"https://www.elastic.co/blog/getting-started-with-free-and-open-elastic-observability\\",rel:\\"nofollow\\",children:\\"how to get started with the free and open tier of Elastic Observability\\"}),`. 
Below, we\'ll walk through what you need to do to expand your deployment so you can start gathering metrics from application performance monitoring (APM) or \\"tracing\\" data in your observability cluster, for free.`]}),`\\n`,(0,t.jsx)(e.h2,{id:\\"what-is-apm\\",children:\\"What is APM?\\"}),`\\n`,(0,t.jsx)(e.p,{children:\\"Application performance monitoring lets you see where your applications spend their time, what they are doing, what other applications or services they are calling, and what errors or exceptions they are encountering.\\"}),`\\n`,(0,t.jsx)(e.p,{children:(0,t.jsx)(e.img,{src:\\"/assets/images/free-open-elastic-apm-observability-deployment/screenshot-serverless-distributed-trace.png\\",alt:\\"\\",width:\\"1920\\",height:\\"1080\\"})}),`\\n`,(0,t.jsx)(e.p,{children:\\"In addition, APM also lets you see history and trends for key performance indicators, such as latency and throughput, as well as transaction and dependency information:\\"}),`\\n`,(0,t.jsx)(e.p,{children:(0,t.jsx)(e.img,{src:\\"/assets/images/free-open-elastic-apm-observability-deployment/ruby-overview.png\\",alt:\\"\\",width:\\"1920\\",height:\\"1080\\"})}),`\\n`,(0,t.jsx)(e.p,{children:\\"Whether you\'re setting up alerts for SLA breaches, trying to gauge the impact of your latest release, or deciding where to make the next improvement, APM can help with your root-cause analysis to help improve your users\' experience and drive your mean time to resolution (MTTR) toward zero.\\"}),`\\n`,(0,t.jsx)(e.h2,{id:\\"logical-architecture\\",children:\\"Logical architecture\\"}),`\\n`,(0,t.jsx)(e.p,{children:\\"Elastic APM relies on the APM Integration inside Elastic Agent, which forwards application trace and metric data from applications instrumented with APM agents to an Elastic Observability cluster. Elastic APM supports multiple agent flavors:\\"}),`\\n`,(0,t.jsxs)(e.ul,{children:[`\\n`,(0,t.jsxs)(e.li,{children:[\\"Native Elastic APM Agents, available for \\",(0,t.jsx)(e.a,{href:\\"https://www.elastic.co/guide/en/apm/agent/index.html\\",rel:\\"nofollow\\",children:\\"multiple languages\\"}),\\", including Java, .NET, Go, Ruby, Python, Node.js, PHP, and client-side JavaScript\\"]}),`\\n`,(0,t.jsxs)(e.li,{children:[\\"Code instrumented with \\",(0,t.jsx)(e.a,{href:\\"https://www.elastic.co/guide/en/apm/get-started/current/open-telemetry-elastic.html\\",rel:\\"nofollow\\",children:\\"OpenTelemetry\\"})]}),`\\n`,(0,t.jsxs)(e.li,{children:[\\"Code instrumented with \\",(0,t.jsx)(e.a,{href:\\"https://www.elastic.co/guide/en/apm/get-started/current/opentracing.html\\",rel:\\"nofollow\\",children:\\"OpenTracing\\"})]}),`\\n`,(0,t.jsxs)(e.li,{children:[\\"Code instrumented with \\",(0,t.jsx)(e.a,{href:\\"https://www.elastic.co/guide/en/apm/server/current/jaeger.html\\",rel:\\"nofollow\\",children:\\"Jaeger\\"})]}),`\\n`]}),`\\n`,(0,t.jsx)(e.p,{children:(0,t.jsx)(e.img,{src:\\"/assets/images/free-open-elastic-apm-observability-deployment/blog-elastic-observability-instrumented-services.png\\",alt:\\"\\",width:\\"877\\",height:\\"391\\"})}),`\\n`,(0,t.jsx)(e.p,{children:\\"In this blog, we\'ll provide a quick example of how to instrument code with the native Elastic APM Python agent, but the overall steps are similar for other languages.\\"}),`\\n`,(0,t.jsxs)(e.p,{children:[\\"Please note that there is a strong distinction between the \\",(0,t.jsx)(e.strong,{children:\\"Elastic APM Agent\\"}),\\" and the \\",(0,t.jsx)(e.strong,{children:\\"Elastic Agent\\"}),\\". 
These are very different components, as you can see in the diagram above, so it\'s important not to confuse them.\\"]}),`\\n`,(0,t.jsx)(e.h2,{id:\\"install-the-elastic-agent\\",children:\\"Install the Elastic Agent\\"}),`\\n`,(0,t.jsxs)(e.p,{children:[\\"The first step is to install the Elastic Agent. You either need Fleet \\",(0,t.jsx)(e.a,{href:\\"https://www.elastic.co/guide/en/fleet/current/add-a-fleet-server.html\\",rel:\\"nofollow\\",children:\\"installed first\\"}),\\", or you can install the Elastic Agent standalone. Install the Elastic Agent somewhere by \\",(0,t.jsx)(e.a,{href:\\"https://www.elastic.co/guide/en/fleet/master/elastic-agent-installation.html\\",rel:\\"nofollow\\",children:\\"following this guide\\"}),\\". This will give you an APM Integration endpoint you can hit. Note that this step is not necessary in Elastic Cloud, as we host the APM Integration for you. Check Elastic Agent is up by running:\\"]}),`\\n`,(0,t.jsx)(e.pre,{children:(0,t.jsx)(e.code,{className:\\"language-bash\\",children:`curl :8200\\n`})}),`\\n`,(0,t.jsx)(e.h2,{id:\\"instrumenting-sample-code-with-an-elastic-apm-agent\\",children:\\"Instrumenting sample code with an Elastic APM agent\\"}),`\\n`,(0,t.jsx)(e.p,{children:\\"The instructions for the various language agents differ based on the programming language, but at a high level they have a similar flow. First, you add the dependency for the agent in the language\'s native spec, then you configure the agent to let it know how to find the APM Integration.\\"}),`\\n`,(0,t.jsxs)(e.p,{children:[\\"You can try out any flavor you\'d like, but I am going to walk through the Python instructions using this Python example that \\",(0,t.jsx)(e.a,{href:\\"https://github.com/davidgeorgehope/PythonElasticAPMExample\\",rel:\\"nofollow\\",children:\\"I created\\"}),\\".\\"]}),`\\n`,(0,t.jsx)(e.h3,{id:\\"get-the-sample-code-or-use-your-own\\",children:\\"Get the sample code (or use your own)\\"}),`\\n`,(0,t.jsx)(e.p,{children:\\"To get started, I clone the GitHub repository then change to the directory:\\"}),`\\n`,(0,t.jsx)(e.pre,{children:(0,t.jsx)(e.code,{className:\\"language-python\\",children:`git clone https://github.com/davidgeorgehope/PythonElasticAPMExample\\ncd PythonElasticAPMExample\\n`})}),`\\n`,(0,t.jsx)(e.h3,{id:\\"how-to-add-the-dependency\\",children:\\"How to add the dependency\\"}),`\\n`,(0,t.jsxs)(e.p,{children:[\\"Adding the Elastic APM Dependency is simple \\\\u2014 check the app.py file from \\",(0,t.jsx)(e.a,{href:\\"https://github.com/davidgeorgehope/PythonElasticAPMExample/blob/main/app.py\\",rel:\\"nofollow\\",children:\\"the github repo\\"}),\\" and you will notice the following lines of code.\\"]}),`\\n`,(0,t.jsx)(e.pre,{children:(0,t.jsx)(e.code,{className:\\"language-python\\",children:`import elasticapm\\nfrom elasticapm import Client\\n\\napp = Flask(__name__)\\napp.config[\\"ELASTIC_APM\\"] = { \\"SERVICE_NAME\\": os.environ.get(\\"APM_SERVICE_NAME\\", \\"flask-app\\"), \\"SECRET_TOKEN\\": os.environ.get(\\"APM_SECRET_TOKEN\\", \\"\\"), \\"SERVER_URL\\": os.environ.get(\\"APM_SERVER_URL\\", \\"https://localhost:8200\\"),}\\nelasticapm.instrumentation.control.instrument()\\nclient = Client(app.config[\\"ELASTIC_APM\\"])\\n`})}),`\\n`,(0,t.jsx)(e.p,{children:\\"The Python library for Flask is capable of auto detecting transactions, but you can also start transactions in code as per the following, as we have done in this 
example:\\"}),`\\n`,(0,t.jsx)(e.pre,{children:(0,t.jsx)(e.code,{className:\\"language-python\\",children:`@app.route(\\"/\\")\\ndef hello():\\n client.begin_transaction(\'demo-transaction\')\\n client.end_transaction(\'demo-transaction\', \'success\')\\n`})}),`\\n`,(0,t.jsx)(e.h3,{id:\\"configure-the-agent\\",children:\\"Configure the agent\\"}),`\\n`,(0,t.jsx)(e.p,{children:\\"The agents need to send application trace data to the APM Integration, and to do this it has to be reachable. I configured the Elastic Agent to listen on my local host\'s IP, so anything in my subnet can send data to it. As you can see from the code below, we use docker-compose.yml to pass in the config via environment variables. Please edit these variables for your own Elastic installation.\\"}),`\\n`,(0,t.jsx)(e.pre,{children:(0,t.jsx)(e.code,{className:\\"language-yaml\\",children:`# docker-compose.yml\\nversion: \\"3.9\\"\\nservices:\\n flask_app:\\n build: .\\n ports:\\n - \\"5001:5001\\"\\n environment:\\n - PORT=5001\\n - APM_SERVICE_NAME=flask-app\\n - APM_SECRET_TOKEN=your_secret_token\\n - APM_SERVER_URL=https://host.docker.internal:8200\\n`})}),`\\n`,(0,t.jsx)(e.p,{children:\\"Some commentary on the above:\\"}),`\\n`,(0,t.jsxs)(e.ul,{children:[`\\n`,(0,t.jsxs)(e.li,{children:[(0,t.jsx)(e.strong,{children:\\"service_name:\\"}),\\" If you leave this out it will just default to the application\'s name, but you can override that here.\\"]}),`\\n`,(0,t.jsxs)(e.li,{children:[(0,t.jsx)(e.strong,{children:\\"secret_token:\\"}),\\" \\",(0,t.jsx)(e.a,{href:\\"https://www.elastic.co/guide/en/apm/server/current/secret-token.html\\",rel:\\"nofollow\\",children:\\"Secret tokens\\"}),\\" allow you to authorize requests to the APM Server, but they require that the APM Server is set up with SSL/TLS and that a secret token has been set up. We\'re not using HTTPS between the agents and the APM Server, so we\'ll comment this one out.\\"]}),`\\n`,(0,t.jsxs)(e.li,{children:[(0,t.jsx)(e.strong,{children:\\"server_url:\\"}),\\" This is how the agent can reach the APM Integration inside Elastic Agent. Replace this with the name or IP of your host running Elastic Agent.\\"]}),`\\n`]}),`\\n`,(0,t.jsxs)(e.p,{children:[\\"Now that the Elastic APM side of the configuration is done, we simply follow the steps from the \\",(0,t.jsx)(e.a,{href:\\"https://github.com/davidgeorgehope/PythonElasticAPMExample/blob/main/README.md\\",rel:\\"nofollow\\",children:\\"README\\"}),\\" to start up.\\"]}),`\\n`,(0,t.jsx)(e.pre,{children:(0,t.jsx)(e.code,{className:\\"language-bash\\",children:`docker-compose up --build -d\\n`})}),`\\n`,(0,t.jsx)(e.p,{children:\\"The build step will take several minutes.\\"}),`\\n`,(0,t.jsxs)(e.p,{children:[\\"You can navigate to the running sample application by visiting \\",(0,t.jsx)(e.a,{href:\\"https://localhost:5001\\",rel:\\"nofollow\\",children:\\"https://localhost:5001\\"}),\\". There\'s not a lot to the sample, but it does generate some APM data. 
To generate a bit of a load, you can reload them a few times or run a quick little script:\\"]}),`\\n`,(0,t.jsx)(e.pre,{children:(0,t.jsx)(e.code,{className:\\"language-bash\\",children:`#!/bin/bash\\n# load_test.sh\\nurl=\\"https://localhost:5001\\"\\nfor i in {1..1000}\\ndo\\n curl -s -o /dev/null $url\\n sleep 1\\ndone\\n`})}),`\\n`,(0,t.jsx)(e.p,{children:\\"This will just reload the pages every second.\\"}),`\\n`,(0,t.jsxs)(e.p,{children:[\\"Back in Kibana, navigate back to the APM app (hamburger icon, then select \\",(0,t.jsx)(e.strong,{children:\\"APM\\"}),\\" ) and you should see our new flask-app service (I let mine run so it shows a bit more history):\\"]}),`\\n`,(0,t.jsx)(e.p,{children:(0,t.jsx)(e.img,{src:\\"/assets/images/free-open-elastic-apm-observability-deployment/blog-elastic-observability-services.png\\",alt:\\"\\",width:\\"1591\\",height:\\"515\\"})}),`\\n`,(0,t.jsx)(e.p,{children:\\"The Service Overview page provides an at-a-glance summary of the health of a service in one place. If you\'re a developer or an SRE, this is the page that will help you answer questions like:\\"}),`\\n`,(0,t.jsxs)(e.ul,{children:[`\\n`,(0,t.jsx)(e.li,{children:\\"How did a new deployment impact performance?\\"}),`\\n`,(0,t.jsx)(e.li,{children:\\"What are the top impacted transactions?\\"}),`\\n`,(0,t.jsx)(e.li,{children:\\"How does performance correlate with underlying infrastructure?\\"}),`\\n`]}),`\\n`,(0,t.jsxs)(e.p,{children:[\\"This view provides a list of all of the applications that have sent application trace data to Elastic APM in the specified period of time (in this case, the last 15 minutes). There are also sparklines showing mini graphs of latency, throughput, and error rate. Clicking on \\",(0,t.jsx)(e.strong,{children:\\"flask-app\\"}),\\" takes us to the \\",(0,t.jsx)(e.strong,{children:\\"service overview\\"}),\\" page, which shows the various transactions within the service (recall that my script is hitting the / endpoint, as seen in the \\",(0,t.jsx)(e.strong,{children:\\"Transactions\\"}),\\" section). 
We get bigger graphs for \\",(0,t.jsx)(e.strong,{children:\\"Latency\\"}),\\" , \\",(0,t.jsx)(e.strong,{children:\\"Throughput\\"}),\\" , \\",(0,t.jsx)(e.strong,{children:\\"Errors\\"}),\\" , and \\",(0,t.jsx)(e.strong,{children:\\"Error Rates\\"}),\\".\\"]}),`\\n`,(0,t.jsx)(e.p,{children:(0,t.jsx)(e.img,{src:\\"/assets/images/free-open-elastic-apm-observability-deployment/blog-elastic-observability-flask-app.png\\",alt:\\"\\",width:\\"1080\\",height:\\"1141\\"})}),`\\n`,(0,t.jsx)(e.p,{children:\\"When you\'re instrumenting real applications, under real load, you\'ll see a lot more connectivity (and errors!)\\"}),`\\n`,(0,t.jsx)(e.p,{children:\\"Clicking on a transaction in the transaction view, in this case, our sample app\'s demo-transaction transaction, we can see exactly what operations were called:\\"}),`\\n`,(0,t.jsx)(e.p,{children:(0,t.jsx)(e.img,{src:\\"/assets/images/free-open-elastic-apm-observability-deployment/blog-elastic-observability-flask-app-demo-transaction.png\\",alt:\\"\\",width:\\"1079\\",height:\\"903\\"})}),`\\n`,(0,t.jsx)(e.p,{children:\\"This includes detailed information about calls to external services, such as database queries:\\"}),`\\n`,(0,t.jsx)(e.p,{children:(0,t.jsx)(e.img,{src:\\"/assets/images/free-open-elastic-apm-observability-deployment/blog-elastic-observability-span-details.png\\",alt:\\"\\",width:\\"1213\\",height:\\"677\\"})}),`\\n`,(0,t.jsx)(e.h2,{id:\\"whats-next\\",children:\\"What\'s next?\\"}),`\\n`,(0,t.jsxs)(e.p,{children:[\\"Now that you\'ve got your Elastic Observability cluster up and running and collecting out-of-the-box application trace data, explore the public APIs for the languages that your applications are using, which allow you to take your APM data to the next level. The APIs allow you to add custom metadata, define business transactions, create custom spans, and more. 
You can find the public API specs for the various APM agents (such as \\",(0,t.jsx)(e.a,{href:\\"https://www.elastic.co/guide/en/apm/agent/java/current/public-api.html\\",rel:\\"nofollow\\",children:\\"Java\\"}),\\", \\",(0,t.jsx)(e.a,{href:\\"https://www.elastic.co/guide/en/apm/agent/ruby/current/api.html\\",rel:\\"nofollow\\",children:\\"Ruby\\"}),\\", \\",(0,t.jsx)(e.a,{href:\\"https://www.elastic.co/guide/en/apm/agent/python/current/index.html\\",rel:\\"nofollow\\",children:\\"Python\\"}),\\", and more) on the APM agent \\",(0,t.jsx)(e.a,{href:\\"https://www.elastic.co/guide/en/apm/agent/index.html\\",rel:\\"nofollow\\",children:\\"documentation pages\\"}),\\".\\"]}),`\\n`,(0,t.jsxs)(e.p,{children:[\\"If you\'d like to learn more about Elastic APM, check out \\",(0,t.jsx)(e.a,{href:\\"https://www.elastic.co/webinars/introduction-to-elastic-apm-in-the-shift-to-cloud-native\\",rel:\\"nofollow\\",children:\\"our webinar on Elastic APM in the shift to cloud native\\"}),\\" to see other ways that Elastic APM can help you in your ecosystem.\\"]}),`\\n`,(0,t.jsxs)(e.p,{children:[\\"If you decide that you\'d rather have us host your observability cluster, you can sign up for a free trial of the \\",(0,t.jsx)(e.a,{href:\\"https://www.elastic.co/cloud/\\",rel:\\"nofollow\\",children:\\"Elasticsearch Service on Elastic Cloud\\"}),\\" and change your agents to point to your new cluster.\\"]}),`\\n`,(0,t.jsx)(e.p,{children:(0,t.jsx)(e.em,{children:\\"Originally published May 5, 2021; updated April 6, 2023.\\"})})]})}function d(n={}){let{wrapper:e}=n.components||{};return e?(0,t.jsx)(e,{...n,children:(0,t.jsx)(c,{...n})}):c(n)}return v(A);})();\\n;return Component;"},"_id":"articles/adding-free-and-open-elastic-apm-as-part-of-your-elastic-observability-deployment.mdx","_raw":{"sourceFilePath":"articles/adding-free-and-open-elastic-apm-as-part-of-your-elastic-observability-deployment.mdx","sourceFileName":"adding-free-and-open-elastic-apm-as-part-of-your-elastic-observability-deployment.mdx","sourceFileDir":"articles","contentType":"mdx","flattenedPath":"articles/adding-free-and-open-elastic-apm-as-part-of-your-elastic-observability-deployment"},"type":"Article","imageUrl":"/assets/images/free-open-elastic-apm-observability-deployment/blog-thumb-release-apm.png","readingTime":"8 min read","url":"/free-open-elastic-apm-observability-deployment","headings":[{"level":2,"title":"What is APM?","href":"#what-is-apm"},{"level":2,"title":"Logical architecture","href":"#logical-architecture"},{"level":2,"title":"Install the Elastic Agent","href":"#install-the-elastic-agent"},{"level":2,"title":"Instrumenting sample code with an Elastic APM agent","href":"#instrumenting-sample-code-with-an-elastic-apm-agent"},{"level":3,"title":"Get the sample code (or use your own)","href":"#get-the-sample-code-or-use-your-own"},{"level":3,"title":"How to add the dependency","href":"#how-to-add-the-dependency"},{"level":3,"title":"Configure the agent","href":"#configure-the-agent"},{"level":2,"title":"What\'s next?","href":"#whats-next"}]},{"title":"Bringing observability insights from Elastic AI Assistant to the world of GitHub Copilot","slug":"ai-assistant-to-github-copilot","date":"2024-05-23","description":"GitHub announced GitHub Copilot Extensions this week at Microsoft Build. 
We are working with the GitHub team to bring observability insights from Elastic AI Assistant to GitHub Copilot users.","image":"githubcopilot-aiassistant-C-2x.png","author":[{"slug":"jeff-vestal","type":"Author","_raw":{}},{"slug":"hemant-malik","type":"Author","_raw":{}}],"tags":[{"slug":"ai-assistant","type":"Tag","_raw":{}},{"slug":"genai","type":"Tag","_raw":{}},{"slug":"azure","type":"Tag","_raw":{}}],"body":{"raw":"\\nGitHub [announced](https://github.blog/2024-05-21-introducing-github-copilot-extensions/) GitHub Copilot Extensions this week at Microsoft Build. We are working with the GitHub team in the Limited Beta Program to explore bringing observability insights from Elastic AI Assistant to GitHub Copilot users.\\n\\nElastic’s GitHub Copilot Extension aims to combine the capabilities of GitHub Copilot and Elastic AI Assistant for Observability. This could enable developers to access critical insights from Elastic AI Assistant from GitHub Copilot Chat on GitHub.com, Visual Studio, and VS Code - places where they write their code.\\n\\nDevelopers will be able to ask questions such as:\\n- What errors are active?\\n- What’s the latest stacktrace for my application?\\n- What caused a slowdown in the application after the last push to the dev environment?\\n- How do I write an ES|QL query that my app will send to Elasticsearch?\\n- Which runbook from GitHub has been loaded into Elasticsearch and is related to the issue I’m investigating?\\n\\nAnd many more!\\n\\n[Watch Jeff\'s PoC Demo@Microsoft Build 2024](https://build.microsoft.com/en-US/sessions/acc48a7a-b412-4b4f-88a6-53ef4b2cb2bc?source=/schedule)\\n\\n![Elastic\'s Copilot Extension in VSCode](/assets/images/ai-assistant-to-github-copilot/elastic-copilot-vscode.png)\\n\\n_Elastic AI Assistant surfaced in GitHub Copilot Chat from our Extension (Proof of Concept)_\\n\\n## What is the Elastic AI Assistant for Observability\\n\\nThe Elastic AI Assistant for Observability, a user-centric tool, is a game-changer in providing contextual insights and streamlining troubleshooting within the Elastic Observability environment. By harnessing generative AI capabilities, the assistant offers open prompts that decipher error messages and propose remediation actions. It adopts a Retrieval-Augmented Generation (RAG) approach to fetch the most pertinent internal information, such as APM traces, log messages, SLOs, GitHub issues, runbooks, and more. This contextual assistance is a huge leap forward for Site Reliability Engineers (SREs) and operations teams, offering immediate, relevant solutions to issues based on existing documentation and resources, boosting developer productivity.\\n\\nFor more information on setting up and using the AI Assistant for Observability, check out the blog [Getting started with the Elastic AI Assistant for Observability and Microsoft Azure OpenAI](https://www.elastic.co/observability-labs/blog/elastic-ai-assistant-observability-microsoft-azure-openai). Additionally, learn how [Elastic Observability AI Assistant uses RAG to help analyze application issues with GitHub issues](https://www.elastic.co/observability-labs/blog/elastic-rag-ai-assistant-application-issues-llm-github).\\n\\nOne unique feature of the AI Assistant is its API support. 
This allows you to take advantage of all the capabilities provided by the Elastic AI Assistant and integrate them right into your workflow.\\n\\n## What is a GitHub Copilot Extension\\n\\nGitHub Copilot Extensions, a new addition to GitHub Copilot, revolutionize the developer experience by integrating a diverse array of tools and services directly into the developer\'s workflow. These unique extensions, crafted by partners, enable developers to interact with various services and tools using natural language within their Integrated Development Environment (IDE) or GitHub.com. This integration eliminates the need for context-switching, allowing developers to maintain their flow state, troubleshoot issues, and deploy solutions with unparalleled efficiency. These extensions will be accessible through GitHub Copilot Chat in the GitHub Marketplace, with options for organizations to create private extensions tailored to their internal tooling.\\n\\n## What’s next\\n\\nWe are participating in the GitHub Limited Beta Program as a partner and exploring the possibility of bringing the Elastic GitHub Copilot Extension to the GitHub Marketplace. We are excited to bring insights from Elastic Observability to GitHub Copilot users, side by side with the code behind those services. Stay tuned!\\n\\nResources:\\n- [Getting Started with Elastic AI Assistant for Observability with Azure OpenAI](https://www.elastic.co/observability-labs/blog/elastic-ai-assistant-observability-microsoft-azure-openai)\\n- [The Elastic AI Assistant for Observability escapes Kibana!](https://ela.st/assistant-escapes)\\n- [Elastic Observability AI Assistant uses RAG to help analyze application issues with GitHub issues](https://www.elastic.co/observability-labs/blog/elastic-rag-ai-assistant-application-issues-llm-github)\\n- [Troubleshooting with Elastic AI Assistant using your organization\'s runbooks](https://www.elastic.co/observability-labs/blog/sre-troubleshooting-ai-assistant-observability-runbooks)\\n- [The AI Assistant Observability documentation](https://www.elastic.co/guide/en/observability/current/obs-ai-assistant.html)\\n- [GitHub Copilot Extensions Blog Announcement](https://github.blog/2024-05-21-introducing-github-copilot-extensions/)\\n- [ES|QL documentation](https://www.elastic.co/guide/en/elasticsearch/reference/current/esql.html)\\n\\n","code":"var Component=(()=>{var b=Object.create;var n=Object.defineProperty;var d=Object.getOwnPropertyDescriptor;var p=Object.getOwnPropertyNames;var g=Object.getPrototypeOf,f=Object.prototype.hasOwnProperty;var m=(e,t)=>()=>(t||e((t={exports:{}}).exports,t),t.exports),w=(e,t)=>{for(var s in t)n(e,s,{get:t[s],enumerable:!0})},l=(e,t,s,a)=>{if(t&&typeof t==\\"object\\"||typeof t==\\"function\\")for(let o of p(t))!f.call(e,o)&&o!==s&&n(e,o,{get:()=>t[o],enumerable:!(a=d(t,o))||a.enumerable});return e};var v=(e,t,s)=>(s=e!=null?b(g(e)):{},l(t||!e||!e.__esModule?n(s,\\"default\\",{value:e,enumerable:!0}):s,e)),y=e=>l(n({},\\"__esModule\\",{value:!0}),e);var c=m((x,r)=>{r.exports=_jsx_runtime});var E={};w(E,{default:()=>u,frontmatter:()=>A});var i=v(c()),A={title:\\"Bringing observability insights from Elastic AI Assistant to the world of GitHub Copilot\\",slug:\\"ai-assistant-to-github-copilot\\",date:\\"2024-05-23\\",description:\\"GitHub announced GitHub Copilot Extensions this week at Microsoft Build. 
We are working with the GitHub team to bring observability insights from Elastic AI Assistant to GitHub Copilot users.\\",author:[{slug:\\"jeff-vestal\\"},{slug:\\"hemant-malik\\"}],image:\\"githubcopilot-aiassistant-C-2x.png\\",tags:[{slug:\\"ai-assistant\\"},{slug:\\"genai\\"},{slug:\\"azure\\"}]};function h(e){let t={a:\\"a\\",em:\\"em\\",h2:\\"h2\\",img:\\"img\\",li:\\"li\\",p:\\"p\\",ul:\\"ul\\",...e.components};return(0,i.jsxs)(i.Fragment,{children:[(0,i.jsxs)(t.p,{children:[\\"GitHub \\",(0,i.jsx)(t.a,{href:\\"https://github.blog/2024-05-21-introducing-github-copilot-extensions/\\",rel:\\"nofollow\\",children:\\"announced\\"}),\\" GitHub Copilot Extensions this week at Microsoft Build. We are working with the GitHub team in the Limited Beta Program to explore bringing observability insights from Elastic AI Assistant to GitHub Copilot users.\\"]}),`\\n`,(0,i.jsx)(t.p,{children:\\"Elastic\\\\u2019s GitHub Copilot Extension aims to combine the capabilities of GitHub Copilot and Elastic AI Assistant for Observability. This could enable developers to access critical insights from Elastic AI Assistant from GitHub Copilot Chat on GitHub.com, Visual Studio, GitHub.com, Visual Studio, and VS Code - places where they write their code.\\"}),`\\n`,(0,i.jsx)(t.p,{children:\\"Developers will be able ask questions such as\\"}),`\\n`,(0,i.jsxs)(t.ul,{children:[`\\n`,(0,i.jsx)(t.li,{children:\\"What errors are active?\\"}),`\\n`,(0,i.jsx)(t.li,{children:\\"What\\\\u2019s the latest stacktrace for my application?\\"}),`\\n`,(0,i.jsx)(t.li,{children:\\"What caused a slowdown in the application after the last push to the dev environment?\\"}),`\\n`,(0,i.jsx)(t.li,{children:\\"How to write an ES|QL for query that my app will send to Elasticsearch?\\"}),`\\n`,(0,i.jsx)(t.li,{children:`What runbook from Github has been loaded into Elasticsearch and is related to the issue I\\\\u2019m investigating\\nAnd many more!`}),`\\n`]}),`\\n`,(0,i.jsx)(t.p,{children:(0,i.jsx)(t.a,{href:\\"https://build.microsoft.com/en-US/sessions/acc48a7a-b412-4b4f-88a6-53ef4b2cb2bc?source=/schedule\\",rel:\\"nofollow\\",children:\\"Watch Jeff\'s PoC Demo@Microsoft Build 2024\\"})}),`\\n`,(0,i.jsx)(t.p,{children:(0,i.jsx)(t.img,{src:\\"/assets/images/ai-assistant-to-github-copilot/elastic-copilot-vscode.png\\",alt:\\"Elastic\'s Copilot Extension in VSCode\\",width:\\"3093\\",height:\\"1931\\"})}),`\\n`,(0,i.jsx)(t.p,{children:(0,i.jsx)(t.em,{children:\\"Elastic AI Assistant surfaced in GitHub Copilot Chat from our Extension (Proof of Concept)\\"})}),`\\n`,(0,i.jsx)(t.h2,{id:\\"what-is-the-elastic-ai-assistant-for-observability\\",children:\\"What is the Elastic AI Assistant for Observability\\"}),`\\n`,(0,i.jsx)(t.p,{children:\\"The Elastic Observability AI Assistant for Observability, a user-centric tool, is a game-changer in providing contextual insights and streamlining troubleshooting within the Elastic Observability environment. By harnessing generative AI capabilities, the assistant offers open prompts that decipher error messages and propose remediation actions. It adopts a Retrieval-Augmented Generation (RAG) approach to fetch the most pertinent internal information, such as APM traces, log messages, SLOs, GitHub issues, runbooks, and more. 
This contextual assistance is a huge leap forward for Site Reliability Engineers (SREs) and operations teams, offering immediate, relevant solutions to issues based on existing documentation and resources, boosting developer productivity.\\"}),`\\n`,(0,i.jsxs)(t.p,{children:[\\"For more information on setting up and using the AI Assistant for Observability check out the blog \\",(0,i.jsx)(t.a,{href:\\"https://www.elastic.co/observability-labs/blog/elastic-ai-assistant-observability-microsoft-azure-openai\\",rel:\\"nofollow\\",children:\\"Getting started with the Elastic AI Assistant for Observability and Microsoft Azure OpenAI\\"}),\\". Additionally, learn how \\",(0,i.jsx)(t.a,{href:\\"https://www.elastic.co/observability-labs/blog/elastic-rag-ai-assistant-application-issues-llm-github\\",rel:\\"nofollow\\",children:\\"Elastic Observability AI Assistant uses RAG to help analyze application issues with GitHub issues\\"}),\\".\\"]}),`\\n`,(0,i.jsx)(t.p,{children:\\"One unique feature of the AI Assistant is its API support. This allows you to take advantage of all the capabilities provided by the Elastic AI Assistant, and integrate them right into your workflow.\\"}),`\\n`,(0,i.jsx)(t.h2,{id:\\"what-is-a-github-copilot-extension\\",children:\\"What is a GitHub Copilot Extension\\"}),`\\n`,(0,i.jsx)(t.p,{children:\\"GitHub Copilot Extensions, a new addition to GitHub Copilot, revolutionizes the developer experience by integrating a diverse array of tools and services directly into the developer\'s workflow. These unique extensions, crafted by partners, enable developers to interact with various services and tools using natural language within their Integrated Development Environment (IDE) or GitHub.com. This integration eliminates the need for context-switching, allowing developers to maintain their flow state, troubleshoot issues, and deploy solutions with unparalleled efficiency. These extensions will be accessible through GitHub Copilot Chat in the GitHub Marketplace, with options for organizations to create private extensions tailored to their internal tooling.\\"}),`\\n`,(0,i.jsx)(t.h2,{id:\\"whats-next\\",children:\\"What\\\\u2019s next\\"}),`\\n`,(0,i.jsx)(t.p,{children:\\"We are participating in the Github Limited Beta Program as a partner and exploring the possibility of bringing Elastic GitHub Copilot Extension to the GitHub Marketplace. We are excited to unlock insights from Elastic Observability to GitHub Copilot users side by side to the code behind those services. 
Stay tuned!\\"}),`\\n`,(0,i.jsx)(t.p,{children:\\"Resources:\\"}),`\\n`,(0,i.jsxs)(t.ul,{children:[`\\n`,(0,i.jsx)(t.li,{children:(0,i.jsx)(t.a,{href:\\"https://www.elastic.co/observability-labs/blog/elastic-ai-assistant-observability-microsoft-azure-openai\\",rel:\\"nofollow\\",children:\\"Getting Started with Elastic AI Assistant for Observability with Azure OpenAI\\"})}),`\\n`,(0,i.jsx)(t.li,{children:(0,i.jsx)(t.a,{href:\\"https://ela.st/assistant-escapes\\",rel:\\"nofollow\\",children:\\"The Elastic AI Assistant for Observability escapes Kibana!\\"})}),`\\n`,(0,i.jsx)(t.li,{children:(0,i.jsx)(t.a,{href:\\"https://www.elastic.co/observability-labs/blog/elastic-rag-ai-assistant-application-issues-llm-github\\",rel:\\"nofollow\\",children:\\"Elastic Observability AI Assistant uses RAG to help analyze application issues with GitHub issues\\"})}),`\\n`,(0,i.jsx)(t.li,{children:(0,i.jsx)(t.a,{href:\\"https://www.elastic.co/observability-labs/blog/sre-troubleshooting-ai-assistant-observability-runbooks\\",rel:\\"nofollow\\",children:\\"Troubleshooting with Elastic AI Assistant using your organization\'s runbooks\\"})}),`\\n`,(0,i.jsx)(t.li,{children:(0,i.jsx)(t.a,{href:\\"https://www.elastic.co/guide/en/observability/current/obs-ai-assistant.html\\",rel:\\"nofollow\\",children:\\"The AI Assistant Observability documentation\\"})}),`\\n`,(0,i.jsx)(t.li,{children:(0,i.jsx)(t.a,{href:\\"https://github.blog/2024-05-21-introducing-github-copilot-extensions/\\",rel:\\"nofollow\\",children:\\"GitHub Copilot Extensions Blog Announcement\\"})}),`\\n`,(0,i.jsx)(t.li,{children:(0,i.jsx)(t.a,{href:\\"https://www.elastic.co/guide/en/elasticsearch/reference/current/esql.html\\",rel:\\"nofollow\\",children:\\"ES|QL documentation\\"})}),`\\n`]})]})}function u(e={}){let{wrapper:t}=e.components||{};return t?(0,i.jsx)(t,{...e,children:(0,i.jsx)(h,{...e})}):h(e)}return y(E);})();\\n;return Component;"},"_id":"articles/ai-assistant-to-github-copilot.mdx","_raw":{"sourceFilePath":"articles/ai-assistant-to-github-copilot.mdx","sourceFileName":"ai-assistant-to-github-copilot.mdx","sourceFileDir":"articles","contentType":"mdx","flattenedPath":"articles/ai-assistant-to-github-copilot"},"type":"Article","imageUrl":"/assets/images/ai-assistant-to-github-copilot/githubcopilot-aiassistant-C-2x.png","readingTime":"4 min read","url":"/ai-assistant-to-github-copilot","headings":[{"level":2,"title":"What is the Elastic AI Assistant for Observability","href":"#what-is-the-elastic-ai-assistant-for-observability"},{"level":2,"title":"What is a GitHub Copilot Extension","href":"#what-is-a-github-copilot-extension"},{"level":2,"title":"What’s next","href":"#whats-next"}]},{"title":"Analyzing OpenTelemetry apps with Elastic AI Assistant and APM","slug":"analyzing-opentelemetry-apps-elastic-ai-assistant-apm","date":"2024-03-12","description":"Elastic Observability provides native OpenTelemetry support, but analyzing applications logs, metrics, and traces can be daunting. 
Elastic Observability not only provides AIOps features but also an AI Assistant (co-pilot) to help get to MTTR faster.","image":"ecs-otel-announcement-3.jpeg","author":[{"slug":"bahubali-shetti","type":"Author","_raw":{}}],"tags":[{"slug":"ai-assistant","type":"Tag","_raw":{}},{"slug":"apm","type":"Tag","_raw":{}},{"slug":"genai","type":"Tag","_raw":{}},{"slug":"apm","type":"Tag","_raw":{}},{"slug":"opentelemetry","type":"Tag","_raw":{}}],"body":{"raw":"\\nOpenTelemetry is rapidly becoming the most expansive project within the Cloud Native Computing Foundation (CNCF), boasting as many commits as Kubernetes and garnering widespread support from customers. Numerous companies are adopting OpenTelemetry and integrating it into their applications. Elastic\xae offers detailed [guides](https://www.elastic.co/blog/getting-started-opentelemetry-instrumentation-sample-app) on implementing OpenTelemetry for applications. However, like many applications, pinpointing and resolving issues can be time-consuming.\\n\\nThe [Elastic AI Assistant](https://www.elastic.co/blog/context-aware-insights-elastic-ai-assistant-observability) significantly enhances the process, not only in identifying but also in resolving issues. This is further enhanced by Elastic’s new Service Level Objective (SLO) capability, allowing you to streamline your entire site reliability engineering (SRE) process from detecting potential issues to enhancing the overall customer experience.\\n\\nIn this blog, we will demonstrate how you, as an SRE, can detect issues in a service equipped with OpenTelemetry. We will explore problem identification using Elastic APM, Elastic’s AIOps capabilities, and the Elastic AI Assistant.\\n\\nWe will illustrate this using the [OpenTelemetry demo](https://github.com/elastic/opentelemetry-demo), with a [feature flag (cartService)](https://opentelemetry.io/docs/demo/feature-flags/) that is activated.\\n\\nOur walkthrough will encompass two scenarios:\\n\\n1. When the SLO for cart service becomes noncompliant, we will analyze the error through Elastic APM. The Elastic AI Assistant will assist by providing a runbook and a GitHub issue to facilitate issue analysis.\\n\\n2. Should the SLO for the cart service be noncompliant, we will examine the trace that indicates a high failure rate. We will employ AIOps for failure correlation and the AI Assistant to analyze logs and Kubernetes metrics directly from the Assistant.\\n\\n## Prerequisites and config\\n\\nIf you plan on following this blog, here are some of the components and details we used to set up the configuration:\\n\\n- Ensure you have an account on [Elastic Cloud](https://cloud.elastic.co/) and a deployed stack ([see instructions here](https://www.elastic.co/guide/en/elastic-stack/current/installing-elastic-stack.html)).\\n\\n- We used the OpenTelemetry Demo. Directions for using Elastic with OpenTelemetry Demo are [here](https://github.com/elastic/opentelemetry-demo).\\n\\n- Additionally you will need to connect your AI Assistant to your favorite LLM. We used Azure OpenAI GPT-4.\\n\\n- We also ran the OpenTelemetry Demo on Kubernetes, specifically on GKE.\\n\\n## SLO noncompliance\\n\\nElastic APM recently released the SLO (Service Level Objectives) feature in [8.12](https://www.elastic.co/guide/en/observability/8.12/slo.html). This feature enables setting measurable performance targets for services, such as [availability, latency, traffic, errors, and saturation or define your own](https://sre.google/sre-book/monitoring-distributed-systems/). 
Key components include:\\n\\n- Defining and monitoring SLIs (Service Level Indicators)\\n\\n- Monitoring error budgets indicating permissible performance shortfalls\\n\\n- Alerting on burn rates showing error budget consumption\\n\\nWe set up two SLOs for cart service:\\n\\n- **Availability SLO**, which monitors its availability by ensuring that transactions succeed. We set up the feature flag in the OpenTelemetry application, which generates an error for EmptyCart transactions 10% of the time.\\n\\n- **Latency SLO**, to ensure transactions do not exceed a specific latency, which would degrade the customer experience.\\n\\n![1 - SLOs](/assets/images/analyzing-opentelemetry-apps-elastic-ai-assistant-apm/image1.png)\\n\\nBecause of the OTel cartservice feature flag, the availability SLO is triggered, and within the SLO details, we see that over a seven-day period the availability is well below our target of 99.9%, at 95.5%. Additionally, all of the error budget that was available has been exhausted.\\n\\n![2 - cart service otel](/assets/images/analyzing-opentelemetry-apps-elastic-ai-assistant-apm/image2.png)\\n\\nWith SLOs, you can easily identify when issues with customer experience occur, or when potential issues with services arise, before they become worse.\\n\\n## Scenario 1: Analyzing APM trace and logs with AI Assistant\\n\\nOnce the SLO is found to be noncompliant, we can dive into cart service to investigate in Elastic APM. The following walks through the set of steps you can take in Elastic APM and how to use the AI Assistant to analyze the issue:\\n\\n\\n\\nFrom the video, we can see that once in APM, we took the following steps:\\n\\n1. We investigated the trace EmptyCart, which was experiencing larger than normal failure rates.\\n\\n2. The trace showed a significant number of failures, which also resulted in slightly larger latency.\\n\\n3. We used AIOps failure correlation to identify the potential component causing the failure, which correlated to a field value of FailedPrecondition.\\n\\n4. While filtering on that value and reviewing the logs, we still couldn’t understand what this meant.\\n\\n5. This is where you can use Elastic’s AI Assistant to further your understanding of the issue.\\n\\nAI Assistant helped us analyze the following:\\n\\n1. It helped us understand what the log message meant and that it was related to the Redis connection failure issue.\\n\\n2. Because we couldn’t connect to Redis, we asked the AI Assistant to give us the metrics for the Redis Kubernetes pods.\\n\\n3. We learned there were two pods for Redis from the logs over the last two hours.\\n\\n4. However, we also learned that the memory of one seems to be increasing.\\n\\n5. It seems that Redis restarted (hence the second pod), and with this information we could dive deeper into what happened to Redis.\\n\\nYou can see how quickly we could correlate a significant amount of information, logs, metrics, and traces through the AI Assistant and Elastic’s APM capabilities. We didn’t have to go through multiple screens to hunt down information.\\n\\n## Scenario 2: Analyzing APM error with AI Assistant\\n\\nOnce the SLO is found to be noncompliant, we can dive into cart service to investigate in Elastic APM. The following walks through the set of steps you can take in Elastic APM and how to use the AI Assistant to analyze the issue:\\n\\n\\n\\nFrom the video, we can see that once in APM, we took the following steps:\\n\\n1. We noticed a specific error for the APM service.\\n\\n2. 
We investigated this in the error tab, and while we see it’s an issue with connection to Redis, we still need more information.\\n\\n3. The AI Assistant helps us understand the stacktrace and provides some potential causes for the error and ways to diagnose and resolve it.\\n\\n4. We also asked it for a runbook, created by our SRE team, which gives us steps to work through this particular issue.\\n\\nBut as you can see, AI Assistant provides us not only with information about the error message but also how to diagnose it and potentially resolve it with an internal runbook.\\n\\n## Achieving operational excellence, optimal performance, and reliability\\n\\nWe’ve shown how an OpenTelemetry instrumented application (OTel demo) can be analyzed using Elastic’s features, especially the AI Assistant coupled with Elastic APM, AIOps, and the latest SLO features. Elastic significantly streamlines the process of identifying and resolving issues within your applications.\\n\\nThrough our detailed walkthrough of two distinct scenarios, we have seen how Elastic APM and the AI Assistant can efficiently analyze and address noncompliance with SLOs in a cart service. The ability to quickly correlate information, logs, metrics, and traces through these tools not only saves time but also enhances the overall effectiveness of the troubleshooting process.\\n\\nThe use of Elastic\'s AI Assistant in these scenarios underscores the value of integrating advanced AI capabilities into operational workflows. It goes beyond simple error analysis, offering insights into potential causes and providing actionable solutions, sometimes even with customized runbooks. This integration of technology fundamentally changes how SREs approach problem-solving, making the process more efficient and less reliant on manual investigation.\\n\\nOverall, the advancements in Elastic’s APM, AIOps capabilities, and the AI Assistant, particularly in handling OpenTelemetry data, represent a significant step forward in operational excellence. These tools enable SREs to not only react swiftly to emerging issues but also proactively manage and optimize the performance and reliability of their services, thereby ensuring an enhanced customer experience.\\n\\n## Try it out\\n\\nExisting Elastic Cloud customers can access many of these features directly from the [Elastic Cloud console](https://cloud.elastic.co/). Not taking advantage of Elastic on cloud? [Start a free trial](https://www.elastic.co/cloud/cloud-trial-overview).\\n\\n> - [Build better Service Level Objectives (SLOs) from logs and metrics](https://www.elastic.co/blog/service-level-objectives-slos-logs-metrics)\\n> - [Elastic Observability 8.12: GA for AI Assistant, SLO, and Mobile APM support](https://www.elastic.co/blog/whats-new-elastic-observability-8-12-0)\\n> - [Native Observability support in Elastic Observability](https://www.elastic.co/blog/native-opentelemetry-support-in-elastic-observability)\\n> - [Context-aware insights using the Elastic AI Assistant for Observability](https://www.elastic.co/blog/context-aware-insights-elastic-ai-assistant-observability)\\n\\n_The release and timing of any features or functionality described in this post remain at Elastic\'s sole discretion. Any features or functionality not currently available may not be delivered on time or at all._\\n\\n_In this blog post, we may have used or referred to third party generative AI tools, which are owned and operated by their respective owners. 
Elastic does not have any control over the third party tools and we have no responsibility or liability for their content, operation or use, nor for any loss or damage that may arise from your use of such tools. Please exercise caution when using AI tools with personal, sensitive or confidential information. Any data you submit may be used for AI training or other purposes. There is no guarantee that information you provide will be kept secure or confidential. You should familiarize yourself with the privacy practices and terms of use of any generative AI tools prior to use._\\n\\n_Elastic, Elasticsearch, ESRE, Elasticsearch Relevance Engine and associated marks are trademarks, logos or registered trademarks of Elasticsearch N.V. in the United States and other countries. All other company and product names are trademarks, logos or registered trademarks of their respective owners._\\n","code":"var Component=(()=>{var p=Object.create;var a=Object.defineProperty;var u=Object.getOwnPropertyDescriptor;var g=Object.getOwnPropertyNames;var m=Object.getPrototypeOf,f=Object.prototype.hasOwnProperty;var w=(n,e)=>()=>(e||n((e={exports:{}}).exports,e),e.exports),y=(n,e)=>{for(var t in e)a(n,t,{get:e[t],enumerable:!0})},r=(n,e,t,o)=>{if(e&&typeof e==\\"object\\"||typeof e==\\"function\\")for(let s of g(e))!f.call(n,s)&&s!==t&&a(n,s,{get:()=>e[s],enumerable:!(o=u(e,s))||o.enumerable});return n};var b=(n,e,t)=>(t=n!=null?p(m(n)):{},r(e||!n||!n.__esModule?a(t,\\"default\\",{value:n,enumerable:!0}):t,n)),v=n=>r(a({},\\"__esModule\\",{value:!0}),n);var c=w((k,l)=>{l.exports=_jsx_runtime});var O={};y(O,{default:()=>d,frontmatter:()=>A});var i=b(c()),A={title:\\"Analyzing OpenTelemetry apps with Elastic AI Assistant and APM\\",slug:\\"analyzing-opentelemetry-apps-elastic-ai-assistant-apm\\",date:\\"2024-03-12\\",description:\\"Elastic Observability provides native OpenTelemetry support, but analyzing applications logs, metrics, and traces can be daunting. Elastic Observability not only provides AIOps features but also an AI Assistant (co-pilot) to help get to MTTR faster.\\",author:[{slug:\\"bahubali-shetti\\"}],image:\\"ecs-otel-announcement-3.jpeg\\",tags:[{slug:\\"ai-assistant\\"},{slug:\\"apm\\"},{slug:\\"genai\\"},{slug:\\"apm\\"},{slug:\\"opentelemetry\\"}]};function h(n){let e={a:\\"a\\",blockquote:\\"blockquote\\",em:\\"em\\",h2:\\"h2\\",img:\\"img\\",li:\\"li\\",ol:\\"ol\\",p:\\"p\\",strong:\\"strong\\",ul:\\"ul\\",...n.components},{Video:t}=e;return t||E(\\"Video\\",!0),(0,i.jsxs)(i.Fragment,{children:[(0,i.jsxs)(e.p,{children:[\\"OpenTelemetry is rapidly becoming the most expansive project within the Cloud Native Computing Foundation (CNCF), boasting as many commits as Kubernetes and garnering widespread support from customers. Numerous companies are adopting OpenTelemetry and integrating it into their applications. Elastic\\\\xAE offers detailed \\",(0,i.jsx)(e.a,{href:\\"https://www.elastic.co/blog/getting-started-opentelemetry-instrumentation-sample-app\\",rel:\\"nofollow\\",children:\\"guides\\"}),\\" on implementing OpenTelemetry for applications. However, like many applications, pinpointing and resolving issues can be time-consuming.\\"]}),`\\n`,(0,i.jsxs)(e.p,{children:[\\"The \\",(0,i.jsx)(e.a,{href:\\"https://www.elastic.co/blog/context-aware-insights-elastic-ai-assistant-observability\\",rel:\\"nofollow\\",children:\\"Elastic AI Assistant\\"}),\\" significantly enhances the process, not only in identifying but also in resolving issues. 
This is further enhanced by Elastic\\\\u2019s new Service Level Objective (SLO) capability, allowing you to streamline your entire site reliability engineering (SRE) process from detecting potential issues to enhancing the overall customer experience.\\"]}),`\\n`,(0,i.jsx)(e.p,{children:\\"In this blog, we will demonstrate how you, as an SRE, can detect issues in a service equipped with OpenTelemetry. We will explore problem identification using Elastic APM, Elastic\\\\u2019s AIOps capabilities, and the Elastic AI Assistant.\\"}),`\\n`,(0,i.jsxs)(e.p,{children:[\\"We will illustrate this using the \\",(0,i.jsx)(e.a,{href:\\"https://github.com/elastic/opentelemetry-demo\\",rel:\\"nofollow\\",children:\\"OpenTelemetry demo\\"}),\\", with a \\",(0,i.jsx)(e.a,{href:\\"https://opentelemetry.io/docs/demo/feature-flags/\\",rel:\\"nofollow\\",children:\\"feature flag (cartService)\\"}),\\" that is activated.\\"]}),`\\n`,(0,i.jsx)(e.p,{children:\\"Our walkthrough will encompass two scenarios:\\"}),`\\n`,(0,i.jsxs)(e.ol,{children:[`\\n`,(0,i.jsxs)(e.li,{children:[`\\n`,(0,i.jsx)(e.p,{children:\\"When the SLO for cart service becomes noncompliant, we will analyze the error through Elastic APM. The Elastic AI Assistant will assist by providing a runbook and a GitHub issue to facilitate issue analysis.\\"}),`\\n`]}),`\\n`,(0,i.jsxs)(e.li,{children:[`\\n`,(0,i.jsx)(e.p,{children:\\"Should the SLO for the cart service be noncompliant, we will examine the trace that indicates a high failure rate. We will employ AIOps for failure correlation and the AI Assistant to analyze logs and Kubernetes metrics directly from the Assistant.\\"}),`\\n`]}),`\\n`]}),`\\n`,(0,i.jsx)(e.h2,{id:\\"prerequisites-and-config\\",children:\\"Prerequisites and config\\"}),`\\n`,(0,i.jsx)(e.p,{children:\\"If you plan on following this blog, here are some of the components and details we used to set up the configuration:\\"}),`\\n`,(0,i.jsxs)(e.ul,{children:[`\\n`,(0,i.jsxs)(e.li,{children:[`\\n`,(0,i.jsxs)(e.p,{children:[\\"Ensure you have an account on \\",(0,i.jsx)(e.a,{href:\\"https://cloud.elastic.co/\\",rel:\\"nofollow\\",children:\\"Elastic Cloud\\"}),\\" and a deployed stack (\\",(0,i.jsx)(e.a,{href:\\"https://www.elastic.co/guide/en/elastic-stack/current/installing-elastic-stack.html\\",rel:\\"nofollow\\",children:\\"see instructions here\\"}),\\").\\"]}),`\\n`]}),`\\n`,(0,i.jsxs)(e.li,{children:[`\\n`,(0,i.jsxs)(e.p,{children:[\\"We used the OpenTelemetry Demo. Directions for using Elastic with OpenTelemetry Demo are \\",(0,i.jsx)(e.a,{href:\\"https://github.com/elastic/opentelemetry-demo\\",rel:\\"nofollow\\",children:\\"here\\"}),\\".\\"]}),`\\n`]}),`\\n`,(0,i.jsxs)(e.li,{children:[`\\n`,(0,i.jsx)(e.p,{children:\\"Additionally you will need to connect your AI Assistant to your favorite LLM. We used Azure OpenAI GPT-4.\\"}),`\\n`]}),`\\n`,(0,i.jsxs)(e.li,{children:[`\\n`,(0,i.jsx)(e.p,{children:\\"We also ran the OpenTelemetry Demo on Kubernetes, specifically on GKE.\\"}),`\\n`]}),`\\n`]}),`\\n`,(0,i.jsx)(e.h2,{id:\\"slo-noncompliance\\",children:\\"SLO noncompliance\\"}),`\\n`,(0,i.jsxs)(e.p,{children:[\\"Elastic APM recently released the SLO (Service Level Objectives) feature in \\",(0,i.jsx)(e.a,{href:\\"https://www.elastic.co/guide/en/observability/8.12/slo.html\\",rel:\\"nofollow\\",children:\\"8.12\\"}),\\". 
This feature enables setting measurable performance targets for services, such as \\",(0,i.jsx)(e.a,{href:\\"https://sre.google/sre-book/monitoring-distributed-systems/\\",rel:\\"nofollow\\",children:\\"availability, latency, traffic, errors, and saturation or define your own\\"}),\\". Key components include:\\"]}),`\\n`,(0,i.jsxs)(e.ul,{children:[`\\n`,(0,i.jsxs)(e.li,{children:[`\\n`,(0,i.jsx)(e.p,{children:\\"Defining and monitoring SLIs (Service Level Indicators)\\"}),`\\n`]}),`\\n`,(0,i.jsxs)(e.li,{children:[`\\n`,(0,i.jsx)(e.p,{children:\\"Monitoring error budgets indicating permissible performance shortfalls\\"}),`\\n`]}),`\\n`,(0,i.jsxs)(e.li,{children:[`\\n`,(0,i.jsx)(e.p,{children:\\"Alerting on burn rates showing error budget consumption\\"}),`\\n`]}),`\\n`]}),`\\n`,(0,i.jsx)(e.p,{children:\\"We set up two SLOs for cart service:\\"}),`\\n`,(0,i.jsxs)(e.ul,{children:[`\\n`,(0,i.jsxs)(e.li,{children:[`\\n`,(0,i.jsxs)(e.p,{children:[(0,i.jsx)(e.strong,{children:\\"Availability SLO\\"}),\\", which monitors its availability by ensuring that transactions succeed. We set up the feature flag in the OpenTelemetry application, which generates an error for EmptyCart transactions 10% of the time.\\"]}),`\\n`]}),`\\n`,(0,i.jsxs)(e.li,{children:[`\\n`,(0,i.jsxs)(e.p,{children:[(0,i.jsx)(e.strong,{children:\\"Latency SLO\\"}),\\" to ensure transactions do not exceed a specific latency threshold, which would degrade the customer experience.\\"]}),`\\n`]}),`\\n`]}),`\\n`,(0,i.jsx)(e.p,{children:(0,i.jsx)(e.img,{src:\\"/assets/images/analyzing-opentelemetry-apps-elastic-ai-assistant-apm/image1.png\\",alt:\\"1 - SLOs\\",width:\\"1952\\",height:\\"1434\\"})}),`\\n`,(0,i.jsx)(e.p,{children:\\"Because of the OTel cartservice feature flag, the availability SLO is triggered, and within the SLO details, we see that over a seven-day period the availability is well below our target of 99.9%, at 95.5%. Additionally, all of the available error budget has been exhausted.\\"}),`\\n`,(0,i.jsx)(e.p,{children:(0,i.jsx)(e.img,{src:\\"/assets/images/analyzing-opentelemetry-apps-elastic-ai-assistant-apm/image2.png\\",alt:\\"2 - cart service otel\\",width:\\"1999\\",height:\\"1127\\"})}),`\\n`,(0,i.jsx)(e.p,{children:\\"With SLOs, you can easily identify when customer experience issues occur, and catch potential service issues before they worsen.\\"}),`\\n`,(0,i.jsx)(e.h2,{id:\\"scenario-1-analyzing-apm-trace-and-logs-with-ai-assistant\\",children:\\"Scenario 1: Analyzing APM trace and logs with AI Assistant\\"}),`\\n`,(0,i.jsx)(e.p,{children:\\"Once the SLO is found as noncompliant, we can dive into cart service to investigate in Elastic APM. 
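For reference, here is a minimal sketch of how an availability SLO like the one above could be created through Kibana's SLO API, which shipped alongside the 8.12 SLO feature. The service name, index pattern, and target below are assumptions based on the OpenTelemetry demo setup described in this post, and the exact request schema may differ by version, so consult the SLO API documentation for your release:

```json
POST kbn:/api/observability/slos
{
  "name": "cartservice availability",
  "description": "EmptyCart transactions must succeed 99.9% of the time",
  "indicator": {
    "type": "sli.apm.transactionErrorRate",
    "params": {
      "service": "cartservice",
      "environment": "*",
      "transactionType": "request",
      "transactionName": "*",
      "index": "metrics-apm*"
    }
  },
  "timeWindow": { "duration": "7d", "type": "rolling" },
  "budgetingMethod": "occurrences",
  "objective": { "target": 0.999 }
}
```

A matching latency SLO would swap in a transaction-duration indicator with a latency threshold in place of the error-rate target.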
The following walks through the set of steps you can take in Elastic APM and how to use the AI Assistant to analyze the issue:\\"}),`\\n`,(0,i.jsx)(t,{vidyardUuid:\\"FSpw53JN9Xu32V1kLQCE8z\\"}),`\\n`,(0,i.jsx)(e.p,{children:\\"From the video, we can see that once in APM, we took the following steps.\\"}),`\\n`,(0,i.jsxs)(e.ol,{children:[`\\n`,(0,i.jsxs)(e.li,{children:[`\\n`,(0,i.jsx)(e.p,{children:\\"Investigated the trace EmptyCart, which was experiencing larger than normal failure rates.\\"}),`\\n`]}),`\\n`,(0,i.jsxs)(e.li,{children:[`\\n`,(0,i.jsx)(e.p,{children:\\"The trace showed a significant number of failures, which also resulted in slightly larger latency.\\"}),`\\n`]}),`\\n`,(0,i.jsxs)(e.li,{children:[`\\n`,(0,i.jsx)(e.p,{children:\\"We used AIOps failure correlation to identify the potential component causing the failure, which correlated to a field value of FailedPrecondition.\\"}),`\\n`]}),`\\n`,(0,i.jsxs)(e.li,{children:[`\\n`,(0,i.jsx)(e.p,{children:\\"While filtering on that value and reviewing the logs, we still couldn\\\\u2019t understand what this meant.\\"}),`\\n`]}),`\\n`,(0,i.jsxs)(e.li,{children:[`\\n`,(0,i.jsx)(e.p,{children:\\"This is where you can use Elastic\\\\u2019s AI Assistant to further your understanding of the issue.\\"}),`\\n`]}),`\\n`]}),`\\n`,(0,i.jsx)(e.p,{children:\\"AI Assistant helped us analyze the following:\\"}),`\\n`,(0,i.jsxs)(e.ol,{children:[`\\n`,(0,i.jsxs)(e.li,{children:[`\\n`,(0,i.jsx)(e.p,{children:\\"It helped us understand what the log message meant and that it was related to the Redis connection failure issue.\\"}),`\\n`]}),`\\n`,(0,i.jsxs)(e.li,{children:[`\\n`,(0,i.jsx)(e.p,{children:\\"Because we couldn\\\\u2019t connect to Redis, we asked the AI Assistant to give us the metrics for the Redis Kubernetes pods.\\"}),`\\n`]}),`\\n`,(0,i.jsxs)(e.li,{children:[`\\n`,(0,i.jsx)(e.p,{children:\\"We learned there were two pods for Redis from the logs over the last two hours.\\"}),`\\n`]}),`\\n`,(0,i.jsxs)(e.li,{children:[`\\n`,(0,i.jsx)(e.p,{children:\\"However, we also learned that the memory of one seems to be increasing.\\"}),`\\n`]}),`\\n`,(0,i.jsxs)(e.li,{children:[`\\n`,(0,i.jsx)(e.p,{children:\\"It seems that Redis restarted (hence the second pod), and with this information we could dive deeper into what happened to Redis.\\"}),`\\n`]}),`\\n`]}),`\\n`,(0,i.jsx)(e.p,{children:\\"You can see how quickly we could correlate a significant amount of information, logs, metrics, and traces through the AI Assistant and Elastic\\\\u2019s APM capabilities. We didn\\\\u2019t have to go through multiple screens to hunt down information.\\"}),`\\n`,(0,i.jsx)(e.h2,{id:\\"scenario-2-analyzing-apm-error-with-ai-assistant\\",children:\\"Scenario 2: Analyzing APM error with AI Assistant\\"}),`\\n`,(0,i.jsx)(e.p,{children:\\"Once the SLO is found as noncompliant, we can dive into cart service to investigate in Elastic APM. 
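If you want to reproduce the failure correlation from Scenario 1 outside the UI, a roughly equivalent query against the APM trace data is sketched below. The `traces-apm*` index pattern and the `service.name`, `transaction.name`, and `event.outcome` fields follow Elastic APM's default conventions; the exact field that carries the `FailedPrecondition` value depends on how the cart service is instrumented, so treat this as an assumption rather than a recipe:

```json
GET traces-apm*/_search
{
  "query": {
    "bool": {
      "filter": [
        { "term": { "service.name": "cartservice" } },
        { "term": { "transaction.name": "EmptyCart" } },
        { "term": { "event.outcome": "failure" } }
      ]
    }
  }
}
```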
The following walks through the set of steps you can take in Elastic APM and use the AI Assistant to analyze the issue:\\"}),`\\n`,(0,i.jsx)(t,{vidyardUuid:\\"dVScqDxPJWCPCeGu8WMoCw\\"}),`\\n`,(0,i.jsx)(e.p,{children:\\"From the video, we can see that once in APM, we took the following steps:\\"}),`\\n`,(0,i.jsxs)(e.ol,{children:[`\\n`,(0,i.jsxs)(e.li,{children:[`\\n`,(0,i.jsx)(e.p,{children:\\"We noticed a specific error for the APM service.\\"}),`\\n`]}),`\\n`,(0,i.jsxs)(e.li,{children:[`\\n`,(0,i.jsx)(e.p,{children:\\"We investigated this in the error tab, and while we see it\\\\u2019s an issue with connection to Redis, we still need more information.\\"}),`\\n`]}),`\\n`,(0,i.jsxs)(e.li,{children:[`\\n`,(0,i.jsx)(e.p,{children:\\"The AI Assistant helps us understand the stacktrace and provides some potential causes for the error and ways to diagnose and resolve it.\\"}),`\\n`]}),`\\n`,(0,i.jsxs)(e.li,{children:[`\\n`,(0,i.jsx)(e.p,{children:\\"We also asked it for a runbook, created by our SRE team, which gives us steps to work through this particular issue.\\"}),`\\n`]}),`\\n`]}),`\\n`,(0,i.jsx)(e.p,{children:\\"But as you can see, AI Assistant provides us not only with information about the error message but also how to diagnose it and potentially resolve it with an internal runbook.\\"}),`\\n`,(0,i.jsx)(e.h2,{id:\\"achieving-operational-excellence-optimal-performance-and-reliability\\",children:\\"Achieving operational excellence, optimal performance, and reliability\\"}),`\\n`,(0,i.jsx)(e.p,{children:\\"We\\\\u2019ve shown how an OpenTelemetry instrumented application (OTel demo) can be analyzed using Elastic\\\\u2019s features, especially the AI Assistant coupled with Elastic APM, AIOps, and the latest SLO features. Elastic significantly streamlines the process of identifying and resolving issues within your applications.\\"}),`\\n`,(0,i.jsx)(e.p,{children:\\"Through our detailed walkthrough of two distinct scenarios, we have seen how Elastic APM and the AI Assistant can efficiently analyze and address noncompliance with SLOs in a cart service. The ability to quickly correlate information, logs, metrics, and traces through these tools not only saves time but also enhances the overall effectiveness of the troubleshooting process.\\"}),`\\n`,(0,i.jsx)(e.p,{children:\\"The use of Elastic\'s AI Assistant in these scenarios underscores the value of integrating advanced AI capabilities into operational workflows. It goes beyond simple error analysis, offering insights into potential causes and providing actionable solutions, sometimes even with customized runbooks. This integration of technology fundamentally changes how SREs approach problem-solving, making the process more efficient and less reliant on manual investigation.\\"}),`\\n`,(0,i.jsx)(e.p,{children:\\"Overall, the advancements in Elastic\\\\u2019s APM, AIOps capabilities, and the AI Assistant, particularly in handling OpenTelemetry data, represent a significant step forward in operational excellence. These tools enable SREs to not only react swiftly to emerging issues but also proactively manage and optimize the performance and reliability of their services, thereby ensuring an enhanced customer experience.\\"}),`\\n`,(0,i.jsx)(e.h2,{id:\\"try-it-out\\",children:\\"Try it out\\"}),`\\n`,(0,i.jsxs)(e.p,{children:[\\"Existing Elastic Cloud customers can access many of these features directly from the \\",(0,i.jsx)(e.a,{href:\\"https://cloud.elastic.co/\\",rel:\\"nofollow\\",children:\\"Elastic Cloud console\\"}),\\". 
Not taking advantage of Elastic on cloud? \\",(0,i.jsx)(e.a,{href:\\"https://www.elastic.co/cloud/cloud-trial-overview\\",rel:\\"nofollow\\",children:\\"Start a free trial\\"}),\\".\\"]}),`\\n`,(0,i.jsxs)(e.blockquote,{children:[`\\n`,(0,i.jsxs)(e.ul,{children:[`\\n`,(0,i.jsx)(e.li,{children:(0,i.jsx)(e.a,{href:\\"https://www.elastic.co/blog/service-level-objectives-slos-logs-metrics\\",rel:\\"nofollow\\",children:\\"Build better Service Level Objectives (SLOs) from logs and metrics\\"})}),`\\n`,(0,i.jsx)(e.li,{children:(0,i.jsx)(e.a,{href:\\"https://www.elastic.co/blog/whats-new-elastic-observability-8-12-0\\",rel:\\"nofollow\\",children:\\"Elastic Observability 8.12: GA for AI Assistant, SLO, and Mobile APM support\\"})}),`\\n`,(0,i.jsx)(e.li,{children:(0,i.jsx)(e.a,{href:\\"https://www.elastic.co/blog/native-opentelemetry-support-in-elastic-observability\\",rel:\\"nofollow\\",children:\\"Native OpenTelemetry support in Elastic Observability\\"})}),`\\n`,(0,i.jsx)(e.li,{children:(0,i.jsx)(e.a,{href:\\"https://www.elastic.co/blog/context-aware-insights-elastic-ai-assistant-observability\\",rel:\\"nofollow\\",children:\\"Context-aware insights using the Elastic AI Assistant for Observability\\"})}),`\\n`]}),`\\n`]}),`\\n`,(0,i.jsx)(e.p,{children:(0,i.jsx)(e.em,{children:\\"The release and timing of any features or functionality described in this post remain at Elastic\'s sole discretion. Any features or functionality not currently available may not be delivered on time or at all.\\"})}),`\\n`,(0,i.jsx)(e.p,{children:(0,i.jsx)(e.em,{children:\\"In this blog post, we may have used or referred to third party generative AI tools, which are owned and operated by their respective owners. Elastic does not have any control over the third party tools and we have no responsibility or liability for their content, operation or use, nor for any loss or damage that may arise from your use of such tools. Please exercise caution when using AI tools with personal, sensitive or confidential information. Any data you submit may be used for AI training or other purposes. There is no guarantee that information you provide will be kept secure or confidential. You should familiarize yourself with the privacy practices and terms of use of any generative AI tools prior to use.\\"})}),`\\n`,(0,i.jsx)(e.p,{children:(0,i.jsx)(e.em,{children:\\"Elastic, Elasticsearch, ESRE, Elasticsearch Relevance Engine and associated marks are trademarks, logos or registered trademarks of Elasticsearch N.V. in the United States and other countries. 
All other company and product names are trademarks, logos or registered trademarks of their respective owners.\\"})})]})}function d(n={}){let{wrapper:e}=n.components||{};return e?(0,i.jsx)(e,{...n,children:(0,i.jsx)(h,{...n})}):h(n)}function E(n,e){throw new Error(\\"Expected \\"+(e?\\"component\\":\\"object\\")+\\" `\\"+n+\\"` to be defined: you likely forgot to import, pass, or provide it.\\")}return v(O);})();\\n;return Component;"},"_id":"articles/analyzing-opentelemetry-apps-elastic-ai-assistant-apm.mdx","_raw":{"sourceFilePath":"articles/analyzing-opentelemetry-apps-elastic-ai-assistant-apm.mdx","sourceFileName":"analyzing-opentelemetry-apps-elastic-ai-assistant-apm.mdx","sourceFileDir":"articles","contentType":"mdx","flattenedPath":"articles/analyzing-opentelemetry-apps-elastic-ai-assistant-apm"},"type":"Article","imageUrl":"/assets/images/analyzing-opentelemetry-apps-elastic-ai-assistant-apm/ecs-otel-announcement-3.jpeg","readingTime":"8 min read","url":"/analyzing-opentelemetry-apps-elastic-ai-assistant-apm","headings":[{"level":2,"title":"Prerequisites and config","href":"#prerequisites-and-config"},{"level":2,"title":"SLO noncompliance","href":"#slo-noncompliance"},{"level":2,"title":"Scenario 1: Analyzing APM trace and logs with AI Assistant","href":"#scenario-1-analyzing-apm-trace-and-logs-with-ai-assistant"},{"level":2,"title":"Scenario 2: Analyzing APM error with AI Assistant","href":"#scenario-2-analyzing-apm-error-with-ai-assistant"},{"level":2,"title":"Achieving operational excellence, optimal performance, and reliability","href":"#achieving-operational-excellence-optimal-performance-and-reliability"},{"level":2,"title":"Try it out","href":"#try-it-out"}]},{"title":"The antidote for index mapping exceptions: ignore_malformed","slug":"antidote-index-mapping-exceptions-ignore-malformed","date":"2023-08-03","description":"How an almost unknown setting called ignore_malformed can make the difference between dropping a document entirely if a single field is malformed or just ignoring that field and ingesting the document anyway.","image":"illustration-stack-modernize-solutions-1689x980_(1).png","author":[{"slug":"giuseppe-santoro","type":"Author","_raw":{}}],"subtitle":"Ignore fields not compliant with index mappings and avoid dropping documents during ingestion to Elasticsearch\xae","tags":[{"slug":"log-analytics","type":"Tag","_raw":{}}],"body":{"raw":"\\nIn this article, I\'ll explain how the setting _ignore_malformed_ can make the difference between a 100% dropping rate and a 100% success rate, even with ignoring some malformed fields.\\n\\nAs a senior software engineer working at Elastic\xae, I have been on the first line of support for anything related to Beats or Elastic Agent running on Kubernetes and Cloud Native integrations like Nginx ingress controller.\\n\\nDuring my experience, I have seen all sorts of issues. Users have very different requirements. But at some point during their experience, most of them encounter a very common problem with Elasticsearch: _index mapping exceptions_.\\n\\n## How mappings work\\n\\nLike any other document-based NoSQL database, Elasticsearch doesn’t force you to provide the document schema (called [index mapping](https://www.elastic.co/guide/en/elasticsearch/reference/current/mapping.html) or simply _mapping_) upfront. If you provide a mapping, it will use it. Otherwise, it will infer one from the first document or any subsequent documents that contain new fields.\\n\\nIn reality, the situation is not black and white. 
You can also provide a partial mapping that covers only some of the fields, like the most common fields, and leave Elasticsearch to figure out the mapping of all the other fields during ingestion with [Dynamic Mapping](https://www.elastic.co/guide/en/elasticsearch/reference/current/dynamic-mapping.html).\\n\\n## What happens when data is malformed?\\n\\nNo matter if you specified a mapping upfront or if Elasticsearch inferred one automatically, Elasticsearch will drop an entire document with just one field that doesn\'t match the mapping of an index and return an error instead. This is not much different from what happens with other SQL databases or NoSQL data stores with inferred schemas. The reason for this behavior is to prevent malformed data and exceptions at query time.\\n\\nA problem arises if a user doesn\'t look at the ingestion logs and misses those errors. They might never figure out that something went wrong, or even worse, Elasticsearch might stop ingesting data entirely if all the subsequent documents are malformed.\\n\\nThe above situation sounds very catastrophic, but it\'s entirely possible since I have seen it many times when on-call for support or on [discuss.elastic.co](https://discuss.elastic.co/latest). The situation is even more likely to happen if you have user-generated documents, so you don\'t have full control over the quality of your data.\\n\\nLuckily, there is a setting that not many people know about in Elasticsearch that solves the exact problems above. This field has been there since [Elasticsearch 2.0](https://www.elastic.co/guide/en/elasticsearch/reference/2.0/ignore-malformed.html). We are talking ancient history here since the latest version of the stack at the time of writing is [Elastic Stack 8.9.0](https://www.elastic.co/blog/whats-new-elastic-enterprise-search-8-9-0).\\n\\nLet\'s now dive into how to use this Elasticsearch feature.\\n\\n## A toy use case\\n\\nTo make it easier to interact with Elasticsearch, I am going to use [Kibana\xae Dev Tools](https://www.elastic.co/guide/en/kibana/current/console-kibana.html) in this tutorial.\\n\\nThe following examples are taken from the official documentation on [ignore_malformed](https://www.elastic.co/guide/en/elasticsearch/reference/8.8/ignore-malformed.html#ignore-malformed). I am here to expand on those examples by providing a few more details about what happens behind the scenes and on how to search for ignored fields. We are going to use the index name _my-index_, but feel free to change that to whatever you like.\\n\\nFirst, we want to create an index mapping with two fields called _number_one_ and _number_two_. 
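Before configuring those two fields, here is a quick sketch of the partial-mapping behavior described under "How mappings work" above. The index and field names are hypothetical: only `known_field` is mapped upfront, and Elasticsearch infers a mapping for `new_field` the first time it sees it.

```json
PUT partial-index
{
  "mappings": {
    "properties": {
      "known_field": { "type": "keyword" }
    }
  }
}

PUT partial-index/_doc/1
{
  "known_field": "some value",
  "new_field": 42
}
```

Running `GET partial-index/_mapping` afterwards would show a dynamically added `long` mapping for `new_field` alongside the explicit `keyword` mapping. Back to _number_one_ and _number_two_: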
Both fields have type _integer_, but only one of them has _ **ignore_malformed** _ set to true, and the other one inherits the default value _ignore_malformed: false_ instead.\\n\\n```json\\nPUT my-index\\n{\\n \\"mappings\\": {\\n \\"properties\\": {\\n \\"number_one\\": {\\n \\"type\\": \\"integer\\",\\n \\"ignore_malformed\\": true\\n },\\n \\"number_two\\": {\\n \\"type\\": \\"integer\\"\\n }\\n }\\n }\\n}\\n```\\n\\nIf the mentioned index didn’t exist before and the previous command ran successfully, you should get the following result:\\n\\n```json\\n{\\n \\"acknowledged\\": true,\\n \\"shards_acknowledged\\": true,\\n \\"index\\": \\"my-index\\"\\n}\\n```\\n\\nTo double-check that the above mapping has been created correctly, we can query the newly created index with the command:\\n\\n```bash\\nGET my-index/_mapping\\n```\\n\\nYou should get the following result:\\n\\n```json\\n{\\n \\"my-index\\": {\\n \\"mappings\\": {\\n \\"properties\\": {\\n \\"number_one\\": {\\n \\"type\\": \\"integer\\",\\n \\"ignore_malformed\\": true\\n },\\n \\"number_two\\": {\\n \\"type\\": \\"integer\\"\\n }\\n }\\n }\\n }\\n}\\n```\\n\\nNow we can ingest two sample documents — both invalid:\\n\\n```bash\\nPUT my-index/_doc/1\\n{\\n \\"text\\": \\"Some text value\\",\\n \\"number_one\\": \\"foo\\"\\n}\\n\\nPUT my-index/_doc/2\\n{\\n \\"text\\": \\"Some text value\\",\\n \\"number_two\\": \\"foo\\"\\n}\\n```\\n\\nThe document with _id=1_ is correctly ingested, while the document with _id=2_ fails with the following error. The difference between those two documents is in which field we are trying to ingest a sample string “foo” instead of an integer.\\n\\n```json\\n{\\n \\"error\\": {\\n \\"root_cause\\": [\\n {\\n \\"type\\": \\"document_parsing_exception\\",\\n \\"reason\\": \\"[3:17] failed to parse field [number_two] of type [integer] in document with id \'2\'. Preview of field\'s value: \'foo\'\\"\\n }\\n ],\\n \\"type\\": \\"document_parsing_exception\\",\\n \\"reason\\": \\"[3:17] failed to parse field [number_two] of type [integer] in document with id \'2\'. Preview of field\'s value: \'foo\'\\",\\n \\"caused_by\\": {\\n \\"type\\": \\"number_format_exception\\",\\n \\"reason\\": \\"For input string: \\\\\\"foo\\\\\\"\\"\\n }\\n },\\n \\"status\\": 400\\n}\\n```\\n\\nDepending on the client used for ingesting your documents, you might get different errors or warnings, but logically the problem is the same. The entire document is not ingested because part of it doesn’t conform with the index mapping. There are too many possible error messages to name, but suffice it to say that malformed data is quite a common problem. And we need a better way to handle it.\\n\\nNow that at least one document has been ingested, you can try searching with the following query:\\n\\n```bash\\nGET my-index/_search\\n{\\n \\"fields\\": [\\n \\"*\\"\\n ]\\n}\\n```\\n\\nHere, the parameter _fields_ is required to show the values of those fields that have been ignored. 
More on this later.\\n\\nFrom the result, you can see that only the first document (with _id=1_) has been ingested correctly while the second document (with _id=2_) has been completely dropped.\\n\\n```json\\n{\\n \\"took\\": 14,\\n \\"timed_out\\": false,\\n \\"_shards\\": {\\n \\"total\\": 1,\\n \\"successful\\": 1,\\n \\"skipped\\": 0,\\n \\"failed\\": 0\\n },\\n \\"hits\\": {\\n \\"total\\": {\\n \\"value\\": 1,\\n \\"relation\\": \\"eq\\"\\n },\\n \\"max_score\\": null,\\n \\"hits\\": [\\n {\\n \\"_index\\": \\"my-index\\",\\n \\"_id\\": \\"1\\",\\n \\"_score\\": null,\\n \\"_ignored\\": [\\"number_one\\"],\\n \\"_source\\": {\\n \\"text\\": \\"Some text value\\",\\n \\"number_one\\": \\"foo\\"\\n },\\n \\"fields\\": {\\n \\"text\\": [\\"Some text value\\"],\\n \\"text.keyword\\": [\\"Some text value\\"]\\n },\\n \\"ignored_field_values\\": {\\n \\"number_one\\": [\\"foo\\"]\\n },\\n \\"sort\\": [\\"1\\"]\\n }\\n ]\\n }\\n}\\n```\\n\\nFrom the above JSON response, you will notice some things, such as:\\n\\n- A new field called _ **\\\\_ignored** _ of type array with the list of all fields that have been ignored while ingesting documents\\n- A new field called _ **ignored_field_values** _ with a dictionary of ignored fields and their values\\n- The field called \\\\__ **source** _ contains the original document unmodified. This is especially useful if you want to fix the problems with the mapping later.\\n- The field called _ **text** _ was not present in the original mapping, but it is now included since Elasticsearch automatically inferred the type of this field. In fact, if you try to query the mapping of the index _ **my-index** _ again via the command:\\n\\n```bash\\nGET my-index/_mapping\\n```\\n\\nYou should get this result:\\n\\n```json\\n{\\n \\"my-index\\": {\\n \\"mappings\\": {\\n \\"properties\\": {\\n \\"number_one\\": {\\n \\"type\\": \\"integer\\",\\n \\"ignore_malformed\\": true\\n },\\n \\"number_two\\": {\\n \\"type\\": \\"integer\\"\\n },\\n \\"text\\": {\\n \\"type\\": \\"text\\",\\n \\"fields\\": {\\n \\"keyword\\": {\\n \\"type\\": \\"keyword\\",\\n \\"ignore_above\\": 256\\n }\\n }\\n }\\n }\\n }\\n }\\n}\\n```\\n\\nFinally, if you ingest some valid documents like the following command:\\n\\n```bash\\nPUT my-index/_doc/3\\n{\\n \\"text\\": \\"Some text value\\",\\n \\"number_two\\": 10\\n}\\n```\\n\\nYou can check how many documents have at least one ignored field with the following [Exists query](https://www.elastic.co/guide/en/elasticsearch/reference/current/query-dsl-exists-query.html):\\n\\n```bash\\nGET my-index/_search\\n{\\n \\"query\\": {\\n \\"exists\\": {\\n \\"field\\": \\"_ignored\\"\\n }\\n }\\n}\\n```\\n\\nYou can also see that out of the two documents ingested (with _id=1_ and _id=3_) only the document with _id=1_ contains an ignored field.\\n\\n```json\\n{\\n \\"took\\": 193,\\n \\"timed_out\\": false,\\n \\"_shards\\": {\\n \\"total\\": 1,\\n \\"successful\\": 1,\\n \\"skipped\\": 0,\\n \\"failed\\": 0\\n },\\n \\"hits\\": {\\n \\"total\\": {\\n \\"value\\": 1,\\n \\"relation\\": \\"eq\\"\\n },\\n \\"max_score\\": 1,\\n \\"hits\\": [\\n {\\n \\"_index\\": \\"my-index\\",\\n \\"_id\\": \\"1\\",\\n \\"_score\\": 1,\\n \\"_ignored\\": [\\"number_one\\"],\\n \\"_source\\": {\\n \\"text\\": \\"Some text value\\",\\n \\"number_one\\": \\"foo\\"\\n }\\n }\\n ]\\n }\\n}\\n```\\n\\nAlternatively, you can search for all documents that have a specific field being ignored with this [Terms 
query](https://www.elastic.co/guide/en/elasticsearch/reference/current/query-dsl-terms-query.html):\\n\\n```bash\\nGET my-index/_search\\n{\\n \\"query\\": {\\n \\"terms\\": {\\n \\"_ignored\\": [ \\"number_one\\"]\\n }\\n }\\n}\\n```\\n\\nThe result, in this case, will be the same as the previous one since we only managed to ingest a single document with that exact single field ignored.\\n\\n## Conclusion\\n\\nBecause we are a big fan of this flag, we\'ve enabled _ **ignore_malformed** _ by default for all Elastic integrations and in the [default index template for logs data streams](https://github.com/elastic/elasticsearch/blob/main/x-pack/plugin/core/template-resources/src/main/resources/logs-settings.json#L13) as of 8.9.0. More information can be found in the official documentation for [ignore_malformed](https://www.elastic.co/guide/en/elasticsearch/reference/8.9/ignore-malformed.html).\\n\\nAnd since I am personally working on this feature, I can reassure you that it is a game changer.\\n\\nYou can start by setting _ **ignore_malformed** _ on any cluster manually before Elastic Stack 8.9.0. Or you can use the defaults that we set for you starting from [Elastic Stack 8.9.0](https://www.elastic.co/blog/whats-new-elastic-enterprise-search-8-9-0).\\n\\n_The release and timing of any features or functionality described in this post remain at Elastic\'s sole discretion. Any features or functionality not currently available may not be delivered on time or at all._\\n","code":"var Component=(()=>{var m=Object.create;var r=Object.defineProperty;var u=Object.getOwnPropertyDescriptor;var p=Object.getOwnPropertyNames;var f=Object.getPrototypeOf,g=Object.prototype.hasOwnProperty;var w=(t,e)=>()=>(e||t((e={exports:{}}).exports,e),e.exports),y=(t,e)=>{for(var i in e)r(t,i,{get:e[i],enumerable:!0})},s=(t,e,i,a)=>{if(e&&typeof e==\\"object\\"||typeof e==\\"function\\")for(let o of p(e))!g.call(t,o)&&o!==i&&r(t,o,{get:()=>e[o],enumerable:!(a=u(e,o))||a.enumerable});return t};var _=(t,e,i)=>(i=t!=null?m(f(t)):{},s(e||!t||!t.__esModule?r(i,\\"default\\",{value:t,enumerable:!0}):i,t)),b=t=>s(r({},\\"__esModule\\",{value:!0}),t);var d=w((T,l)=>{l.exports=_jsx_runtime});var v={};y(v,{default:()=>h,frontmatter:()=>x});var n=_(d()),x={title:\\"The antidote for index mapping exceptions: ignore_malformed\\",slug:\\"antidote-index-mapping-exceptions-ignore-malformed\\",date:\\"2023-08-03\\",subtitle:\\"Ignore fields not compliant with index mappings and avoid dropping documents during ingestion to Elasticsearch\\\\xAE\\",description:\\"How an almost unknown setting called ignore_malformed can make the difference between dropping a document entirely if a single field is malformed or just ignoring that field and ingesting the document anyway.\\",author:[{slug:\\"giuseppe-santoro\\"}],image:\\"illustration-stack-modernize-solutions-1689x980_(1).png\\",tags:[{slug:\\"log-analytics\\"}]};function c(t){let e={a:\\"a\\",code:\\"code\\",em:\\"em\\",h2:\\"h2\\",li:\\"li\\",p:\\"p\\",pre:\\"pre\\",strong:\\"strong\\",ul:\\"ul\\",...t.components};return(0,n.jsxs)(n.Fragment,{children:[(0,n.jsxs)(e.p,{children:[\\"In this article, I\'ll explain how the setting \\",(0,n.jsx)(e.em,{children:\\"ignore_malformed\\"}),\\" can make the difference between a 100% dropping rate and a 100% success rate, even with ignoring some malformed fields.\\"]}),`\\n`,(0,n.jsx)(e.p,{children:\\"As a senior software engineer working at Elastic\\\\xAE, I have been on the first line of support for anything related to Beats or Elastic Agent running 
on Kubernetes and Cloud Native integrations like Nginx ingress controller.\\"}),`\\n`,(0,n.jsxs)(e.p,{children:[\\"During my experience, I have seen all sorts of issues. Users have very different requirements. But at some point during their experience, most of them encounter a very common problem with Elasticsearch\\",(0,n.jsx)(\\"sup\\",{}),\\": \\",(0,n.jsx)(e.em,{children:\\"index mapping exceptions\\"}),\\".\\"]}),`\\n`,(0,n.jsx)(e.h2,{id:\\"how-mappings-work\\",children:\\"How mappings work\\"}),`\\n`,(0,n.jsxs)(e.p,{children:[\\"Like any other document-based NoSQL database, Elasticsearch doesn\\\\u2019t force you to provide the document schema (called \\",(0,n.jsx)(e.a,{href:\\"https://www.elastic.co/guide/en/elasticsearch/reference/current/mapping.html\\",rel:\\"nofollow\\",children:\\"index mapping\\"}),\\" or simply \\",(0,n.jsx)(e.em,{children:\\"mapping\\"}),\\") upfront. If you provide a mapping, it will use it. Otherwise, it will infer one from the first document or any subsequent documents that contain new fields.\\"]}),`\\n`,(0,n.jsxs)(e.p,{children:[\\"In reality, the situation is not black and white. You can also provide a partial mapping that covers only some of the fields, like the most common fields, and leave Elasticsearch to figure out the mapping of all the other fields during ingestion with \\",(0,n.jsx)(e.a,{href:\\"https://www.elastic.co/guide/en/elasticsearch/reference/current/dynamic-mapping.html\\",rel:\\"nofollow\\",children:\\"Dynamic Mapping\\"}),\\".\\"]}),`\\n`,(0,n.jsx)(e.h2,{id:\\"what-happens-when-data-is-malformed\\",children:\\"What happens when data is malformed?\\"}),`\\n`,(0,n.jsx)(e.p,{children:\\"No matter if you specified a mapping upfront or if Elasticsearch inferred one automatically, Elasticsearch will drop an entire document with just one field that doesn\'t match the mapping of an index and return an error instead. This is not much different from what happens with other SQL databases or NoSQL data stores with inferred schemas. The reason for this behavior is to prevent malformed data and exceptions at query time.\\"}),`\\n`,(0,n.jsx)(e.p,{children:\\"A problem arises if a user doesn\'t look at the ingestion logs and misses those errors. They might never figure out that something went wrong, or even worse, Elasticsearch might stop ingesting data entirely if all the subsequent documents are malformed.\\"}),`\\n`,(0,n.jsxs)(e.p,{children:[\\"The above situation sounds very catastrophic, but it\'s entirely possible since I have seen it many times when on-call for support or on \\",(0,n.jsx)(e.a,{href:\\"https://discuss.elastic.co/latest\\",rel:\\"nofollow\\",children:\\"discuss.elastic.co\\"}),\\". The situation is even more likely to happen if you have user-generated documents, so you don\'t have full control over the quality of your data.\\"]}),`\\n`,(0,n.jsxs)(e.p,{children:[\\"Luckily, there is a setting that not many people know about in Elasticsearch that solves the exact problems above. This field has been there since \\",(0,n.jsx)(e.a,{href:\\"https://www.elastic.co/guide/en/elasticsearch/reference/2.0/ignore-malformed.html\\",rel:\\"nofollow\\",children:\\"Elasticsearch 2.0\\"}),\\". 
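(As an aside: besides the per-field form shown later in this piece, the setting can be applied to every field in an index at once through the index-level `index.mapping.ignore_malformed` setting. A minimal sketch, with a hypothetical index name:

```json
PUT my-lenient-index
{
  "settings": {
    "index.mapping.ignore_malformed": true
  },
  "mappings": {
    "properties": {
      "number_one": { "type": "integer" }
    }
  }
}
```

Individual fields can still opt out by setting `ignore_malformed: false` explicitly in their mapping.)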
We are talking ancient history here since the latest version of the stack at the time of writing is \\",(0,n.jsx)(e.a,{href:\\"https://www.elastic.co/blog/whats-new-elastic-enterprise-search-8-9-0\\",rel:\\"nofollow\\",children:\\"Elastic Stack 8.9.0\\"}),\\".\\"]}),`\\n`,(0,n.jsx)(e.p,{children:\\"Let\'s now dive into how to use this Elasticsearch feature.\\"}),`\\n`,(0,n.jsx)(e.h2,{id:\\"a-toy-use-case\\",children:\\"A toy use case\\"}),`\\n`,(0,n.jsxs)(e.p,{children:[\\"To make it easier to interact with Elasticsearch, I am going to use \\",(0,n.jsx)(e.a,{href:\\"https://www.elastic.co/guide/en/kibana/current/console-kibana.html\\",rel:\\"nofollow\\",children:\\"Kibana\\\\xAE Dev Tools\\"}),\\" in this tutorial.\\"]}),`\\n`,(0,n.jsxs)(e.p,{children:[\\"The following examples are taken from the official documentation on \\",(0,n.jsx)(e.a,{href:\\"https://www.elastic.co/guide/en/elasticsearch/reference/8.8/ignore-malformed.html#ignore-malformed\\",rel:\\"nofollow\\",children:\\"ignore_malformed\\"}),\\". I am here to expand on those examples by providing a few more details about what happens behind the scenes and on how to search for ignored fields. We are going to use the index name \\",(0,n.jsx)(e.em,{children:\\"my-index\\"}),\\", but feel free to change that to whatever you like.\\"]}),`\\n`,(0,n.jsxs)(e.p,{children:[\\"First, we want to create an index mapping with two fields called \\",(0,n.jsx)(e.em,{children:\\"number_one\\"}),\\" and \\",(0,n.jsx)(e.em,{children:\\"number_two\\"}),\\". Both fields have type \\",(0,n.jsx)(e.em,{children:\\"integer\\"}),\\", but only one of them has _ \\",(0,n.jsx)(e.strong,{children:\\"ignore_malformed\\"}),\\" _ set to true, and the other one inherits the default value \\",(0,n.jsx)(e.em,{children:\\"ignore_malformed: false\\"}),\\" instead.\\"]}),`\\n`,(0,n.jsx)(e.pre,{children:(0,n.jsx)(e.code,{className:\\"language-json\\",children:`PUT my-index\\n{\\n \\"mappings\\": {\\n \\"properties\\": {\\n \\"number_one\\": {\\n \\"type\\": \\"integer\\",\\n \\"ignore_malformed\\": true\\n },\\n \\"number_two\\": {\\n \\"type\\": \\"integer\\"\\n }\\n }\\n }\\n}\\n`})}),`\\n`,(0,n.jsx)(e.p,{children:\\"If the mentioned index didn\\\\u2019t exist before and the previous command ran successfully, you should get the following result:\\"}),`\\n`,(0,n.jsx)(e.pre,{children:(0,n.jsx)(e.code,{className:\\"language-json\\",children:`{\\n \\"acknowledged\\": true,\\n \\"shards_acknowledged\\": true,\\n \\"index\\": \\"my-index\\"\\n}\\n`})}),`\\n`,(0,n.jsx)(e.p,{children:\\"To double-check that the above mapping has been created correctly, we can query the newly created index with the command:\\"}),`\\n`,(0,n.jsx)(e.pre,{children:(0,n.jsx)(e.code,{className:\\"language-bash\\",children:`GET my-index/_mapping\\n`})}),`\\n`,(0,n.jsx)(e.p,{children:\\"You should get the following result:\\"}),`\\n`,(0,n.jsx)(e.pre,{children:(0,n.jsx)(e.code,{className:\\"language-json\\",children:`{\\n \\"my-index\\": {\\n \\"mappings\\": {\\n \\"properties\\": {\\n \\"number_one\\": {\\n \\"type\\": \\"integer\\",\\n \\"ignore_malformed\\": true\\n },\\n \\"number_two\\": {\\n \\"type\\": \\"integer\\"\\n }\\n }\\n }\\n }\\n}\\n`})}),`\\n`,(0,n.jsx)(e.p,{children:\\"Now we can ingest two sample documents \\\\u2014 both invalid:\\"}),`\\n`,(0,n.jsx)(e.pre,{children:(0,n.jsx)(e.code,{className:\\"language-bash\\",children:`PUT my-index/_doc/1\\n{\\n \\"text\\": \\"Some text value\\",\\n \\"number_one\\": \\"foo\\"\\n}\\n\\nPUT my-index/_doc/2\\n{\\n \\"text\\": \\"Some text 
value\\",\\n \\"number_two\\": \\"foo\\"\\n}\\n`})}),`\\n`,(0,n.jsxs)(e.p,{children:[\\"The document with \\",(0,n.jsx)(e.em,{children:\\"id=1\\"}),\\" is correctly ingested, while the document with \\",(0,n.jsx)(e.em,{children:\\"id=2\\"}),\\" fails with the following error. The difference between those two documents is in which field we are trying to ingest a sample string \\\\u201Cfoo\\\\u201D instead of an integer.\\"]}),`\\n`,(0,n.jsx)(e.pre,{children:(0,n.jsx)(e.code,{className:\\"language-json\\",children:`{\\n \\"error\\": {\\n \\"root_cause\\": [\\n {\\n \\"type\\": \\"document_parsing_exception\\",\\n \\"reason\\": \\"[3:17] failed to parse field [number_two] of type [integer] in document with id \'2\'. Preview of field\'s value: \'foo\'\\"\\n }\\n ],\\n \\"type\\": \\"document_parsing_exception\\",\\n \\"reason\\": \\"[3:17] failed to parse field [number_two] of type [integer] in document with id \'2\'. Preview of field\'s value: \'foo\'\\",\\n \\"caused_by\\": {\\n \\"type\\": \\"number_format_exception\\",\\n \\"reason\\": \\"For input string: \\\\\\\\\\"foo\\\\\\\\\\"\\"\\n }\\n },\\n \\"status\\": 400\\n}\\n`})}),`\\n`,(0,n.jsx)(e.p,{children:\\"Depending on the client used for ingesting your documents, you might get different errors or warnings, but logically the problem is the same. The entire document is not ingested because part of it doesn\\\\u2019t conform with the index mapping. There are too many possible error messages to name, but suffice it to say that malformed data is quite a common problem. And we need a better way to handle it.\\"}),`\\n`,(0,n.jsx)(e.p,{children:\\"Now that at least one document has been ingested, you can try searching with the following query:\\"}),`\\n`,(0,n.jsx)(e.pre,{children:(0,n.jsx)(e.code,{className:\\"language-bash\\",children:`GET my-index/_search\\n{\\n \\"fields\\": [\\n \\"*\\"\\n ]\\n}\\n`})}),`\\n`,(0,n.jsxs)(e.p,{children:[\\"Here, the parameter \\",(0,n.jsx)(e.em,{children:\\"fields\\"}),\\" is required to show the values of those fields that have been ignored. 
More on this later.\\"]}),`\\n`,(0,n.jsxs)(e.p,{children:[\\"From the result, you can see that only the first document (with \\",(0,n.jsx)(e.em,{children:\\"id=1\\"}),\\") has been ingested correctly while the second document (with \\",(0,n.jsx)(e.em,{children:\\"id=2\\"}),\\") has been completely dropped.\\"]}),`\\n`,(0,n.jsx)(e.pre,{children:(0,n.jsx)(e.code,{className:\\"language-json\\",children:`{\\n \\"took\\": 14,\\n \\"timed_out\\": false,\\n \\"_shards\\": {\\n \\"total\\": 1,\\n \\"successful\\": 1,\\n \\"skipped\\": 0,\\n \\"failed\\": 0\\n },\\n \\"hits\\": {\\n \\"total\\": {\\n \\"value\\": 1,\\n \\"relation\\": \\"eq\\"\\n },\\n \\"max_score\\": null,\\n \\"hits\\": [\\n {\\n \\"_index\\": \\"my-index\\",\\n \\"_id\\": \\"1\\",\\n \\"_score\\": null,\\n \\"_ignored\\": [\\"number_one\\"],\\n \\"_source\\": {\\n \\"text\\": \\"Some text value\\",\\n \\"number_one\\": \\"foo\\"\\n },\\n \\"fields\\": {\\n \\"text\\": [\\"Some text value\\"],\\n \\"text.keyword\\": [\\"Some text value\\"]\\n },\\n \\"ignored_field_values\\": {\\n \\"number_one\\": [\\"foo\\"]\\n },\\n \\"sort\\": [\\"1\\"]\\n }\\n ]\\n }\\n}\\n`})}),`\\n`,(0,n.jsx)(e.p,{children:\\"From the above JSON response, you will notice some things, such as:\\"}),`\\n`,(0,n.jsxs)(e.ul,{children:[`\\n`,(0,n.jsxs)(e.li,{children:[\\"A new field called _ \\",(0,n.jsx)(e.strong,{children:\\"_ignored\\"}),\\" _ of type array with the list of all fields that have been ignored while ingesting documents\\"]}),`\\n`,(0,n.jsxs)(e.li,{children:[\\"A new field called _ \\",(0,n.jsx)(e.strong,{children:\\"ignored_field_values\\"}),\\" _ with a dictionary of ignored fields and their values\\"]}),`\\n`,(0,n.jsxs)(e.li,{children:[\\"The field called __ \\",(0,n.jsx)(e.strong,{children:\\"source\\"}),\\" _ contains the original document unmodified. This is especially useful if you want to fix the problems with the mapping later.\\"]}),`\\n`,(0,n.jsxs)(e.li,{children:[\\"The field called _ \\",(0,n.jsx)(e.strong,{children:\\"text\\"}),\\" _ was not present in the original mapping, but it is now included since Elasticsearch automatically inferred the type of this field. 
In fact, if you try to query the mapping of the index _ \\",(0,n.jsx)(e.strong,{children:\\"my-index\\"}),\\" _ again via the command:\\"]}),`\\n`]}),`\\n`,(0,n.jsx)(e.pre,{children:(0,n.jsx)(e.code,{className:\\"language-bash\\",children:`GET my-index/_mapping\\n`})}),`\\n`,(0,n.jsx)(e.p,{children:\\"You should get this result:\\"}),`\\n`,(0,n.jsx)(e.pre,{children:(0,n.jsx)(e.code,{className:\\"language-json\\",children:`{\\n \\"my-index\\": {\\n \\"mappings\\": {\\n \\"properties\\": {\\n \\"number_one\\": {\\n \\"type\\": \\"integer\\",\\n \\"ignore_malformed\\": true\\n },\\n \\"number_two\\": {\\n \\"type\\": \\"integer\\"\\n },\\n \\"text\\": {\\n \\"type\\": \\"text\\",\\n \\"fields\\": {\\n \\"keyword\\": {\\n \\"type\\": \\"keyword\\",\\n \\"ignore_above\\": 256\\n }\\n }\\n }\\n }\\n }\\n }\\n}\\n`})}),`\\n`,(0,n.jsx)(e.p,{children:\\"Finally, if you ingest some valid documents like the following command:\\"}),`\\n`,(0,n.jsx)(e.pre,{children:(0,n.jsx)(e.code,{className:\\"language-bash\\",children:`PUT my-index/_doc/3\\n{\\n \\"text\\": \\"Some text value\\",\\n \\"number_two\\": 10\\n}\\n`})}),`\\n`,(0,n.jsxs)(e.p,{children:[\\"You can check how many documents have at least one ignored field with the following \\",(0,n.jsx)(e.a,{href:\\"https://www.elastic.co/guide/en/elasticsearch/reference/current/query-dsl-exists-query.html\\",rel:\\"nofollow\\",children:\\"Exists query\\"}),\\":\\"]}),`\\n`,(0,n.jsx)(e.pre,{children:(0,n.jsx)(e.code,{className:\\"language-bash\\",children:`GET my-index/_search\\n{\\n \\"query\\": {\\n \\"exists\\": {\\n \\"field\\": \\"_ignored\\"\\n }\\n }\\n}\\n`})}),`\\n`,(0,n.jsxs)(e.p,{children:[\\"You can also see that out of the two documents ingested (with \\",(0,n.jsx)(e.em,{children:\\"id=1\\"}),\\" and \\",(0,n.jsx)(e.em,{children:\\"id=3\\"}),\\") only the document with \\",(0,n.jsx)(e.em,{children:\\"id=1\\"}),\\" contains an ignored field.\\"]}),`\\n`,(0,n.jsx)(e.pre,{children:(0,n.jsx)(e.code,{className:\\"language-json\\",children:`{\\n \\"took\\": 193,\\n \\"timed_out\\": false,\\n \\"_shards\\": {\\n \\"total\\": 1,\\n \\"successful\\": 1,\\n \\"skipped\\": 0,\\n \\"failed\\": 0\\n },\\n \\"hits\\": {\\n \\"total\\": {\\n \\"value\\": 1,\\n \\"relation\\": \\"eq\\"\\n },\\n \\"max_score\\": 1,\\n \\"hits\\": [\\n {\\n \\"_index\\": \\"my-index\\",\\n \\"_id\\": \\"1\\",\\n \\"_score\\": 1,\\n \\"_ignored\\": [\\"number_one\\"],\\n \\"_source\\": {\\n \\"text\\": \\"Some text value\\",\\n \\"number_one\\": \\"foo\\"\\n }\\n }\\n ]\\n }\\n}\\n`})}),`\\n`,(0,n.jsxs)(e.p,{children:[\\"Alternatively, you can search for all documents that have a specific field being ignored with this \\",(0,n.jsx)(e.a,{href:\\"https://www.elastic.co/guide/en/elasticsearch/reference/current/query-dsl-terms-query.html\\",rel:\\"nofollow\\",children:\\"Terms query\\"}),\\":\\"]}),`\\n`,(0,n.jsx)(e.pre,{children:(0,n.jsx)(e.code,{className:\\"language-bash\\",children:`GET my-index/_search\\n{\\n \\"query\\": {\\n \\"terms\\": {\\n \\"_ignored\\": [ \\"number_one\\"]\\n }\\n }\\n}\\n`})}),`\\n`,(0,n.jsx)(e.p,{children:\\"The result, in this case, will be the same as the previous one since we only managed to ingest a single document with that exact single field ignored.\\"}),`\\n`,(0,n.jsx)(e.h2,{id:\\"conclusion\\",children:\\"Conclusion\\"}),`\\n`,(0,n.jsxs)(e.p,{children:[\\"Because we are a big fan of this flag, we\'ve enabled _ \\",(0,n.jsx)(e.strong,{children:\\"ignore_malformed\\"}),\\" _ by default for all Elastic integrations and in the 
\\",(0,n.jsx)(e.a,{href:\\"https://github.com/elastic/elasticsearch/blob/main/x-pack/plugin/core/template-resources/src/main/resources/logs-settings.json#L13\\",rel:\\"nofollow\\",children:\\"default index template for logs data streams\\"}),\\" as of 8.9.0. More information can be found in the official documentation for \\",(0,n.jsx)(e.a,{href:\\"https://www.elastic.co/guide/en/elasticsearch/reference/8.9/ignore-malformed.html\\",rel:\\"nofollow\\",children:\\"ignore_malformed\\"}),\\".\\"]}),`\\n`,(0,n.jsx)(e.p,{children:\\"And since I am personally working on this feature, I can reassure you that it is a game changer.\\"}),`\\n`,(0,n.jsxs)(e.p,{children:[\\"You can start by setting _ \\",(0,n.jsx)(e.strong,{children:\\"ignore_malformed\\"}),\\" _ on any cluster manually before Elastic Stack 8.9.0. Or you can use the defaults that we set for you starting from \\",(0,n.jsx)(e.a,{href:\\"https://www.elastic.co/blog/whats-new-elastic-enterprise-search-8-9-0\\",rel:\\"nofollow\\",children:\\"Elastic Stack 8.9.0\\"}),\\".\\"]}),`\\n`,(0,n.jsx)(e.p,{children:(0,n.jsx)(e.em,{children:\\"The release and timing of any features or functionality described in this post remain at Elastic\'s sole discretion. Any features or functionality not currently available may not be delivered on time or at all.\\"})})]})}function h(t={}){let{wrapper:e}=t.components||{};return e?(0,n.jsx)(e,{...t,children:(0,n.jsx)(c,{...t})}):c(t)}return b(v);})();\\n;return Component;"},"_id":"articles/antidote-index-mapping-exceptions-ignore-malformed.mdx","_raw":{"sourceFilePath":"articles/antidote-index-mapping-exceptions-ignore-malformed.mdx","sourceFileName":"antidote-index-mapping-exceptions-ignore-malformed.mdx","sourceFileDir":"articles","contentType":"mdx","flattenedPath":"articles/antidote-index-mapping-exceptions-ignore-malformed"},"type":"Article","imageUrl":"/assets/images/antidote-index-mapping-exceptions-ignore-malformed/illustration-stack-modernize-solutions-1689x980_(1).png","readingTime":"12 min read","url":"/antidote-index-mapping-exceptions-ignore-malformed","headings":[{"level":2,"title":"How mappings work","href":"#how-mappings-work"},{"level":2,"title":"What happens when data is malformed?","href":"#what-happens-when-data-is-malformed"},{"level":2,"title":"A toy use case","href":"#a-toy-use-case"},{"level":2,"title":"Conclusion","href":"#conclusion"}]},{"title":"Achieving seamless API management: Introducing AWS API Gateway integration with Elastic","slug":"api-management-aws-api-gateway-integration","date":"2023-09-14","description":"With Elastic\'s AWS API Gateway integration, application owners and developers unlock the capability to proactively identify and resolve problems, fine-tune resource utilization, and provide extraordinary digital experiences to their users.","image":"illustration-midnight-bg-aws-elastic-1680x980.png","author":[{"slug":"udayasimha-theepireddy-uday","type":"Author","_raw":{}},{"slug":"subhrata-kulshrestha","type":"Author","_raw":{}}],"tags":[{"slug":"cloud-monitoring","type":"Tag","_raw":{}},{"slug":"aws","type":"Tag","_raw":{}},{"slug":"aws-api-gateway","type":"Tag","_raw":{}},{"slug":"metrics","type":"Tag","_raw":{}}],"body":{"raw":"\\n[AWS API Gateway](https://aws.amazon.com/api-gateway/) is a powerful service that redefines API management. It serves as a gateway for creating, deploying, and managing APIs, enabling businesses to establish seamless connections between different applications and services. 
With features like authentication, authorization, and traffic control, API Gateway ensures the security and reliability of API interactions.\\n\\nIn an era where APIs serve as the backbone of modern applications, having the means to maintain visibility and control over these vital components is absolutely essential. In this blog post, we dive deep into the comprehensive observability solution offered by Elastic\xae, ensuring real-time visibility, advanced analytics, and actionable insights, empowering you to fine-tune your API Gateway for optimal performance.\\n\\nFor application owners and developers, this integration stands as a beacon of empowerment. Elastic\'s meticulous orchestration of the seamless merging of metrics, logs, and traces, built upon the robust [ELK Stack](https://www.elastic.co/elastic-stack) foundation, equips them with potent real-time monitoring and analysis tools. These tools facilitate precise performance optimization and swift issue resolution, all within a secure and dependable environment.\\n\\nWith Elastic\'s AWS API Gateway integration, application owners and developers unlock the capability to proactively identify and resolve problems, fine-tune resource utilization, and provide extraordinary digital experiences to their users.\\n\\n## Architecture\\n\\n![architecture](/assets/images/api-management-aws-api-gateway-integration/elastic-blog-1-architecture.png)\\n\\n## Why the AWS API Gateway integration matters\\n\\nAPI Gateway now serves as the foundation of contemporary application development, simplifying the process of creating and overseeing APIs on a large scale. Yet, monitoring and troubleshooting these API endpoints can be challenging. With the new AWS API Gateway integration introduced by Elastic, you can gain the following:\\n\\n- **Unprecedented visibility:** Monitor your API Gateway endpoints\' performance, error rates, and usage metrics in real time. Get a comprehensive view of your APIs\' health and performance.\\n- **Log analysis:** Dive deep into API Gateway logs with ease. Our integration enables you to collect and analyze logs for HTTP, REST, and Websocket API types, helping you troubleshoot issues and gain valuable insights.\\n- **Rapid issue resolution:** Identify and resolve issues in your API Gateway workflows faster than ever. [Elastic Observability\'s](https://www.elastic.co/observability) powerful search and analytics tools help you pinpoint problems with ease.\\n- **Alerting and notifications:** Set up custom alerts based on API Gateway metrics and logs. Receive notifications when performance thresholds are breached, ensuring that you can take action promptly.\\n- **Optimized costs:** Visualize resource usage and performance metrics for your API Gateway deployments. Use these insights to optimize resource allocation and reduce operational costs.\\n- **Custom dashboards:** Create customized dashboards and visualizations tailored to your API Gateway monitoring needs. Stay in control with real-time data and actionable insights.\\n- **Effortless integration:** Seamlessly connect your AWS API Gateway to our observability solution. Our intuitive setup process ensures a smooth integration experience.\\n- **Scalability:** Whether you have a handful of APIs or a complex API Gateway landscape, our observability solution scales to meet your needs. Grow confidently as your API infrastructure expands.\\n\\n## How to get started\\n\\nGetting started with the AWS API Gateway integration in Elastic Observability is seamless. 
Here\'s a quick overview of the steps:\\n\\n### Prerequisites and configurations\\n\\nIf you intend to follow the steps outlined in this blog post, there are a few prerequisites and configurations that you should have in place beforehand.\\n\\n1. You will need an account on [Elastic Cloud](https://cloud.elastic.co/) and a deployed stack and agent. Instructions for deploying a stack on AWS can be found [here](https://www.elastic.co/guide/en/elastic-stack/current/installing-elastic-stack.html). This is necessary for AWS API Gateway logging and analysis.\\n\\n2. You will also need an AWS account with the necessary permissions to pull data from AWS. Details on the required permissions can be found in our [documentation](https://docs.elastic.co/en/integrations/aws#aws-permissions).\\n\\n3. You can monitor API execution by using CloudWatch, which collects and processes raw data from API Gateway into readable, near-real-time metrics and logs. Details on the required steps to enable logging can be found [here](https://docs.aws.amazon.com/apigateway/latest/developerguide/set-up-logging.html).\\n\\n### Step 1. Create an account with Elastic\\n\\n[Create an account on Elastic Cloud](https://cloud.elastic.co/registration?fromURI=/home) by following the steps provided.\\n\\n### Step 2. Add integration\\n\\n- Log in to your Elastic Cloud deployment.\\n\\n![signup](/assets/images/api-management-aws-api-gateway-integration/elastic-blog-2-signup.png)\\n\\n- Click on **Add integrations**. You will be navigated to a catalog of supported integrations.\\n\\n![welcome home dashboard](/assets/images/api-management-aws-api-gateway-integration/elastic-blog-3-welcome-home.png)\\n\\n- Search and select **AWS API Gateway**.\\n\\n![Integration ](/assets/images/api-management-aws-api-gateway-integration/elastic-blog-4-integrations.png)\\n\\n### Step 3. Configure integration\\n\\n- Click on the **Add AWS API Gateway** button and provide the required details.\\n- If this is your first time adding an AWS integration, you’ll need to [configure and enroll the Elastic Agent](https://www.elastic.co/guide/en/fleet/current/elastic-agent-installation.html) on an AWS instance.\\n\\n![aws-api-gateway](/assets/images/api-management-aws-api-gateway-integration/elastic-blog-5-aws-api-gateway.png)\\n\\n- Then complete the “Configure integration” form, providing all the necessary information required for agents to collect the AWS API Gateway metrics and associated CloudWatch logs. Multiple AWS credential methods are supported, including access keys, temporary security credentials, and IAM role ARN. Please see the [IAM security and access documentation](https://docs.aws.amazon.com/apigateway/latest/developerguide/security-iam.html) for more details. You can choose to collect API Gateway metrics, API Gateway logs via S3, or API Gateway logs via CloudWatch.\\n- Click on the **Save and continue** button at the bottom of the page.\\n\\n![add-aws-integration](/assets/images/api-management-aws-api-gateway-integration/elastic-blog-6-add-aws-integration.png)\\n\\n### Step 4. Analyze and monitor\\n\\nExplore the data using the out-of-the-box dashboards available for the integration. 
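As a side note to the CloudWatch prerequisite above, execution logging can also be turned on from the command line rather than the console. A hedged sketch using the AWS CLI for a REST API (the API ID, stage name, and log level are placeholders; HTTP and WebSocket APIs are configured differently):

```bash
# Enable execution logging for all resources and methods on an existing
# REST API stage. abc123 and prod are placeholder identifiers.
aws apigateway update-stage \
  --rest-api-id abc123 \
  --stage-name prod \
  --patch-operations op=replace,path='/*/*/logging/loglevel',value=INFO
```

Returning to the analysis step: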
Select **Discover** from the Elastic Cloud top-level menu.\\n\\n![discover-dashboard](/assets/images/api-management-aws-api-gateway-integration/elastic-blog-7-discover-dashboard.png)\\n\\nOr, create custom dashboards, set up alerts, and gain actionable insights into your API Gateway service performance.\\n\\nHere are key monitoring metrics collected through this integration across REST APIs, HTTP APIs, and WebSocket APIs:\\n\\n- **4XXError** – The number of client-side errors captured in a given period\\n- **5XXError** – The number of server-side errors captured in a given period\\n- **CacheHitCount** – The number of requests served from the API cache in a given period\\n- **CacheMissCount** – The number of requests served from the backend in a given period, when API caching is enabled\\n- **Count** – The total number of API requests in a given period\\n- **IntegrationLatency** – The time between when API Gateway relays a request to the backend and when it receives a response from the backend\\n- **Latency** – The time between when API Gateway receives a request from a client and when it returns a response to the client — the latency includes the integration latency and other API Gateway overhead\\n- **DataProcessed** – The amount of data processed in bytes\\n- **ConnectCount** – The number of messages sent to the $connect route integration\\n- **MessageCount** – The number of messages sent to the WebSocket API, either from or to the client\\n\\n![graphs](/assets/images/api-management-aws-api-gateway-integration/elastic-blog-8-graphs.png)\\n\\n## Conclusion\\n\\nThe native integration of AWS API Gateway into Elastic Observability marks a significant advancement in streamlining the monitoring and management of your APIs. With this integration, you gain access to a wealth of insights, real-time visibility, and powerful analytics tools, empowering you to optimize your API performance, enhance security, and troubleshoot with ease. Don\'t miss out on this opportunity to take your API management to the next level, ensuring your digital assets operate at their best, all while providing a seamless experience for your users. Embrace this integration, and stay at the forefront of API observability in the ever-evolving world of digital technology.\\n\\nVisit our [documentation](https://docs.elastic.co/integrations/aws/apigateway) to learn more about Elastic Observability and the AWS API Gateway integration, or [contact our sales team](https://www.elastic.co/contact) to get started!\\n\\n## Start a free trial today\\n\\nStart your own [7-day free trial](https://aws.amazon.com/marketplace/pp/prodview-voru33wi6xs7k?trk=5fbc596b-6d2a-433a-8333-0bd1f28e84da&sc_channel=el) by signing up via [AWS Marketplace](https://aws.amazon.com/marketplace/pp/prodview-voru33wi6xs7k?trk=5fbc596b-6d2a-433a-8333-0bd1f28e84da&sc_channel=el&ultron=gobig&hulk=regpage&blade=elasticweb&gambit=mp-b) and quickly spin up a deployment in minutes on any of the [Elastic Cloud regions on AWS](https://www.elastic.co/guide/en/cloud/current/ec-reference-regions.html#ec_amazon_web_services_aws_regions) around the world. Your AWS Marketplace purchase of Elastic will be included in your monthly consolidated billing statement and will draw against your committed spend with AWS.\\n\\n_The release and timing of any features or functionality described in this post remain at Elastic\'s sole discretion. 
Any features or functionality not currently available may not be delivered on time or at all._\\n","code":"var Component=(()=>{var g=Object.create;var o=Object.defineProperty;var u=Object.getOwnPropertyDescriptor;var p=Object.getOwnPropertyNames;var m=Object.getPrototypeOf,w=Object.prototype.hasOwnProperty;var y=(n,e)=>()=>(e||n((e={exports:{}}).exports,e),e.exports),f=(n,e)=>{for(var i in e)o(n,i,{get:e[i],enumerable:!0})},s=(n,e,i,r)=>{if(e&&typeof e==\\"object\\"||typeof e==\\"function\\")for(let a of p(e))!w.call(n,a)&&a!==i&&o(n,a,{get:()=>e[a],enumerable:!(r=u(e,a))||r.enumerable});return n};var b=(n,e,i)=>(i=n!=null?g(m(n)):{},s(e||!n||!n.__esModule?o(i,\\"default\\",{value:n,enumerable:!0}):i,n)),v=n=>s(o({},\\"__esModule\\",{value:!0}),n);var c=y((S,l)=>{l.exports=_jsx_runtime});var I={};f(I,{default:()=>h,frontmatter:()=>A});var t=b(c()),A={title:\\"Achieving seamless API management: Introducing AWS API Gateway integration with Elastic\\",slug:\\"api-management-aws-api-gateway-integration\\",date:\\"2023-09-14\\",description:\\"With Elastic\'s AWS API Gateway integration, application owners and developers unlock the capability to proactively identify and resolve problems, fine-tune resource utilization, and provide extraordinary digital experiences to their users.\\",author:[{slug:\\"udayasimha-theepireddy-uday\\"},{slug:\\"subhrata-kulshrestha\\"}],image:\\"illustration-midnight-bg-aws-elastic-1680x980.png\\",tags:[{slug:\\"cloud-monitoring\\"},{slug:\\"aws\\"},{slug:\\"aws-api-gateway\\"},{slug:\\"metrics\\"}]};function d(n){let e={a:\\"a\\",br:\\"br\\",em:\\"em\\",h2:\\"h2\\",h3:\\"h3\\",img:\\"img\\",li:\\"li\\",ol:\\"ol\\",p:\\"p\\",strong:\\"strong\\",ul:\\"ul\\",...n.components};return(0,t.jsxs)(t.Fragment,{children:[(0,t.jsxs)(e.p,{children:[(0,t.jsx)(e.a,{href:\\"https://aws.amazon.com/api-gateway/\\",rel:\\"nofollow\\",children:\\"AWS API Gateway\\"}),\\" is a powerful service that redefines API management. It serves as a gateway for creating, deploying, and managing APIs, enabling businesses to establish seamless connections between different applications and services. With features like authentication, authorization, and traffic control, API Gateway ensures the security and reliability of API interactions.\\"]}),`\\n`,(0,t.jsxs)(e.p,{children:[\\"In an era where APIs serve as the backbone of modern applications, having the means to maintain visibility and control over these vital components is absolutely essential. In this blog post, we dive deep into the comprehensive observability solution offered by Elastic\\",(0,t.jsx)(\\"sup\\",{children:\\"\\\\xAE\\"}),\\", ensuring real-time visibility, advanced analytics, and actionable insights, empowering you to fine-tune your API Gateway for optimal performance.\\"]}),`\\n`,(0,t.jsxs)(e.p,{children:[\\"For application owners and developers, this integration stands as a beacon of empowerment. Elastic\'s meticulous orchestration of the seamless merging of metrics, logs, and traces, built upon the robust \\",(0,t.jsx)(e.a,{href:\\"https://www.elastic.co/elastic-stack\\",rel:\\"nofollow\\",children:\\"ELK Stack\\"}),\\" foundation, equips them with potent real-time monitoring and analysis tools. 
These tools facilitate precise performance optimization and swift issue resolution, all within a secure and dependable environment.\\"]}),`\\n`,(0,t.jsx)(e.p,{children:\\"With Elastic\'s AWS API Gateway integration, application owners and developers unlock the capability to proactively identify and resolve problems, fine-tune resource utilization, and provide extraordinary digital experiences to their users.\\"}),`\\n`,(0,t.jsx)(e.h2,{id:\\"architecture\\",children:\\"Architecture\\"}),`\\n`,(0,t.jsx)(e.p,{children:(0,t.jsx)(e.img,{src:\\"/assets/images/api-management-aws-api-gateway-integration/elastic-blog-1-architecture.png\\",alt:\\"architecture\\",width:\\"1999\\",height:\\"558\\"})}),`\\n`,(0,t.jsx)(e.h2,{id:\\"why-the-aws-api-gateway-integration-matters\\",children:\\"Why the AWS API Gateway integration matters\\"}),`\\n`,(0,t.jsx)(e.p,{children:\\"API Gateway now serves as the foundation of contemporary application development, simplifying the process of creating and overseeing APIs on a large scale. Yet, monitoring and troubleshooting these API endpoints can be challenging. With the new AWS API Gateway integration introduced by Elastic, you can gain the following:\\"}),`\\n`,(0,t.jsxs)(e.ul,{children:[`\\n`,(0,t.jsxs)(e.li,{children:[(0,t.jsx)(e.strong,{children:\\"Unprecedented visibility:\\"}),\\" Monitor your API Gateway endpoints\' performance, error rates, and usage metrics in real time. Get a comprehensive view of your APIs\' health and performance.\\"]}),`\\n`,(0,t.jsxs)(e.li,{children:[(0,t.jsx)(e.strong,{children:\\"Log analysis:\\"}),\\" Dive deep into API Gateway logs with ease. Our integration enables you to collect and analyze logs for HTTP, REST, and Websocket API types, helping you troubleshoot issues and gain valuable insights.\\"]}),`\\n`,(0,t.jsxs)(e.li,{children:[(0,t.jsx)(e.strong,{children:\\"Rapid issue resolution:\\"}),\\" Identify and resolve issues in your API Gateway workflows faster than ever. \\",(0,t.jsx)(e.a,{href:\\"https://www.elastic.co/observability\\",rel:\\"nofollow\\",children:\\"Elastic Observability\'s\\"}),\\" powerful search and analytics tools help you pinpoint problems with ease.\\"]}),`\\n`,(0,t.jsxs)(e.li,{children:[(0,t.jsx)(e.strong,{children:\\"Alerting and notifications:\\"}),\\" Set up custom alerts based on API Gateway metrics and logs. Receive notifications when performance thresholds are breached, ensuring that you can take action promptly.\\"]}),`\\n`,(0,t.jsxs)(e.li,{children:[(0,t.jsx)(e.strong,{children:\\"Optimized costs:\\"}),\\" Visualize resource usage and performance metrics for your API Gateway deployments. Use these insights to optimize resource allocation and reduce operational costs.\\"]}),`\\n`,(0,t.jsxs)(e.li,{children:[(0,t.jsx)(e.strong,{children:\\"Custom dashboards:\\"}),\\" Create customized dashboards and visualizations tailored to your API Gateway monitoring needs. Stay in control with real-time data and actionable insights.\\"]}),`\\n`,(0,t.jsxs)(e.li,{children:[(0,t.jsx)(e.strong,{children:\\"Effortless integration:\\"}),\\" Seamlessly connect your AWS API Gateway to our observability solution. Our intuitive setup process ensures a smooth integration experience.\\"]}),`\\n`,(0,t.jsxs)(e.li,{children:[(0,t.jsx)(e.strong,{children:\\"Scalability:\\"}),\\" Whether you have a handful of APIs or a complex API Gateway landscape, our observability solution scales to meet your needs. 
Grow confidently as your API infrastructure expands.\\"]}),`\\n`]}),`\\n`,(0,t.jsx)(e.h2,{id:\\"how-to-get-started\\",children:\\"How to get started\\"}),`\\n`,(0,t.jsx)(e.p,{children:\\"Getting started with the AWS API Gateway integration in Elastic Observability is seamless. Here\'s a quick overview of the steps:\\"}),`\\n`,(0,t.jsx)(e.h3,{id:\\"prerequisites-and-configurations\\",children:\\"Prerequisites and configurations\\"}),`\\n`,(0,t.jsx)(e.p,{children:\\"If you intend to follow the steps outlined in this blog post, there are a few prerequisites and configurations that you should have in place beforehand.\\"}),`\\n`,(0,t.jsxs)(e.ol,{children:[`\\n`,(0,t.jsxs)(e.li,{children:[`\\n`,(0,t.jsxs)(e.p,{children:[\\"You will need an account on \\",(0,t.jsx)(e.a,{href:\\"https://cloud.elastic.co/\\",rel:\\"nofollow\\",children:\\"Elastic Cloud\\"}),\\" and a deployed stack and agent. Instructions for deploying a stack on AWS can be found \\",(0,t.jsx)(e.a,{href:\\"https://www.elastic.co/guide/en/elastic-stack/current/installing-elastic-stack.html\\",rel:\\"nofollow\\",children:\\"here\\"}),\\". This is necessary for AWS API Gateway logging and analysis.\\"]}),`\\n`]}),`\\n`,(0,t.jsxs)(e.li,{children:[`\\n`,(0,t.jsxs)(e.p,{children:[\\"You will also need an AWS account with the necessary permissions to pull data from AWS. Details on the required permissions can be found in our \\",(0,t.jsx)(e.a,{href:\\"https://docs.elastic.co/en/integrations/aws#aws-permissions\\",rel:\\"nofollow\\",children:\\"documentation\\"}),\\".\\"]}),`\\n`]}),`\\n`,(0,t.jsxs)(e.li,{children:[`\\n`,(0,t.jsxs)(e.p,{children:[\\"You can monitor API execution by using CloudWatch, which collects and processes raw data from API Gateway into readable, near-real-time metrics and logs. Details on the required steps to enable logging can be found \\",(0,t.jsx)(e.a,{href:\\"https://docs.aws.amazon.com/apigateway/latest/developerguide/set-up-logging.html\\",rel:\\"nofollow\\",children:\\"here\\"}),\\".\\"]}),`\\n`]}),`\\n`]}),`\\n`,(0,t.jsx)(e.h3,{id:\\"step-1-create-an-account-with-elastic\\",children:\\"Step 1. Create an account with Elastic\\"}),`\\n`,(0,t.jsxs)(e.p,{children:[(0,t.jsx)(e.a,{href:\\"https://cloud.elastic.co/registration?fromURI=/home\\",rel:\\"nofollow\\",children:\\"Create an account on Elastic Cloud\\"}),\\" by following the steps provided.\\"]}),`\\n`,(0,t.jsx)(e.h3,{id:\\"step-2-add-integration\\",children:\\"Step 2. Add integration\\"}),`\\n`,(0,t.jsxs)(e.ul,{children:[`\\n`,(0,t.jsx)(e.li,{children:\\"Log in to your Elastic Cloud deployment.\\"}),`\\n`]}),`\\n`,(0,t.jsx)(e.p,{children:(0,t.jsx)(e.img,{src:\\"/assets/images/api-management-aws-api-gateway-integration/elastic-blog-2-signup.png\\",alt:\\"signup\\",width:\\"1671\\",height:\\"672\\"})}),`\\n`,(0,t.jsxs)(e.ul,{children:[`\\n`,(0,t.jsxs)(e.li,{children:[\\"Click on \\",(0,t.jsx)(e.strong,{children:\\"Add integrations\\"}),\\". 
You will be navigated to a catalog of supported integrations.\\"]}),`\\n`]}),`\\n`,(0,t.jsx)(e.p,{children:(0,t.jsx)(e.img,{src:\\"/assets/images/api-management-aws-api-gateway-integration/elastic-blog-3-welcome-home.png\\",alt:\\"welcome home dashboard\\",width:\\"1999\\",height:\\"1088\\"})}),`\\n`,(0,t.jsxs)(e.ul,{children:[`\\n`,(0,t.jsxs)(e.li,{children:[\\"Search and select \\",(0,t.jsx)(e.strong,{children:\\"AWS API Gateway\\"}),\\".\\"]}),`\\n`]}),`\\n`,(0,t.jsx)(e.p,{children:(0,t.jsx)(e.img,{src:\\"/assets/images/api-management-aws-api-gateway-integration/elastic-blog-4-integrations.png\\",alt:\\"Integration \\",width:\\"1999\\",height:\\"803\\"})}),`\\n`,(0,t.jsx)(e.h3,{id:\\"step-3-configure-integration\\",children:\\"Step 3. Configure integration\\"}),`\\n`,(0,t.jsxs)(e.ul,{children:[`\\n`,(0,t.jsxs)(e.li,{children:[\\"Click on the \\",(0,t.jsx)(e.strong,{children:\\"Add AWS API Gateway\\"}),\\" button and provide the required details.\\"]}),`\\n`,(0,t.jsxs)(e.li,{children:[\\"If this is your first time adding an AWS integration, you\\\\u2019ll need to \\",(0,t.jsx)(e.a,{href:\\"https://www.elastic.co/guide/en/fleet/current/elastic-agent-installation.html\\",rel:\\"nofollow\\",children:\\"configure and enroll the Elastic Agent\\"}),\\" on an AWS instance.\\"]}),`\\n`]}),`\\n`,(0,t.jsx)(e.p,{children:(0,t.jsx)(e.img,{src:\\"/assets/images/api-management-aws-api-gateway-integration/elastic-blog-5-aws-api-gateway.png\\",alt:\\"aws-api-gateway\\",width:\\"1999\\",height:\\"895\\"})}),`\\n`,(0,t.jsxs)(e.ul,{children:[`\\n`,(0,t.jsxs)(e.li,{children:[\\"Then complete the \\\\u201CConfigure integration\\\\u201D form, providing all the necessary information required for agents to collect the AWS API Gateway metrics and associated CloudWatch logs. Multiple AWS credential methods are supported, including access keys, temporary security credentials, and IAM role ARN. Please see the \\",(0,t.jsx)(e.a,{href:\\"https://docs.aws.amazon.com/apigateway/latest/developerguide/security-iam.html\\",rel:\\"nofollow\\",children:\\"IAM security and access documentation\\"}),\\" for more details. You can choose to collect API Gateway metrics, API Gateway logs via S3, or API Gateway logs via CloudWatch.\\"]}),`\\n`,(0,t.jsxs)(e.li,{children:[\\"Click on the \\",(0,t.jsx)(e.strong,{children:\\"Save and continue\\"}),\\" button at the bottom of the page.\\"]}),`\\n`]}),`\\n`,(0,t.jsx)(e.p,{children:(0,t.jsx)(e.img,{src:\\"/assets/images/api-management-aws-api-gateway-integration/elastic-blog-6-add-aws-integration.png\\",alt:\\"add-aws-integration\\",width:\\"1999\\",height:\\"902\\"})}),`\\n`,(0,t.jsx)(e.h3,{id:\\"step-4-analyze-and-monitor\\",children:\\"Step 4. Analyze and monitor\\"}),`\\n`,(0,t.jsxs)(e.p,{children:[\\"Explore the data using the out-of-the-box dashboards available for the integration. 
Select \\",(0,t.jsx)(e.strong,{children:\\"Discover\\"}),\\" from the Elastic Cloud top-level menu.\\"]}),`\\n`,(0,t.jsx)(e.p,{children:(0,t.jsx)(e.img,{src:\\"/assets/images/api-management-aws-api-gateway-integration/elastic-blog-7-discover-dashboard.png\\",alt:\\"discover-dashboard\\",width:\\"495\\",height:\\"747\\"})}),`\\n`,(0,t.jsx)(e.p,{children:\\"Or, create custom dashboards, set up alerts, and gain actionable insights into your API Gateway service performance.\\"}),`\\n`,(0,t.jsx)(e.p,{children:\\"Here are key monitoring metrics collected through this integration across Rest APIs, HTTP APIs, and Websocket APIs:\\"}),`\\n`,(0,t.jsxs)(e.ul,{children:[`\\n`,(0,t.jsxs)(e.li,{children:[(0,t.jsx)(e.strong,{children:\\"4XXError\\"}),\\" \\\\u2013 The number of client-side errors captured in a given period\\"]}),`\\n`,(0,t.jsxs)(e.li,{children:[(0,t.jsx)(e.strong,{children:\\"5XXError\\"}),\\" \\\\u2013 The number of server-side errors captured in a given period\\"]}),`\\n`,(0,t.jsxs)(e.li,{children:[(0,t.jsx)(e.strong,{children:\\"CacheHitCount\\"}),\\" \\\\u2013 The number of requests served from the API cache in a given period\\"]}),`\\n`,(0,t.jsxs)(e.li,{children:[(0,t.jsx)(e.strong,{children:\\"CacheMissCount\\"}),\\" \\\\u2013 The number of requests served from the backend in a given period, when API caching is enabled\\"]}),`\\n`,(0,t.jsxs)(e.li,{children:[(0,t.jsx)(e.strong,{children:\\"Count\\"}),\\" \\\\u2013 The total number of API requests in a given period\\"]}),`\\n`,(0,t.jsxs)(e.li,{children:[(0,t.jsx)(e.strong,{children:\\"IntegrationLatency\\"}),\\" \\\\u2013 The time between when API Gateway relays a request to the backend and when it receives a response from the backend\\"]}),`\\n`,(0,t.jsxs)(e.li,{children:[(0,t.jsx)(e.strong,{children:\\"Latency\\"}),\\" \\\\u2013 The time between when API Gateway receives a request from a client and when it returns a response to the client \\\\u2014 the latency includes the integration latency and other API Gateway overhead\\"]}),`\\n`,(0,t.jsxs)(e.li,{children:[(0,t.jsx)(e.strong,{children:\\"DataProcessed\\"}),\\" \\\\u2013 The amount of data processed in bytes\\"]}),`\\n`,(0,t.jsxs)(e.li,{children:[(0,t.jsx)(e.strong,{children:\\"ConnectCount\\"}),\\" \\\\u2013 The number of messages sent to the $connect route integration\\",(0,t.jsx)(e.br,{}),`\\n`,(0,t.jsx)(e.strong,{children:\\"MessageCount\\"}),\\" \\\\u2013 The number of messages sent to the WebSocket API, either from or to the client\\"]}),`\\n`]}),`\\n`,(0,t.jsx)(e.p,{children:(0,t.jsx)(e.img,{src:\\"/assets/images/api-management-aws-api-gateway-integration/elastic-blog-8-graphs.png\\",alt:\\"graphs\\",width:\\"1999\\",height:\\"1792\\"})}),`\\n`,(0,t.jsx)(e.h2,{id:\\"conclusion\\",children:\\"Conclusion\\"}),`\\n`,(0,t.jsx)(e.p,{children:\\"The native integration of AWS API Gateway into Elastic Observability marks a significant advancement in streamlining the monitoring and management of your APIs. With this integration, you gain access to a wealth of insights, real-time visibility, and powerful analytics tools, empowering you to optimize your API performance, enhance security, and troubleshoot with ease. Don\'t miss out on this opportunity to take your API management to the next level, ensuring your digital assets operate at their best, all while providing a seamless experience for your users. 
Embrace this integration, and stay at the forefront of API observability in the ever-evolving world of digital technology.\\"}),`\\n`,(0,t.jsxs)(e.p,{children:[\\"Visit our \\",(0,t.jsx)(e.a,{href:\\"https://docs.elastic.co/integrations/aws/apigateway\\",rel:\\"nofollow\\",children:\\"documentation\\"}),\\" to learn more about Elastic Observability and the AWS API Gateway integration, or \\",(0,t.jsx)(e.a,{href:\\"https://www.elastic.co/contact\\",rel:\\"nofollow\\",children:\\"contact our sales team\\"}),\\" to get started!\\"]}),`\\n`,(0,t.jsx)(e.h2,{id:\\"start-a-free-trial-today\\",children:\\"Start a free trial today\\"}),`\\n`,(0,t.jsxs)(e.p,{children:[\\"Start your own \\",(0,t.jsx)(e.a,{href:\\"https://aws.amazon.com/marketplace/pp/prodview-voru33wi6xs7k?trk=5fbc596b-6d2a-433a-8333-0bd1f28e84da%E2%89%BBchannel=el\\",rel:\\"nofollow\\",children:\\"7-day free trial\\"}),\\" by signing up via \\",(0,t.jsx)(e.a,{href:\\"https://aws.amazon.com/marketplace/pp/prodview-voru33wi6xs7k?trk=5fbc596b-6d2a-433a-8333-0bd1f28e84da&sc_channel=el&ultron=gobig&hulk=regpage&blade=elasticweb&gambit=mp-b\\",rel:\\"nofollow\\",children:\\"AWS Marketplace\\"}),\\" and quickly spin up a deployment in minutes on any of the \\",(0,t.jsx)(e.a,{href:\\"https://www.elastic.co/guide/en/cloud/current/ec-reference-regions.html#ec_amazon_web_services_aws_regions\\",rel:\\"nofollow\\",children:\\"Elastic Cloud regions on AWS\\"}),\\" around the world. Your AWS Marketplace purchase of Elastic will be included in your monthly consolidated billing statement and will draw against your committed spend with AWS.\\"]}),`\\n`,(0,t.jsx)(e.p,{children:(0,t.jsx)(e.em,{children:\\"The release and timing of any features or functionality described in this post remain at Elastic\'s sole discretion. Any features or functionality not currently available may not be delivered on time or at all.\\"})})]})}function h(n={}){let{wrapper:e}=n.components||{};return e?(0,t.jsx)(e,{...n,children:(0,t.jsx)(d,{...n})}):d(n)}return v(I);})();\\n;return Component;"},"_id":"articles/api-management-aws-api-gateway-integration-elastic.mdx","_raw":{"sourceFilePath":"articles/api-management-aws-api-gateway-integration-elastic.mdx","sourceFileName":"api-management-aws-api-gateway-integration-elastic.mdx","sourceFileDir":"articles","contentType":"mdx","flattenedPath":"articles/api-management-aws-api-gateway-integration-elastic"},"type":"Article","imageUrl":"/assets/images/api-management-aws-api-gateway-integration/illustration-midnight-bg-aws-elastic-1680x980.png","readingTime":"7 min read","url":"/api-management-aws-api-gateway-integration","headings":[{"level":2,"title":"Architecture","href":"#architecture"},{"level":2,"title":"Why the AWS API Gateway integration matters","href":"#why-the-aws-api-gateway-integration-matters"},{"level":2,"title":"How to get started","href":"#how-to-get-started"},{"level":3,"title":"Prerequisites and configurations","href":"#prerequisites-and-configurations"},{"level":3,"title":"Step 1. Create an account with Elastic","href":"#step-1-create-an-account-with-elastic"},{"level":3,"title":"Step 2. Add integration","href":"#step-2-add-integration"},{"level":3,"title":"Step 3. Configure integration","href":"#step-3-configure-integration"},{"level":3,"title":"Step 4. 
Analyze and monitor","href":"#step-4-analyze-and-monitor"},{"level":2,"title":"Conclusion","href":"#conclusion"},{"level":2,"title":"Start a free trial today","href":"#start-a-free-trial-today"}]},{"title":"Auto-instrumentation of Go applications with OpenTelemetry","slug":"auto-instrumentation-go-applications-opentelemetry","date":"2024-10-02","description":"Instrumenting Go applications with OpenTelemetry provides insights into application performance, dependencies, and errors. We\'ll show you how to automatically instrument a Go application using Docker, with no changes to your application code.","image":"observability-launch-series-3-go-auto.jpg","author":[{"slug":"damien-mathieu","type":"Author","_raw":{}}],"tags":[{"slug":"opentelemetry","type":"Tag","_raw":{}},{"slug":"go","type":"Tag","_raw":{}},{"slug":"apm","type":"Tag","_raw":{}},{"slug":"instrumentation","type":"Tag","_raw":{}}],"body":{"raw":"\\nIn the fast-paced universe of software development, especially in the\\ncloud-native realm, DevOps and SRE teams are increasingly emerging as essential\\npartners in application stability and growth.\\n\\nDevOps engineers continuously optimize software delivery, while SRE teams act\\nas the stewards of application reliability, scalability, and top-tier\\nperformance. The challenge? These teams require a cutting-edge observability\\nsolution, one that encompasses full-stack insights, empowering them to rapidly\\nmanage, monitor, and rectify potential disruptions before they culminate into\\noperational challenges.\\n\\nObservability in our modern distributed software ecosystem goes beyond mere\\nmonitoring — it demands limitless data collection, precision in processing, and\\nthe correlation of this data into actionable insights. However, the road to\\nachieving this holistic view is paved with obstacles, from navigating version\\nincompatibilities to wrestling with restrictive proprietary code.\\n\\nEnter [OpenTelemetry (OTel)](https://opentelemetry.io/), with the following\\nbenefits for those who adopt it:\\n\\n- Escape vendor constraints with OTel, freeing yourself from vendor lock-in and\\n\\tensuring top-notch observability.\\n- See the harmony of unified logs, metrics, and traces come together to provide\\n\\ta complete system view.\\n- Improve your application oversight through richer and enhanced\\n\\tinstrumentations.\\n- Embrace the benefits of backward compatibility to protect your prior\\n\\tinstrumentation investments.\\n- Embark on the OpenTelemetry journey with an easy learning curve, simplifying\\n\\tonboarding and scalability.\\n- Rely on a proven, future-ready standard to boost your confidence in every\\n\\tinvestment.\\n\\nIn this blog, we will explore how you can use [automatic instrumentation in\\nyour Go](https://github.com/open-telemetry/opentelemetry-go-instrumentation/)\\napplication using Docker, without the need to refactor any part of your\\napplication code. We will use an [application called\\nElastiflix](https://github.com/elastic/observability-examples), which helps\\nhighlight auto-instrumentation in a simple way.\\n\\n## Application, prerequisites, and config\\n\\nThe application that we use for this blog is called\\n[Elastiflix](https://github.com/elastic/observability-examples), a\\nmovie-streaming application. 
It consists of several micro-services written in\\n.NET, NodeJS, Go, and Python.\\n\\nBefore we instrument our sample application, we will first need to understand\\nhow Elastic can receive the telemetry data.\\n\\n![Elastic configuration options for\\nOpenTelemetry](/assets/images/auto-instrumentation-go-applications-opentelemetry/elastic-blog-1-config.png)\\n\\nAll of Elastic Observability’s APM capabilities are available with OTel data.\\nSome of these include:\\n\\n- Service maps\\n- Service details (latency, throughput, failed transactions)\\n- Dependencies between services, distributed tracing\\n- Transactions (traces)\\n- Machine learning (ML) correlations\\n- Log correlation\\n\\nIn addition to Elastic’s APM and a unified view of the telemetry data, you will\\nalso be able to use Elastic’s powerful machine learning capabilities to speed\\nup analysis, and alerting to help reduce MTTR.\\n\\n### Prerequisites\\n\\n- An Elastic Cloud account — [sign up now](https://cloud.elastic.co/).\\n- A clone of the [Elastiflix demo application](https://github.com/elastic/observability-examples), or your own Go application\\n- Basic understanding of Docker — potentially install [Docker Desktop](https://www.docker.com/products/docker-desktop/)\\n- Basic understanding of Go\\n\\n### View the example source code\\n\\nThe full source code, including the Dockerfile used in this blog, can be found\\non\\n[GitHub](https://github.com/elastic/observability-examples/tree/main/Elastiflix/go-favorite).\\n\\nThe following steps will show you how to instrument this application and run it\\non the command line or in Docker. If you are interested in a more complete OTel\\nexample, take a look at the docker-compose file\\n[here](https://github.com/elastic/observability-examples/tree/main#start-the-app),\\nwhich will bring up the full project.
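\\n\\nIf you want to follow along locally, one way to grab the example code is to clone the repository and change into the Go service directory used in this blog (a quick sketch, assuming git is installed; the paths are the ones referenced above):\\n\\n```bash\\n# Clone the Elastic observability examples repository\\ngit clone https://github.com/elastic/observability-examples.git\\n\\n# Move into the Go service used in this blog\\ncd observability-examples/Elastiflix/go-favorite\\n```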
\\n\\n## Step-by-step guide\\n\\n### Step 0. Log in to your Elastic Cloud account\\n\\nThis blog assumes you have an Elastic Cloud account — if not, follow the\\n[instructions to get started on Elastic\\nCloud](https://cloud.elastic.co/registration?elektra=en-cloud-page).\\n\\n![free trial](/assets/images/auto-instrumentation-go-applications-opentelemetry/elastic-blog-2-trial.png)\\n\\n### Step 1. Run the Docker Image with auto-instrumentation\\n\\nWe are going to use automatic instrumentation with the Go service from the\\n[Elastiflix demo\\napplication](https://github.com/elastic/observability-examples/tree/main/Elastiflix/go-favorite).\\n\\nWe will be using the following service from Elastiflix:\\n\\n```bash\\nElastiflix/go-favorite\\n```\\n\\nPer the [OpenTelemetry Automatic Instrumentation for Go\\ndocumentation](https://github.com/open-telemetry/opentelemetry-go-instrumentation/blob/main/docs/getting-started.md),\\nyou will configure the application to be auto-instrumented using\\ndocker-compose.\\n\\nAs specified in the [OTEL Go\\ndocumentation](https://github.com/open-telemetry/opentelemetry-go-instrumentation/blob/main/docs/getting-started.md),\\nwe will use environment variables and pass in the configuration values to\\nenable it to connect with [Elastic Observability’s APM\\nserver](https://www.elastic.co/guide/en/observability/current/apm-open-telemetry.html).\\n\\nBecause Elastic accepts OTLP natively, we just need to provide the Endpoint and\\nauthentication where the OTEL Exporter needs to send the data, as well as some\\nother environment variables.\\n\\n**Getting Elastic Cloud variables**\\nYou can copy the endpoints and token from Kibana under the path `/app/apm/onboarding?agent=openTelemetry`.\\n\\n![apm agents](/assets/images/auto-instrumentation-go-applications-opentelemetry/elastic-blog-3-apm-agents.png)\\n\\nYou will need to copy the following environment variables:\\n\\n```bash\\nOTEL_EXPORTER_OTLP_ENDPOINT\\nOTEL_EXPORTER_OTLP_HEADERS\\n```\\n\\nUpdate the `docker-compose.yml` file at the top of the `Elastiflix` repository,\\nadding a `go-auto` service and updating the `favorite-go` one:\\n\\n```yaml\\n favorite-go:\\n build: go-favorite/.\\n image: docker.elastic.co/demos/workshop/observability/elastiflix-go-favorite:${ELASTIC_VERSION}-${BUILD_NUMBER}\\n depends_on:\\n - redis\\n networks:\\n - app-network\\n ports:\\n - \\"5001:5000\\"\\n environment:\\n - REDIS_HOST=redis\\n - TOGGLE_SERVICE_DELAY=${TOGGLE_SERVICE_DELAY:-0}\\n - TOGGLE_CANARY_DELAY=${TOGGLE_CANARY_DELAY:-0}\\n - TOGGLE_CANARY_FAILURE=${TOGGLE_CANARY_FAILURE:-0}\\n volumes:\\n - favorite-go:/app\\n go-auto:\\n image: otel/autoinstrumentation-go\\n privileged: true\\n pid: \\"host\\"\\n networks:\\n - app-network\\n environment:\\n OTEL_EXPORTER_OTLP_ENDPOINT: \\"REPLACE WITH OTEL_EXPORTER_OTLP_ENDPOINT\\"\\n OTEL_EXPORTER_OTLP_HEADERS: \\"REPLACE WITH OTEL_EXPORTER_OTLP_HEADERS\\"\\n OTEL_GO_AUTO_TARGET_EXE: \\"/app/main\\"\\n OTEL_SERVICE_NAME: \\"go-favorite\\"\\n OTEL_PROPAGATORS: \\"tracecontext,baggage\\"\\n volumes:\\n - favorite-go:/app\\n - /proc:/host/proc\\n```\\n\\nAnd, at the bottom of the file:\\n\\n```yaml\\nvolumes:\\n favorite-go:\\nnetworks:\\n app-network:\\n driver: bridge\\n```\\n\\nFinally, in the configuration for the main node app, you will want to tell Elastiflix to call the Go favorites app by replacing the line:\\n\\n```yaml\\nenvironment:\\n - API_ENDPOINT_FAVORITES=favorite-java:5000\\n```\\n\\nwith:\\n\\n```yaml\\nenvironment:\\n - API_ENDPOINT_FAVORITES=favorite-go:5000\\n```\\n\\n### Step 2. Explore traces and logs in Elastic APM\\n\\nOnce you have this up and running, you can ping the endpoint for your\\ninstrumented service (in our case, this is /favorites), and you should see the\\napp appear in Elastic APM, as shown below.
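\\n\\nTo generate a bit of traffic, you can hit the service on the host port published in the compose file above (5001 on the host, mapped to container port 5000). This is only a sketch mirroring the approach used for the other Elastiflix services; adjust the host and port to your environment:\\n\\n```bash\\n# Issue a single request to the instrumented Go service\\ncurl localhost:5001/favorites\\n\\n# Or issue a request every second to produce a steady stream of traces\\nwhile true; do curl \\"localhost:5001/favorites\\"; sleep 1; done;\\n```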
\\n\\n![services](/assets/images/auto-instrumentation-go-applications-opentelemetry/elastic-blog-4-services.png)\\n\\nIt will begin by tracking throughput and latency, critical metrics for SREs to\\npay attention to.\\n\\nDigging in, we can see an overview of all our Transactions.\\n\\n![services-2](/assets/images/auto-instrumentation-go-applications-opentelemetry/elastic-blog-5-services2.png)\\n\\nAnd look at specific transactions:\\n\\n![graph colored lines](/assets/images/auto-instrumentation-go-applications-opentelemetry/elastic-blog-6-graph-colored.png)\\n\\nThis gives you complete visibility across metrics and traces!\\n\\n## Summary\\n\\nWith this docker-compose setup, you\'ve transformed your simple Go application into one\\nthat\'s automatically instrumented with OpenTelemetry. This will aid greatly in\\nunderstanding application performance, tracing errors, and gaining insights\\ninto how users interact with your software.\\n\\nRemember, observability is a crucial aspect of modern application development,\\nespecially in distributed systems. With tools like OpenTelemetry, understanding\\ncomplex systems becomes a tad bit easier.\\n\\nIn this blog, we discussed the following:\\n\\n- How to auto-instrument Go with OpenTelemetry.\\n- Using standard entries in a docker-compose file, auto-instrumentation was done\\n\\tefficiently and without adding code in multiple places, enabling\\n\\tmanageability.\\n- Using OpenTelemetry and its support for multiple languages, DevOps and SRE\\n\\tteams can auto-instrument their applications with ease, gaining immediate\\n\\tinsights into the health of the entire application stack and reducing mean time\\n\\tto resolution (MTTR).\\n\\nSince Elastic can support a mix of methods for ingesting data, whether it be\\nusing auto-instrumentation of open-source OpenTelemetry or manual\\ninstrumentation with its native APM agents, you can plan your migration to OTel\\nby focusing on a few applications first and then using OpenTelemetry across your\\napplications later on in a manner that best fits your business needs.\\n\\n> Developer resources:\\n>\\n> - [Elastiflix application](https://www.elastic.co/blog/getting-started-opentelemetry-instrumentation-sample-app), a guide to instrument different languages with OpenTelemetry\\n> - Python: [Auto-instrumentation](https://www.elastic.co/blog/auto-instrumentation-of-python-applications-opentelemetry), [Manual-instrumentation](https://www.elastic.co/blog/manual-instrumentation-of-python-applications-opentelemetry)\\n> - Java: [Auto-instrumentation](https://www.elastic.co/blog/auto-instrumentation-java-applications-opentelemetry), [Manual-instrumentation](https://www.elastic.co/blog/manual-instrumentation-of-java-applications-opentelemetry)\\n> - Node.js: [Auto-instrumentation](https://www.elastic.co/blog/auto-instrument-nodejs-applications-opentelemetry), [Manual-instrumentation](https://www.elastic.co/blog/manual-instrumentation-of-nodejs-applications-opentelemetry)\\n> - .NET: [Auto-instrumentation](https://www.elastic.co/blog/auto-instrumentation-of-net-applications-opentelemetry), [Manual-instrumentation](https://www.elastic.co/blog/manual-instrumentation-of-net-applications-opentelemetry)\\n> - Go: [Auto-instrumentation](https://www.elastic.co/blog/auto-instrumentation-go-applications-opentelemetry), [Manual-instrumentation](https://elastic.co/blog/manual-instrumentation-of-go-applications-opentelemetry)\\n> - [Best practices for instrumenting OpenTelemetry](https://www.elastic.co/blog/best-practices-instrumenting-opentelemetry)\\n>\\n> General configuration and use case resources:\\n>\\n> - [Independence with OpenTelemetry on 
Elastic](https://www.elastic.co/blog/opentelemetry-observability)\\n> - [Modern observability and security on Kubernetes with Elastic and OpenTelemetry](https://www.elastic.co/blog/implementing-kubernetes-observability-security-opentelemetry)\\n> - [3 models for logging with OpenTelemetry and Elastic](https://www.elastic.co/blog/3-models-logging-opentelemetry-elastic)\\n> - [Adding free and open Elastic APM as part of your Elastic Observability deployment](https://www.elastic.co/blog/adding-free-and-open-elastic-apm-as-part-of-your-elastic-observability-deployment)\\n> - [Capturing custom metrics through OpenTelemetry API in code with Elastic](https://www.elastic.co/blog/custom-metrics-app-code-java-agent-plugin)\\n> - [Future-proof your observability platform with OpenTelemetry and Elastic](https://www.elastic.co/virtual-events/future-proof-your-observability-platform-with-opentelemetry-and-elastic)\\n> - [Elastic Observability: Built for open technologies like Kubernetes, OpenTelemetry, Prometheus, Istio, and more](https://www.elastic.co/blog/kubernetes-k8s-observability-elasticsearch-cncf)\\n\\nDon’t have an Elastic Cloud account yet? [Sign up for Elastic Cloud](https://cloud.elastic.co/registration) and try out the auto-instrumentation capabilities that I discussed above. I would be interested in getting your feedback about your experience in gaining visibility into your application stack with Elastic.\\n\\n_The release and timing of any features or functionality described in this post remain at Elastic\'s sole discretion. Any features or functionality not currently available may not be delivered on time or at all._\\n","code":"var Component=(()=>{var d=Object.create;var l=Object.defineProperty;var u=Object.getOwnPropertyDescriptor;var m=Object.getOwnPropertyNames;var g=Object.getPrototypeOf,f=Object.prototype.hasOwnProperty;var w=(t,e)=>()=>(e||t((e={exports:{}}).exports,e),e.exports),y=(t,e)=>{for(var i in e)l(t,i,{get:e[i],enumerable:!0})},r=(t,e,i,a)=>{if(e&&typeof e==\\"object\\"||typeof e==\\"function\\")for(let o of m(e))!f.call(t,o)&&o!==i&&l(t,o,{get:()=>e[o],enumerable:!(a=u(e,o))||a.enumerable});return t};var b=(t,e,i)=>(i=t!=null?d(g(t)):{},r(e||!t||!t.__esModule?l(i,\\"default\\",{value:t,enumerable:!0}):i,t)),v=t=>r(l({},\\"__esModule\\",{value:!0}),t);var c=w((A,s)=>{s.exports=_jsx_runtime});var T={};y(T,{default:()=>p,frontmatter:()=>E});var n=b(c()),E={title:\\"Auto-instrumentation of Go applications with OpenTelemetry\\",slug:\\"auto-instrumentation-go-applications-opentelemetry\\",date:\\"2024-10-02\\",description:\\"Instrumenting Go applications with OpenTelemetry provides insights into application performance, dependencies, and errors. 
We\'ll show you how to automatically instrument a Go application using Docker, with no changes to your application code.\\",author:[{slug:\\"damien-mathieu\\"}],image:\\"observability-launch-series-3-go-auto.jpg\\",tags:[{slug:\\"opentelemetry\\"},{slug:\\"go\\"},{slug:\\"apm\\"},{slug:\\"instrumentation\\"}]};function h(t){let e={a:\\"a\\",blockquote:\\"blockquote\\",code:\\"code\\",em:\\"em\\",h2:\\"h2\\",h3:\\"h3\\",img:\\"img\\",li:\\"li\\",p:\\"p\\",pre:\\"pre\\",strong:\\"strong\\",ul:\\"ul\\",...t.components};return(0,n.jsxs)(n.Fragment,{children:[(0,n.jsx)(e.p,{children:`In the fast-paced universe of software development, especially in the\\ncloud-native realm, DevOps and SRE teams are increasingly emerging as essential\\npartners in application stability and growth.`}),`\\n`,(0,n.jsx)(e.p,{children:`DevOps engineers continuously optimize software delivery, while SRE teams act\\nas the stewards of application reliability, scalability, and top-tier\\nperformance. The challenge? These teams require a cutting-edge observability\\nsolution, one that encompasses full-stack insights, empowering them to rapidly\\nmanage, monitor, and rectify potential disruptions before they culminate into\\noperational challenges.`}),`\\n`,(0,n.jsx)(e.p,{children:`Observability in our modern distributed software ecosystem goes beyond mere\\nmonitoring \\\\u2014 it demands limitless data collection, precision in processing, and\\nthe correlation of this data into actionable insights. However, the road to\\nachieving this holistic view is paved with obstacles, from navigating version\\nincompatibilities to wrestling with restrictive proprietary code.`}),`\\n`,(0,n.jsxs)(e.p,{children:[\\"Enter \\",(0,n.jsx)(e.a,{href:\\"https://opentelemetry.io/\\",rel:\\"nofollow\\",children:\\"OpenTelemetry (OTel)\\"}),`, with the following\\nbenefits for those who adopt it:`]}),`\\n`,(0,n.jsxs)(e.ul,{children:[`\\n`,(0,n.jsx)(e.li,{children:`Escape vendor constraints with OTel, freeing yourself from vendor lock-in and\\nensuring top-notch observability.`}),`\\n`,(0,n.jsx)(e.li,{children:`See the harmony of unified logs, metrics, and traces come together to provide\\na complete system view.`}),`\\n`,(0,n.jsx)(e.li,{children:`Improve your application oversight through richer and enhanced\\ninstrumentations.`}),`\\n`,(0,n.jsx)(e.li,{children:`Embrace the benefits of backward compatibility to protect your prior\\ninstrumentation investments.`}),`\\n`,(0,n.jsx)(e.li,{children:`Embark on the OpenTelemetry journey with an easy learning curve, simplifying\\nonboarding and scalability.`}),`\\n`,(0,n.jsx)(e.li,{children:`Rely on a proven, future-ready standard to boost your confidence in every\\ninvestment.`}),`\\n`]}),`\\n`,(0,n.jsxs)(e.p,{children:[\\"In this blog, we will explore how you can use \\",(0,n.jsx)(e.a,{href:\\"https://github.com/open-telemetry/opentelemetry-go-instrumentation/\\",rel:\\"nofollow\\",children:`automatic instrumentation in\\nyour Go`}),`\\napplication using Docker, without the need to refactor any part of your\\napplication code. 
We will use an `,(0,n.jsx)(e.a,{href:\\"https://github.com/elastic/observability-examples\\",rel:\\"nofollow\\",children:`application called\\nElastiflix`}),`, which helps\\nhighlight auto-instrumentation in a simple way.`]}),`\\n`,(0,n.jsx)(e.h2,{id:\\"application-prerequisites-and-config\\",children:\\"Application, prerequisites, and config\\"}),`\\n`,(0,n.jsxs)(e.p,{children:[`The application that we use for this blog is called\\n`,(0,n.jsx)(e.a,{href:\\"https://github.com/elastic/observability-examples\\",rel:\\"nofollow\\",children:\\"Elastiflix\\"}),`, a\\nmovie-streaming application. It consists of several micro-services written in\\n.NET, NodeJS, Go, and Python.`]}),`\\n`,(0,n.jsx)(e.p,{children:`Before we instrument our sample application, we will first need to understand\\nhow Elastic can receive the telemetry data.`}),`\\n`,(0,n.jsx)(e.p,{children:(0,n.jsx)(e.img,{src:\\"/assets/images/auto-instrumentation-go-applications-opentelemetry/elastic-blog-1-config.png\\",alt:`Elastic configuration options for\\nOpenTelemetry`,width:\\"1365\\",height:\\"687\\"})}),`\\n`,(0,n.jsx)(e.p,{children:`All of Elastic Observability\\\\u2019s APM capabilities are available with OTel data.\\nSome of these include:`}),`\\n`,(0,n.jsxs)(e.ul,{children:[`\\n`,(0,n.jsx)(e.li,{children:\\"Service maps\\"}),`\\n`,(0,n.jsx)(e.li,{children:\\"Service details (latency, throughput, failed transactions)\\"}),`\\n`,(0,n.jsx)(e.li,{children:\\"Dependencies between services, distributed tracing\\"}),`\\n`,(0,n.jsx)(e.li,{children:\\"Transactions (traces)\\"}),`\\n`,(0,n.jsx)(e.li,{children:\\"Machine learning (ML) correlations\\"}),`\\n`,(0,n.jsx)(e.li,{children:\\"Log correlation\\"}),`\\n`]}),`\\n`,(0,n.jsx)(e.p,{children:`In addition to Elastic\\\\u2019s APM and a unified view of the telemetry data, you will\\nalso be able to use Elastic\\\\u2019s powerful machine learning capabilities to reduce\\nthe analysis, and alerting to help reduce MTTR.`}),`\\n`,(0,n.jsx)(e.h3,{id:\\"prerequisites\\",children:\\"Prerequisites\\"}),`\\n`,(0,n.jsxs)(e.ul,{children:[`\\n`,(0,n.jsxs)(e.li,{children:[\\"An Elastic Cloud account \\\\u2014 \\",(0,n.jsx)(e.a,{href:\\"https://cloud.elastic.co/\\",rel:\\"nofollow\\",children:\\"sign up now\\"}),\\".\\"]}),`\\n`,(0,n.jsxs)(e.li,{children:[\\"A clone of the \\",(0,n.jsx)(e.a,{href:\\"https://github.com/elastic/observability-examples\\",rel:\\"nofollow\\",children:\\"Elastiflix demo application\\"}),\\", or your own Go application\\"]}),`\\n`,(0,n.jsxs)(e.li,{children:[\\"Basic understanding of Docker \\\\u2014 potentially install \\",(0,n.jsx)(e.a,{href:\\"https://www.docker.com/products/docker-desktop/\\",rel:\\"nofollow\\",children:\\"Docker Desktop\\"})]}),`\\n`,(0,n.jsx)(e.li,{children:\\"Basic understanding of Go\\"}),`\\n`]}),`\\n`,(0,n.jsx)(e.h3,{id:\\"view-the-example-source-code\\",children:\\"View the example source code\\"}),`\\n`,(0,n.jsxs)(e.p,{children:[`The full source code, including the Dockerfile used in this blog, can be found\\non\\n`,(0,n.jsx)(e.a,{href:\\"https://github.com/elastic/observability-examples/tree/main/Elastiflix/go-favorite\\",rel:\\"nofollow\\",children:\\"GitHub\\"}),\\".\\"]}),`\\n`,(0,n.jsxs)(e.p,{children:[`The following steps will show you how to instrument this application and run it\\non the command line or in Docker. 
If you are interested in a more complete OTel\\nexample, take a look at the docker-compose file\\n`,(0,n.jsx)(e.a,{href:\\"https://github.com/elastic/observability-examples/tree/main#start-the-app\\",rel:\\"nofollow\\",children:\\"here\\"}),`,\\nwhich will bring up the full project.`]}),`\\n`,(0,n.jsx)(e.h2,{id:\\"step-by-step-guide\\",children:\\"Step-by-step guide\\"}),`\\n`,(0,n.jsx)(e.h3,{id:\\"step-0-log-in-to-your-elastic-cloud-account\\",children:\\"Step 0. Log in to your Elastic Cloud account\\"}),`\\n`,(0,n.jsxs)(e.p,{children:[`This blog assumes you have an Elastic Cloud account \\\\u2014 if not, follow the\\n`,(0,n.jsx)(e.a,{href:\\"https://cloud.elastic.co/registration?elektra=en-cloud-page\\",rel:\\"nofollow\\",children:`instructions to get started on Elastic\\nCloud`}),\\".\\"]}),`\\n`,(0,n.jsx)(e.p,{children:(0,n.jsx)(e.img,{src:\\"/assets/images/auto-instrumentation-go-applications-opentelemetry/elastic-blog-2-trial.png\\",alt:\\"free trial\\",width:\\"884\\",height:\\"654\\"})}),`\\n`,(0,n.jsx)(e.h3,{id:\\"step-1-run-the-docker-image-with-auto-instrumentation\\",children:\\"Step 1. Run the Docker Image with auto-instrumentation\\"}),`\\n`,(0,n.jsxs)(e.p,{children:[`We are going to use automatic instrumentation with the Go service from the\\n`,(0,n.jsx)(e.a,{href:\\"https://github.com/elastic/observability-examples/tree/main/Elastiflix/go-favorite\\",rel:\\"nofollow\\",children:`Elastiflix demo\\napplication`}),\\".\\"]}),`\\n`,(0,n.jsx)(e.p,{children:\\"We will be using the following service from Elastiflix:\\"}),`\\n`,(0,n.jsx)(e.pre,{children:(0,n.jsx)(e.code,{className:\\"language-bash\\",children:`Elastiflix/go-favorite\\n`})}),`\\n`,(0,n.jsxs)(e.p,{children:[\\"Per the \\",(0,n.jsx)(e.a,{href:\\"https://github.com/open-telemetry/opentelemetry-go-instrumentation/blob/main/docs/getting-started.md\\",rel:\\"nofollow\\",children:`OpenTelemetry Automatic Instrumentation for Go\\ndocumentation`}),`,\\nyou will configure the application to be auto-instrumented using\\ndocker-compose.`]}),`\\n`,(0,n.jsxs)(e.p,{children:[\\"As specified in the \\",(0,n.jsx)(e.a,{href:\\"https://github.com/open-telemetry/opentelemetry-go-instrumentation/blob/main/docs/getting-started.md\\",rel:\\"nofollow\\",children:`OTEL Go\\ndocumentation`}),`,\\nwe will use environment variables and pass in the configuration values to\\nenable it to connect with `,(0,n.jsx)(e.a,{href:\\"https://www.elastic.co/guide/en/observability/current/apm-open-telemetry.html\\",rel:\\"nofollow\\",children:`Elastic Observability\\\\u2019s APM\\nserver`}),\\".\\"]}),`\\n`,(0,n.jsx)(e.p,{children:`Because Elastic accepts OTLP natively, we just need to provide the Endpoint and\\nauthentication where the OTEL Exporter needs to send the data, as well as some\\nother environment variables.`}),`\\n`,(0,n.jsxs)(e.p,{children:[(0,n.jsx)(e.strong,{children:\\"Getting Elastic Cloud variables\\"}),`\\nYou can copy the endpoints and token from Kibana under the path `,(0,n.jsx)(e.code,{children:\\"/app/apm/onboarding?agent=openTelemetry\\"}),\\".\\"]}),`\\n`,(0,n.jsx)(e.p,{children:(0,n.jsx)(e.img,{src:\\"/assets/images/auto-instrumentation-go-applications-opentelemetry/elastic-blog-3-apm-agents.png\\",alt:\\"apm agents\\",width:\\"1924\\",height:\\"1304\\"})}),`\\n`,(0,n.jsx)(e.p,{children:\\"You will need to copy the following environment 
variables:\\"}),`\\n`,(0,n.jsx)(e.pre,{children:(0,n.jsx)(e.code,{className:\\"language-bash\\",children:`OTEL_EXPORTER_OTLP_ENDPOINT\\nOTEL_EXPORTER_OTLP_HEADERS\\n`})}),`\\n`,(0,n.jsxs)(e.p,{children:[\\"Update the \\",(0,n.jsx)(e.code,{children:\\"docker-compose.yml\\"}),\\" file at the top of the \\",(0,n.jsx)(e.code,{children:\\"Elastiflix\\"}),` repository,\\nadding a `,(0,n.jsx)(e.code,{children:\\"go-auto\\"}),\\" service and updating the \\",(0,n.jsx)(e.code,{children:\\"favorite-go\\"}),\\" one:\\"]}),`\\n`,(0,n.jsx)(e.pre,{children:(0,n.jsx)(e.code,{className:\\"language-yaml\\",children:` favorite-go:\\n build: go-favorite/.\\n image: docker.elastic.co/demos/workshop/observability/elastiflix-go-favorite:\\\\${ELASTIC_VERSION}-\\\\${BUILD_NUMBER}\\n depends_on:\\n - redis\\n networks:\\n - app-network\\n ports:\\n - \\"5001:5000\\"\\n environment:\\n - REDIS_HOST=redis\\n - TOGGLE_SERVICE_DELAY=\\\\${TOGGLE_SERVICE_DELAY:-0}\\n - TOGGLE_CANARY_DELAY=\\\\${TOGGLE_CANARY_DELAY:-0}\\n - TOGGLE_CANARY_FAILURE=\\\\${TOGGLE_CANARY_FAILURE:-0}\\n volumes:\\n - favorite-go:/app\\n go-auto:\\n image: otel/autoinstrumentation-go\\n privileged: true\\n pid: \\"host\\"\\n networks:\\n - app-network\\n environment:\\n OTEL_EXPORTER_OTLP_ENDPOINT: \\"REPLACE WITH OTEL_EXPORTER_OTLP_ENDPOINT\\"\\n OTEL_EXPORTER_OTLP_HEADERS: \\"REPLACE WITH OTEL_EXPORTER_OTLP_HEADERS\\"\\n OTEL_GO_AUTO_TARGET_EXE: \\"/app/main\\"\\n OTEL_SERVICE_NAME: \\"go-favorite\\"\\n OTEL_PROPAGATORS: \\"tracecontext,baggage\\"\\n volumes:\\n - favorite-go:/app\\n - /proc:/host/proc\\n`})}),`\\n`,(0,n.jsx)(e.p,{children:\\"And, at the bottom of the file:\\"}),`\\n`,(0,n.jsx)(e.pre,{children:(0,n.jsx)(e.code,{className:\\"language-yaml\\",children:`volumes:\\n favorite-go:\\nnetworks:\\n app-network:\\n driver: bridge\\n`})}),`\\n`,(0,n.jsx)(e.p,{children:\\"Finally, in the configuration for the main node app, you will want to tell Elastiflix to call the Go favorites app by replacing the line:\\"}),`\\n`,(0,n.jsx)(e.pre,{children:(0,n.jsx)(e.code,{className:\\"language-yaml\\",children:`environment:\\n - API_ENDPOINT_FAVORITES=favorite-java:5000\\n`})}),`\\n`,(0,n.jsx)(e.p,{children:\\"with:\\"}),`\\n`,(0,n.jsx)(e.pre,{children:(0,n.jsx)(e.code,{className:\\"language-yaml\\",children:`environment:\\n - API_ENDPOINT_FAVORITES=favorite-go:5000\\n`})}),`\\n`,(0,n.jsx)(e.h3,{id:\\"step-3-explore-traces-and-logs-in-elastic-apm\\",children:\\"Step 3: Explore traces and logs in Elastic APM\\"}),`\\n`,(0,n.jsx)(e.p,{children:`Once you have this up and running, you can ping the endpoint for your\\ninstrumented service (in our case, this is /favorites), and you should see the\\napp appear in Elastic APM, as shown below:`}),`\\n`,(0,n.jsx)(e.p,{children:(0,n.jsx)(e.img,{src:\\"/assets/images/auto-instrumentation-go-applications-opentelemetry/elastic-blog-4-services.png\\",alt:\\"services\\",width:\\"2262\\",height:\\"808\\"})}),`\\n`,(0,n.jsx)(e.p,{children:`It will begin by tracking throughput and latency critical metrics for SREs to\\npay attention to.`}),`\\n`,(0,n.jsx)(e.p,{children:\\"Digging in, we can see an overview of all our Transactions.\\"}),`\\n`,(0,n.jsx)(e.p,{children:(0,n.jsx)(e.img,{src:\\"/assets/images/auto-instrumentation-go-applications-opentelemetry/elastic-blog-5-services2.png\\",alt:\\"services-2\\",width:\\"1971\\",height:\\"896\\"})}),`\\n`,(0,n.jsx)(e.p,{children:\\"And look at specific 
transactions:\\"}),`\\n`,(0,n.jsx)(e.p,{children:(0,n.jsx)(e.img,{src:\\"/assets/images/auto-instrumentation-go-applications-opentelemetry/elastic-blog-6-graph-colored.png\\",alt:\\"graph colored lines\\",width:\\"1965\\",height:\\"708\\"})}),`\\n`,(0,n.jsx)(e.p,{children:\\"This gives you complete visibility across metrics, and traces!\\"}),`\\n`,(0,n.jsx)(e.h2,{id:\\"summary\\",children:\\"Summary\\"}),`\\n`,(0,n.jsx)(e.p,{children:`With this Dockerfile, you\'ve transformed your simple Go application into one\\nthat\'s automatically instrumented with OpenTelemetry. This will aid greatly in\\nunderstanding application performance, tracing errors, and gaining insights\\ninto how users interact with your software.`}),`\\n`,(0,n.jsx)(e.p,{children:`Remember, observability is a crucial aspect of modern application development,\\nespecially in distributed systems. With tools like OpenTelemetry, understanding\\ncomplex systems becomes a tad bit easier.`}),`\\n`,(0,n.jsx)(e.p,{children:\\"In this blog, we discussed the following:\\"}),`\\n`,(0,n.jsxs)(e.ul,{children:[`\\n`,(0,n.jsx)(e.li,{children:\\"How to auto-instrument Go with OpenTelemetry.\\"}),`\\n`,(0,n.jsx)(e.li,{children:`Using standard commands in a Docker file, auto-instrumentation was done\\nefficiently and without adding code in multiple places enabling\\nmanageability.`}),`\\n`,(0,n.jsx)(e.li,{children:`Using OpenTelemetry and its support for multiple languages, DevOps and SRE\\nteams can auto-instrument their applications with ease gaining immediate\\ninsights into the health of the entire application stack and reduce mean time\\nto resolution (MTTR).`}),`\\n`]}),`\\n`,(0,n.jsx)(e.p,{children:`Since Elastic can support a mix of methods for ingesting data, whether it be\\nusing auto-instrumentation of open-source OpenTelemetry or manual\\ninstrumentation with its native APM agents, you can plan your migration to OTel\\nby focusing on a few applications first and then using OpenTelemety across your\\napplications later on in a manner that best fits your business needs.`}),`\\n`,(0,n.jsxs)(e.blockquote,{children:[`\\n`,(0,n.jsx)(e.p,{children:\\"Developer resources:\\"}),`\\n`,(0,n.jsxs)(e.ul,{children:[`\\n`,(0,n.jsxs)(e.li,{children:[(0,n.jsx)(e.a,{href:\\"https://www.elastic.co/blog/getting-started-opentelemetry-instrumentation-sample-app\\",rel:\\"nofollow\\",children:\\"Elastiflix application\\"}),\\", a guide to instrument different languages with OpenTelemetry\\"]}),`\\n`,(0,n.jsxs)(e.li,{children:[\\"Python: \\",(0,n.jsx)(e.a,{href:\\"https://www.elastic.co/blog/auto-instrumentation-of-python-applications-opentelemetry\\",rel:\\"nofollow\\",children:\\"Auto-instrumentation\\"}),\\", \\",(0,n.jsx)(e.a,{href:\\"https://www.elastic.co/blog/manual-instrumentation-of-python-applications-opentelemetry\\",rel:\\"nofollow\\",children:\\"Manual-instrumentation\\"})]}),`\\n`,(0,n.jsxs)(e.li,{children:[\\"Java: \\",(0,n.jsx)(e.a,{href:\\"https://www.elastic.co/blog/auto-instrumentation-java-applications-opentelemetry\\",rel:\\"nofollow\\",children:\\"Auto-instrumentation\\"}),\\", \\",(0,n.jsx)(e.a,{href:\\"https://www.elastic.co/blog/manual-instrumentation-of-java-applications-opentelemetry\\",rel:\\"nofollow\\",children:\\"Manual-instrumentation\\"})]}),`\\n`,(0,n.jsxs)(e.li,{children:[\\"Node.js: \\",(0,n.jsx)(e.a,{href:\\"https://www.elastic.co/blog/auto-instrument-nodejs-applications-opentelemetry\\",rel:\\"nofollow\\",children:\\"Auto-instrumentation\\"}),\\", 
\\",(0,n.jsx)(e.a,{href:\\"https://www.elastic.co/blog/manual-instrumentation-of-nodejs-applications-opentelemetry\\",rel:\\"nofollow\\",children:\\"Manual-instrumentation\\"})]}),`\\n`,(0,n.jsxs)(e.li,{children:[\\".NET: \\",(0,n.jsx)(e.a,{href:\\"https://www.elastic.co/blog/auto-instrumentation-of-net-applications-opentelemetry\\",rel:\\"nofollow\\",children:\\"Auto-instrumentation\\"}),\\", \\",(0,n.jsx)(e.a,{href:\\"https://www.elastic.co/blog/manual-instrumentation-of-net-applications-opentelemetry\\",rel:\\"nofollow\\",children:\\"Manual-instrumentation\\"})]}),`\\n`,(0,n.jsxs)(e.li,{children:[\\"Go: \\",(0,n.jsx)(e.a,{href:\\"https://www.elastic.co/blog/auto-instrumentation-go-applications-opentelemetry\\",rel:\\"nofollow\\",children:\\"Auto-instrumentation\\"}),\\" \\",(0,n.jsx)(e.a,{href:\\"https://elastic.co/blog/manual-instrumentation-of-go-applications-opentelemetry\\",rel:\\"nofollow\\",children:\\"Manual-instrumentation\\"})]}),`\\n`,(0,n.jsx)(e.li,{children:(0,n.jsx)(e.a,{href:\\"https://www.elastic.co/blog/best-practices-instrumenting-opentelemetry\\",rel:\\"nofollow\\",children:\\"Best practices for instrumenting OpenTelemetry\\"})}),`\\n`]}),`\\n`,(0,n.jsx)(e.p,{children:\\"General configuration and use case resources:\\"}),`\\n`,(0,n.jsxs)(e.ul,{children:[`\\n`,(0,n.jsx)(e.li,{children:(0,n.jsx)(e.a,{href:\\"https://www.elastic.co/blog/opentelemetry-observability\\",rel:\\"nofollow\\",children:\\"Independence with OpenTelemetry on Elastic\\"})}),`\\n`,(0,n.jsx)(e.li,{children:(0,n.jsx)(e.a,{href:\\"https://www.elastic.co/blog/implementing-kubernetes-observability-security-opentelemetry\\",rel:\\"nofollow\\",children:\\"Modern observability and security on Kubernetes with Elastic and OpenTelemetry\\"})}),`\\n`,(0,n.jsx)(e.li,{children:(0,n.jsx)(e.a,{href:\\"https://www.elastic.co/blog/3-models-logging-opentelemetry-elastic\\",rel:\\"nofollow\\",children:\\"3 models for logging with OpenTelemetry and Elastic\\"})}),`\\n`,(0,n.jsx)(e.li,{children:(0,n.jsx)(e.a,{href:\\"https://www.elastic.co/blog/adding-free-and-open-elastic-apm-as-part-of-your-elastic-observability-deployment\\",rel:\\"nofollow\\",children:\\"Adding free and open Elastic APM as part of your Elastic Observability deployment\\"})}),`\\n`,(0,n.jsx)(e.li,{children:(0,n.jsx)(e.a,{href:\\"https://www.elastic.co/blog/custom-metrics-app-code-java-agent-plugin\\",rel:\\"nofollow\\",children:\\"Capturing custom metrics through OpenTelemetry API in code with Elastic\\"})}),`\\n`,(0,n.jsx)(e.li,{children:(0,n.jsx)(e.a,{href:\\"https://www.elastic.co/virtual-events/future-proof-your-observability-platform-with-opentelemetry-and-elastic\\",rel:\\"nofollow\\",children:\\"Future-proof your observability platform with OpenTelemetry and Elastic\\"})}),`\\n`,(0,n.jsx)(e.li,{children:(0,n.jsx)(e.a,{href:\\"https://www.elastic.co/blog/kubernetes-k8s-observability-elasticsearch-cncf\\",rel:\\"nofollow\\",children:\\"Elastic Observability: Built for open technologies like Kubernetes, OpenTelemetry, Prometheus, Istio, and more\\"})}),`\\n`]}),`\\n`]}),`\\n`,(0,n.jsxs)(e.p,{children:[\\"Don\\\\u2019t have an Elastic Cloud account yet? \\",(0,n.jsx)(e.a,{href:\\"https://cloud.elastic.co/registration\\",rel:\\"nofollow\\",children:\\"Sign up for Elastic Cloud\\"}),\\" and try out the auto-instrumentation capabilities that I discussed above. 
I would be interested in getting your feedback about your experience in gaining visibility into your application stack with Elastic.\\"]}),`\\n`,(0,n.jsx)(e.p,{children:(0,n.jsx)(e.em,{children:\\"The release and timing of any features or functionality described in this post remain at Elastic\'s sole discretion. Any features or functionality not currently available may not be delivered on time or at all.\\"})})]})}function p(t={}){let{wrapper:e}=t.components||{};return e?(0,n.jsx)(e,{...t,children:(0,n.jsx)(h,{...t})}):h(t)}return v(T);})();\\n;return Component;"},"_id":"articles/auto-instrument-go-applications-opentelemetry.mdx","_raw":{"sourceFilePath":"articles/auto-instrument-go-applications-opentelemetry.mdx","sourceFileName":"auto-instrument-go-applications-opentelemetry.mdx","sourceFileDir":"articles","contentType":"mdx","flattenedPath":"articles/auto-instrument-go-applications-opentelemetry"},"type":"Article","imageUrl":"/assets/images/auto-instrumentation-go-applications-opentelemetry/observability-launch-series-3-go-auto.jpg","readingTime":"8 min read","url":"/auto-instrumentation-go-applications-opentelemetry","headings":[{"level":2,"title":"Application, prerequisites, and config","href":"#application-prerequisites-and-config"},{"level":3,"title":"Prerequisites","href":"#prerequisites"},{"level":3,"title":"View the example source code","href":"#view-the-example-source-code"},{"level":2,"title":"Step-by-step guide","href":"#step-by-step-guide"},{"level":3,"title":"Step 0. Log in to your Elastic Cloud account","href":"#step-0-log-in-to-your-elastic-cloud-account"},{"level":3,"title":"Step 1. Run the Docker Image with auto-instrumentation","href":"#step-1-run-the-docker-image-with-auto-instrumentation"},{"level":3,"title":"Step 3: Explore traces and logs in Elastic APM","href":"#step-3-explore-traces-and-logs-in-elastic-apm"},{"level":2,"title":"Summary","href":"#summary"}]},{"title":"Automatic instrumentation with OpenTelemetry for Node.js applications","slug":"auto-instrument-nodejs-apps-opentelemetry","date":"2023-08-30","description":"Learn how to auto-instrument Node.js applications using OpenTelemetry. With standard commands in a Docker file, applications can be instrumented quickly without writing code in multiple places, enabling rapid change, scale, and easier management.","image":"observability-launch-series-1-node-js-auto_(1).jpg","author":[{"slug":"bahubali-shetti","type":"Author","_raw":{}}],"tags":[{"slug":"opentelemetry","type":"Tag","_raw":{}},{"slug":"nodejs","type":"Tag","_raw":{}},{"slug":"apm","type":"Tag","_raw":{}},{"slug":"instrumentation","type":"Tag","_raw":{}}],"body":{"raw":"\\nDevOps and SRE teams are transforming the process of software development. While DevOps engineers focus on efficient software applications and service delivery, SRE teams are key to ensuring reliability, scalability, and performance. These teams must rely on a full-stack observability solution that allows them to manage and monitor systems and ensure issues are resolved before they impact the business.\\n\\nObservability across the entire stack of modern distributed applications requires data collection, processing, and correlation often in the form of dashboards. 
Ingesting all system data requires installing agents across stacks, frameworks, and providers — a process that can be challenging and time-consuming for teams who have to deal with version changes, compatibility issues, and proprietary code that doesn\'t scale as systems change.\\n\\nThanks to [OpenTelemetry](https://opentelemetry.io) (OTel), DevOps and SRE teams now have a standard way to collect and send data that doesn\'t rely on proprietary code and has a large support community, reducing vendor lock-in.\\n\\nIn a [previous blog](https://www.elastic.co/blog/opentelemetry-observability), we also reviewed how to use the [OpenTelemetry demo](https://github.com/elastic/opentelemetry-demo) and connect it to Elastic\xae, as well as some of Elastic’s capabilities with OpenTelemetry and Kubernetes.\\n\\nIn this blog, we will show how to use [automatic instrumentation for OpenTelemetry](https://opentelemetry.io/docs/instrumentation/js/automatic/) with the Node.js service of our [application called Elastiflix](https://github.com/elastic/observability-examples), which helps highlight auto-instrumentation in a simple way.\\n\\nThe beauty of this is that there is **no need for the otel-collector**! This setup enables you to slowly and easily migrate an application to OTel with Elastic according to a timeline that best fits your business.\\n\\n## Application, prerequisites, and config\\n\\nThe application that we use for this blog is called [Elastiflix](https://github.com/elastic/observability-examples), a movie streaming application. It consists of several micro-services written in .NET, NodeJS, Go, and Python.\\n\\nBefore we instrument our sample application, we will first need to understand how Elastic can receive the telemetry data.\\n\\n![options](/assets/images/auto-instrument-nodejs-apps-opentelemetry/elastic-blog-1-otel-config-options.png)\\n\\nAll of Elastic Observability’s APM capabilities are available with OTel data. Some of these include:\\n\\n- Service maps\\n- Service details (latency, throughput, failed transactions)\\n- Dependencies between services, distributed tracing\\n- Transactions (traces)\\n- Machine learning (ML) correlations\\n- Log correlation\\n\\nIn addition to Elastic’s APM and a unified view of the telemetry data, you will also be able to use Elastic’s powerful machine learning capabilities to speed up analysis, and alerting to help reduce MTTR.\\n\\n### Prerequisites\\n\\n- An Elastic Cloud account — [sign up now](https://cloud.elastic.co/)\\n- A clone of the [Elastiflix demo application](https://github.com/elastic/observability-examples), or your own **Node.js** application\\n- Basic understanding of Docker — potentially install [Docker Desktop](https://www.docker.com/products/docker-desktop/)\\n- Basic understanding of Node.js\\n\\n### View the example source code\\n\\nThe full source code, including the Dockerfile used in this blog, can be found on [GitHub](https://github.com/elastic/observability-examples/tree/main/Elastiflix/python-favorite-otel-auto). The repository also contains the [same application without instrumentation](https://github.com/elastic/observability-examples/tree/main/Elastiflix/python-favorite). This allows you to compare each file and see the differences.
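\\n\\nOne concrete way to do that comparison, once you have cloned the repository, is a recursive diff of the two directories linked above (a sketch, assuming git and diff are available):\\n\\n```bash\\n# Clone the examples repository\\ngit clone https://github.com/elastic/observability-examples.git\\ncd observability-examples/Elastiflix\\n\\n# Compare the uninstrumented service with its auto-instrumented counterpart\\ndiff -ru python-favorite python-favorite-otel-auto\\n```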
If you are interested in a more complete OTel example, take a look at the docker-compose file [here](https://github.com/elastic/observability-examples/tree/main#start-the-app), which will bring up the full project.\\n\\n## Step-by-step guide\\n\\n### Step 0. Log in to your Elastic Cloud account\\n\\nThis blog assumes you have an Elastic Cloud account — if not, follow the [instructions to get started on Elastic Cloud](https://cloud.elastic.co/registration?elektra=en-cloud-page).\\n\\n![free trial](/assets/images/auto-instrument-nodejs-apps-opentelemetry/elastic-blog-2-free-trial.png)\\n\\n### Step 1. Configure auto-instrumentation for the Node.js Service\\n\\nWe are going to use automatic instrumentation with the Node.js service from the [Elastiflix demo application](https://github.com/elastic/observability-examples).\\n\\nWe will be using the following service from Elastiflix:\\n\\n```bash\\nElastiflix/node-server-otel-auto\\n```\\n\\nPer the [OpenTelemetry JavaScript documentation](https://opentelemetry.io/docs/instrumentation/js/automatic/) and the [@opentelemetry/auto-instrumentations-node](https://www.npmjs.com/package/@opentelemetry/auto-instrumentations-node) package documentation, you simply install the appropriate Node packages using npm.\\n\\n```bash\\nnpm install --save @opentelemetry/api\\nnpm install --save @opentelemetry/auto-instrumentations-node\\n```\\n\\nIf you are running the Node.js service on the command line, then here is how you can run it with auto-instrumentation enabled.\\n\\n```bash\\nnode --require \'@opentelemetry/auto-instrumentations-node/register\' index.js\\n```\\n\\nFor our application, we do this as part of the Dockerfile.\\n\\n**Dockerfile**\\n\\n```dockerfile\\nFROM node:14\\n\\nWORKDIR /app\\n\\nCOPY [\\"package.json\\", \\"./\\"]\\nRUN ls\\nRUN npm install --production\\nCOPY . .\\n\\nRUN npm install --save @opentelemetry/api\\nRUN npm install --save @opentelemetry/auto-instrumentations-node\\n\\nEXPOSE 3001\\n\\nCMD [\\"node\\", \\"--require\\", \\"@opentelemetry/auto-instrumentations-node/register\\", \\"index.js\\"]\\n```\\n\\n
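If you would like to validate the setup outside Docker first, here is a minimal command-line sketch; the endpoint and token shown are placeholder assumptions, and Step 2 below shows where to copy the real values from.\\n\\n```bash\\n# Hypothetical local run; replace the placeholder endpoint and token with\\n# the values from your Elastic Cloud deployment (see Step 2 below).\\nexport OTEL_EXPORTER_OTLP_ENDPOINT=\\"https://my-deployment.apm.us-east-1.aws.cloud.es.io:443\\"\\nexport OTEL_EXPORTER_OTLP_HEADERS=\\"Authorization=Bearer <your-secret-token>\\"\\nexport OTEL_SERVICE_NAME=\\"node-server-otel-auto\\"\\n\\nnode --require \'@opentelemetry/auto-instrumentations-node/register\' index.js\\n```\\n\\nThese are the standard OpenTelemetry SDK environment variables, so the same values work both here and in the docker run command shown in Step 2.\\n\\n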
### Step 2. Running the Docker image with environment variables\\n\\nAs specified in the [OTel documentation](https://opentelemetry.io/docs/instrumentation/js/automatic/), we will use environment variables and pass in the configuration values to enable it to connect with [Elastic Observability’s APM server](https://www.elastic.co/guide/en/apm/guide/current/open-telemetry.html).\\n\\nBecause Elastic accepts OTLP natively, we just need to provide the endpoint and authentication details that tell the OTel exporter where to send the data, as well as some other environment variables.\\n\\n**Getting Elastic Cloud variables** \\nYou can copy the endpoints and token from Kibana\xae under the path /app/home#/tutorial/apm.\\n\\n![apm agents](/assets/images/auto-instrument-nodejs-apps-opentelemetry/elastic-blog-3-apm-agents.png)\\n\\nYou will need to copy the following environment variables:\\n\\n```bash\\nOTEL_EXPORTER_OTLP_ENDPOINT\\nOTEL_EXPORTER_OTLP_HEADERS\\n```\\n\\n**Build the image**\\n\\n```bash\\ndocker build -t node-otel-auto-image .\\n```\\n\\n**Run the image**\\n\\n```bash\\ndocker run \\\\\\n -e OTEL_EXPORTER_OTLP_ENDPOINT=\\"\\" \\\\\\n -e OTEL_EXPORTER_OTLP_HEADERS=\\"Authorization=Bearer \\" \\\\\\n -e OTEL_RESOURCE_ATTRIBUTES=\\"service.version=1.0,deployment.environment=production\\" \\\\\\n -e OTEL_SERVICE_NAME=\\"node-server-otel-auto\\" \\\\\\n -p 3001:3001 \\\\\\n node-otel-auto-image\\n```\\n\\nYou can now issue a few requests in order to generate trace data. Note that these requests are expected to return an error, as this service relies on some downstream services that you may not have running on your machine.\\n\\n```bash\\ncurl localhost:3001/api/login\\ncurl localhost:3001/api/favorites\\n\\n# or alternatively issue a request every second\\n\\nwhile true; do curl \\"localhost:3001/api/favorites\\"; sleep 1; done;\\n```\\n\\n### Step 3: Explore traces, metrics, and logs in Elastic APM\\n\\nExploring the Services section in Elastic APM, you’ll see the Node service displayed.\\n\\n![services](/assets/images/auto-instrument-nodejs-apps-opentelemetry/elastic-blog-4-services.png)\\n\\nClicking on the node-server-otel-auto service, you can see that it is ingesting telemetry data using OpenTelemetry.\\n\\n## Summary\\n\\nIn this blog, we discussed the following:\\n\\n- How to auto-instrument Node.js with OpenTelemetry\\n- Using standard commands in a Dockerfile, auto-instrumentation was done efficiently and without adding code in multiple places, improving manageability\\n\\nSince Elastic can support a mix of methods for ingesting data, whether it be auto-instrumentation with open-source OpenTelemetry or manual instrumentation with its native APM agents, you can plan your migration to OTel by focusing on a few applications first and then using OpenTelemetry across your applications later on in a manner that best fits your business needs.\\n\\n> Developer resources:\\n>\\n> - [Elastiflix application](https://www.elastic.co/blog/getting-started-opentelemetry-instrumentation-sample-app), a guide to instrument different languages with OpenTelemetry\\n> - Python: [Auto-instrumentation](https://www.elastic.co/blog/auto-instrumentation-of-python-applications-opentelemetry), [Manual-instrumentation](https://www.elastic.co/blog/manual-instrumentation-of-python-applications-opentelemetry)\\n> - Java: [Auto-instrumentation](https://www.elastic.co/blog/auto-instrumentation-of-java-applications-opentelemetry), 
[Manual-instrumentation](https://www.elastic.co/blog/manual-instrumentation-of-java-applications-opentelemetry)\\n> - Node.js: [Auto-instrumentation](https://www.elastic.co/blog/auto-instrument-nodejs-apps-opentelemetry), [Manual-instrumentation](https://www.elastic.co/blog/manual-instrumentation-of-nodejs-applications-opentelemetry)\\n> - .NET: [Auto-instrumentation](https://www.elastic.co/blog/auto-instrumentation-of-net-applications-opentelemetry), [Manual-instrumentation](https://www.elastic.co/blog/manual-instrumentation-of-net-applications-opentelemetry)\\n> - Go: [Manual-instrumentation](https://elastic.co/blog/manual-instrumentation-of-go-applications-opentelemetry)\\n> - [Best practices for instrumenting OpenTelemetry](https://www.elastic.co/blog/best-practices-instrumenting-opentelemetry)\\n>\\n> General configuration and use case resources:\\n>\\n> - [Independence with OpenTelemetry on Elastic](https://www.elastic.co/blog/opentelemetry-observability)\\n> - [Modern observability and security on Kubernetes with Elastic and OpenTelemetry](https://www.elastic.co/blog/implementing-kubernetes-observability-security-opentelemetry)\\n> - [3 models for logging with OpenTelemetry and Elastic](https://www.elastic.co/blog/3-models-logging-opentelemetry-elastic)\\n> - [Adding free and open Elastic APM as part of your Elastic Observability deployment](https://www.elastic.co/blog/adding-free-and-open-elastic-apm-as-part-of-your-elastic-observability-deployment)\\n> - [Capturing custom metrics through OpenTelemetry API in code with Elastic](https://www.elastic.co/blog/custom-metrics-app-code-java-agent-plugin)\\n> - [Future-proof your observability platform with OpenTelemetry and Elastic](https://www.elastic.co/virtual-events/future-proof-your-observability-platform-with-opentelemetry-and-elastic)\\n> - [Elastic Observability: Built for open technologies like Kubernetes, OpenTelemetry, Prometheus, Istio, and more](https://www.elastic.co/blog/kubernetes-k8s-observability-elasticsearch-cncf)\\n\\nDon’t have an Elastic Cloud account yet? [Sign up for Elastic Cloud](https://cloud.elastic.co/registration) and try out the auto-instrumentation capabilities that I discussed above. I would be interested in getting your feedback about your experience in gaining visibility into your application stack with Elastic.\\n\\n_The release and timing of any features or functionality described in this post remain at Elastic\'s sole discretion. 
Any features or functionality not currently available may not be delivered on time or at all._\\n","code":"var Component=(()=>{var p=Object.create;var l=Object.defineProperty;var u=Object.getOwnPropertyDescriptor;var m=Object.getOwnPropertyNames;var g=Object.getPrototypeOf,f=Object.prototype.hasOwnProperty;var w=(t,e)=>()=>(e||t((e={exports:{}}).exports,e),e.exports),y=(t,e)=>{for(var i in e)l(t,i,{get:e[i],enumerable:!0})},r=(t,e,i,a)=>{if(e&&typeof e==\\"object\\"||typeof e==\\"function\\")for(let o of m(e))!f.call(t,o)&&o!==i&&l(t,o,{get:()=>e[o],enumerable:!(a=u(e,o))||a.enumerable});return t};var b=(t,e,i)=>(i=t!=null?p(g(t)):{},r(e||!t||!t.__esModule?l(i,\\"default\\",{value:t,enumerable:!0}):i,t)),v=t=>r(l({},\\"__esModule\\",{value:!0}),t);var c=w((k,s)=>{s.exports=_jsx_runtime});var T={};y(T,{default:()=>d,frontmatter:()=>E});var n=b(c()),E={title:\\"Automatic instrumentation with OpenTelemetry for Node.js applications\\",slug:\\"auto-instrument-nodejs-apps-opentelemetry\\",date:\\"2023-08-30\\",description:\\"Learn how to auto-instrument Node.js applications using OpenTelemetry. With standard commands in a Docker file, applications can be instrumented quickly without writing code in multiple places, enabling rapid change, scale, and easier management.\\",author:[{slug:\\"bahubali-shetti\\"}],image:\\"observability-launch-series-1-node-js-auto_(1).jpg\\",tags:[{slug:\\"opentelemetry\\"},{slug:\\"nodejs\\"},{slug:\\"apm\\"},{slug:\\"instrumentation\\"}]};function h(t){let e={a:\\"a\\",blockquote:\\"blockquote\\",br:\\"br\\",code:\\"code\\",em:\\"em\\",h2:\\"h2\\",h3:\\"h3\\",img:\\"img\\",li:\\"li\\",p:\\"p\\",pre:\\"pre\\",strong:\\"strong\\",ul:\\"ul\\",...t.components};return(0,n.jsxs)(n.Fragment,{children:[(0,n.jsx)(e.p,{children:\\"DevOps and SRE teams are transforming the process of software development. While DevOps engineers focus on efficient software applications and service delivery, SRE teams are key to ensuring reliability, scalability, and performance. These teams must rely on a full-stack observability solution that allows them to manage and monitor systems and ensure issues are resolved before they impact the business.\\"}),`\\n`,(0,n.jsx)(e.p,{children:\\"Observability across the entire stack of modern distributed applications requires data collection, processing, and correlation often in the form of dashboards. 
Ingesting all system data requires installing agents across stacks, frameworks, and providers \\\\u2014 a process that can be challenging and time-consuming for teams who have to deal with version changes, compatibility issues, and proprietary code that doesn\'t scale as systems change.\\"}),`\\n`,(0,n.jsxs)(e.p,{children:[\\"Thanks to \\",(0,n.jsx)(e.a,{href:\\"https://opentelemetry.io\\",rel:\\"nofollow\\",children:\\"OpenTelemetry\\"}),\\" (OTel), DevOps and SRE teams now have a standard way to collect and send data that doesn\'t rely on proprietary code and have a large support community reducing vendor lock-in.\\"]}),`\\n`,(0,n.jsxs)(e.p,{children:[\\"In a \\",(0,n.jsx)(e.a,{href:\\"https://www.elastic.co/blog/opentelemetry-observability\\",rel:\\"nofollow\\",children:\\"previous blog\\"}),\\", we also reviewed how to use the \\",(0,n.jsx)(e.a,{href:\\"https://github.com/elastic/opentelemetry-demo\\",rel:\\"nofollow\\",children:\\"OpenTelemetry demo\\"}),\\" and connect it to Elastic\\",(0,n.jsx)(\\"sup\\",{children:\\"\\\\xAE\\"}),\\", as well as some of Elastic\\\\u2019s capabilities with OpenTelemetry and Kubernetes.\\"]}),`\\n`,(0,n.jsxs)(e.p,{children:[\\"In this blog, we will show how to use \\",(0,n.jsx)(e.a,{href:\\"https://opentelemetry.io/docs/instrumentation/js/automatic/\\",rel:\\"nofollow\\",children:\\"automatic instrumentation for OpenTelemetry\\"}),\\" with the Node.js service of our \\",(0,n.jsx)(e.a,{href:\\"https://github.com/elastic/observability-examples\\",rel:\\"nofollow\\",children:\\"application called Elastiflix\\"}),\\", which helps highlight auto-instrumentation in a simple way.\\"]}),`\\n`,(0,n.jsxs)(e.p,{children:[\\"The beauty of this is that there is \\",(0,n.jsx)(e.strong,{children:\\"no need for the otel-collector\\"}),\\"! This setup enables you to slowly and easily migrate an application to OTel with Elastic according to a timeline that best fits your business.\\"]}),`\\n`,(0,n.jsx)(e.h2,{id:\\"application-prerequisites-and-config\\",children:\\"Application, prerequisites, and config\\"}),`\\n`,(0,n.jsxs)(e.p,{children:[\\"The application that we use for this blog is called \\",(0,n.jsx)(e.a,{href:\\"https://github.com/elastic/observability-examples\\",rel:\\"nofollow\\",children:\\"Elastiflix\\"}),\\", a movie streaming application. It consists of several micro-services written in .NET, NodeJS, Go, and Python.\\"]}),`\\n`,(0,n.jsx)(e.p,{children:\\"Before we instrument our sample application, we will first need to understand how Elastic can receive the telemetry data.\\"}),`\\n`,(0,n.jsx)(e.p,{children:(0,n.jsx)(e.img,{src:\\"/assets/images/auto-instrument-nodejs-apps-opentelemetry/elastic-blog-1-otel-config-options.png\\",alt:\\"options\\",width:\\"1365\\",height:\\"687\\"})}),`\\n`,(0,n.jsx)(e.p,{children:\\"All of Elastic Observability\\\\u2019s APM capabilities are available with OTel data. 
Some of these include:\\"}),`\\n`,(0,n.jsxs)(e.ul,{children:[`\\n`,(0,n.jsx)(e.li,{children:\\"Service maps\\"}),`\\n`,(0,n.jsx)(e.li,{children:\\"Service details (latency, throughput, failed transactions)\\"}),`\\n`,(0,n.jsx)(e.li,{children:\\"Dependencies between services, distributed tracing\\"}),`\\n`,(0,n.jsx)(e.li,{children:\\"Transactions (traces)\\"}),`\\n`,(0,n.jsx)(e.li,{children:\\"Machine learning (ML) correlations\\"}),`\\n`,(0,n.jsx)(e.li,{children:\\"Log correlation\\"}),`\\n`]}),`\\n`,(0,n.jsx)(e.p,{children:\\"In addition to Elastic\\\\u2019s APM and a unified view of the telemetry data, you will also be able to use Elastic\\\\u2019s powerful machine learning capabilities to reduce the analysis, and alerting to help reduce MTTR.\\"}),`\\n`,(0,n.jsx)(e.h3,{id:\\"prerequisites\\",children:\\"Prerequisites\\"}),`\\n`,(0,n.jsxs)(e.ul,{children:[`\\n`,(0,n.jsxs)(e.li,{children:[\\"An Elastic Cloud account \\\\u2014 \\",(0,n.jsx)(e.a,{href:\\"https://cloud.elastic.co/\\",rel:\\"nofollow\\",children:\\"sign up now\\"})]}),`\\n`,(0,n.jsxs)(e.li,{children:[\\"A clone of the \\",(0,n.jsx)(e.a,{href:\\"https://github.com/elastic/observability-examples\\",rel:\\"nofollow\\",children:\\"Elastiflix demo application\\"}),\\", or your own \\",(0,n.jsx)(e.strong,{children:\\"Node.js\\"}),\\" application\\"]}),`\\n`,(0,n.jsxs)(e.li,{children:[\\"Basic understanding of Docker \\\\u2014 potentially install \\",(0,n.jsx)(e.a,{href:\\"https://www.docker.com/products/docker-desktop/\\",rel:\\"nofollow\\",children:\\"Docker Desktop\\"})]}),`\\n`,(0,n.jsx)(e.li,{children:\\"Basic understanding of Node.js\\"}),`\\n`]}),`\\n`,(0,n.jsx)(e.h3,{id:\\"view-the-example-source-code\\",children:\\"View the example source code\\"}),`\\n`,(0,n.jsxs)(e.p,{children:[\\"The full source code, including the Dockerfile used in this blog, can be found on \\",(0,n.jsx)(e.a,{href:\\"https://github.com/elastic/observability-examples/tree/main/Elastiflix/python-favorite-otel-auto\\",rel:\\"nofollow\\",children:\\"GitHub\\"}),\\". The repository also contains the \\",(0,n.jsx)(e.a,{href:\\"https://github.com/elastic/observability-examples/tree/main/Elastiflix/python-favorite\\",rel:\\"nofollow\\",children:\\"same application without instrumentation\\"}),\\". This allows you to compare each file and see the differences.\\"]}),`\\n`,(0,n.jsxs)(e.p,{children:[\\"The following steps will show you how to instrument this application and run it on the command line or in Docker. If you are interested in a more complete OTel example, take a look at the docker-compose file \\",(0,n.jsx)(e.a,{href:\\"https://github.com/elastic/observability-examples/tree/main#start-the-app\\",rel:\\"nofollow\\",children:\\"here\\"}),\\", which will bring up the full project.\\"]}),`\\n`,(0,n.jsx)(e.h2,{id:\\"step-by-step-guide\\",children:\\"Step-by-step guide\\"}),`\\n`,(0,n.jsx)(e.h3,{id:\\"step-0-log-in-to-your-elastic-cloud-account\\",children:\\"Step 0. 
Log in to your Elastic Cloud account\\"}),`\\n`,(0,n.jsxs)(e.p,{children:[\\"This blog assumes you have an Elastic Cloud account \\\\u2014 if not, follow the \\",(0,n.jsx)(e.a,{href:\\"https://cloud.elastic.co/registration?elektra=en-cloud-page\\",rel:\\"nofollow\\",children:\\"instructions to get started on Elastic Cloud\\"}),\\".\\"]}),`\\n`,(0,n.jsx)(e.p,{children:(0,n.jsx)(e.img,{src:\\"/assets/images/auto-instrument-nodejs-apps-opentelemetry/elastic-blog-2-free-trial.png\\",alt:\\"free trial\\",width:\\"884\\",height:\\"654\\"})}),`\\n`,(0,n.jsx)(e.h3,{id:\\"step-1-configure-auto-instrumentation-for-the-nodejs-service\\",children:\\"Step 1. Configure auto-instrumentation for the Node.js Service\\"}),`\\n`,(0,n.jsxs)(e.p,{children:[\\"We are going to use automatic instrumentation with Node.js service from the \\",(0,n.jsx)(e.a,{href:\\"https://github.com/elastic/observability-examples\\",rel:\\"nofollow\\",children:\\"Elastiflix demo application\\"}),\\".\\"]}),`\\n`,(0,n.jsx)(e.p,{children:\\"We will be using the following service from Elastiflix:\\"}),`\\n`,(0,n.jsx)(e.pre,{children:(0,n.jsx)(e.code,{className:\\"language-bash\\",children:`Elastiflix/node-server-otel-manual\\n`})}),`\\n`,(0,n.jsxs)(e.p,{children:[\\"Per the \\",(0,n.jsx)(e.a,{href:\\"https://opentelemetry.io/docs/instrumentation/js/automatic/\\",rel:\\"nofollow\\",children:\\"OpenTelemetry JavaScript documentation\\"}),\\" and \\",(0,n.jsx)(e.a,{href:\\"https://www.npmjs.com/package/@opentelemetry/auto-instrumentations-node\\",rel:\\"nofollow\\",children:\\"@open-telemetry/auto-instrumentions-node\\"}),\\" documentation, you will simply install the appropriate node packages using npm.\\"]}),`\\n`,(0,n.jsx)(e.pre,{children:(0,n.jsx)(e.code,{className:\\"language-bash\\",children:`npm install --save @opentelemetry/api\\nnpm install --save @opentelemetry/auto-instrumentations-node\\n`})}),`\\n`,(0,n.jsx)(e.p,{children:\\"If you are running the Node.js service on the command line, then here is how you can run auto-instrument with Node.js.\\"}),`\\n`,(0,n.jsx)(e.pre,{children:(0,n.jsx)(e.code,{className:\\"language-bash\\",children:`node --require \'@opentelemetry/auto-instrumentations-node/register\' app.js\\n`})}),`\\n`,(0,n.jsx)(e.p,{children:\\"For our application, we do this as part of the Dockerfile.\\"}),`\\n`,(0,n.jsx)(e.p,{children:(0,n.jsx)(e.strong,{children:\\"Dockerfile\\"})}),`\\n`,(0,n.jsx)(e.pre,{children:(0,n.jsx)(e.code,{className:\\"language-dockerfile\\",children:`FROM node:14\\n\\nWORKDIR /app\\n\\nCOPY [\\"package.json\\", \\"./\\"]\\nRUN ls\\nRUN npm install --production\\nCOPY . .\\n\\nRUN npm install --save @opentelemetry/api\\nRUN npm install --save @opentelemetry/auto-instrumentations-node\\n\\n\\nEXPOSE 3001\\n\\nCMD [\\"node\\", \\"--require\\", \\"@opentelemetry/auto-instrumentations-node/register\\", \\"index.js\\"]\\n`})}),`\\n`,(0,n.jsx)(e.h3,{id:\\"step-2-running-the-docker-image-with-environment-variables\\",children:\\"Step 2. 
Running the Docker image with environment variables\\"}),`\\n`,(0,n.jsxs)(e.p,{children:[\\"As specified in the \\",(0,n.jsx)(e.a,{href:\\"https://opentelemetry.io/docs/instrumentation/python/automatic/#configuring-the-agent\\",rel:\\"nofollow\\",children:\\"OTEL documentation\\"}),\\", we will use environment variables and pass in the configuration values to enable it to connect with \\",(0,n.jsx)(e.a,{href:\\"https://www.elastic.co/guide/en/apm/guide/current/open-telemetry.html\\",rel:\\"nofollow\\",children:\\"Elastic Observability\\\\u2019s APM server\\"}),\\".\\"]}),`\\n`,(0,n.jsx)(e.p,{children:\\"Because Elastic accepts OTLP natively, we just need to provide the Endpoint and authentication where the OTEL Exporter needs to send the data, as well as some other environment variables.\\"}),`\\n`,(0,n.jsxs)(e.p,{children:[(0,n.jsx)(e.strong,{children:\\"Getting Elastic Cloud variables\\"}),(0,n.jsx)(e.br,{}),`\\n`,\\"You can copy the endpoints and token from Kibana\\",(0,n.jsx)(\\"sup\\",{children:\\"\\\\xAE\\"}),\\" under the path /app/home#/tutorial/apm.\\"]}),`\\n`,(0,n.jsx)(e.p,{children:(0,n.jsx)(e.img,{src:\\"/assets/images/auto-instrument-nodejs-apps-opentelemetry/elastic-blog-3-apm-agents.png\\",alt:\\"apm agents\\",width:\\"1924\\",height:\\"1304\\"})}),`\\n`,(0,n.jsx)(e.p,{children:\\"You will need to copy the following environment variables:\\"}),`\\n`,(0,n.jsx)(e.pre,{children:(0,n.jsx)(e.code,{className:\\"language-bash\\",children:`OTEL_EXPORTER_OTLP_ENDPOINT\\nOTEL_EXPORTER_OTLP_HEADERS\\n`})}),`\\n`,(0,n.jsx)(e.p,{children:(0,n.jsx)(e.strong,{children:\\"Build the image\\"})}),`\\n`,(0,n.jsx)(e.pre,{children:(0,n.jsx)(e.code,{className:\\"language-bash\\",children:`docker build -t node-otel-auto-image .\\n`})}),`\\n`,(0,n.jsx)(e.p,{children:(0,n.jsx)(e.strong,{children:\\"Run the image\\"})}),`\\n`,(0,n.jsx)(e.pre,{children:(0,n.jsx)(e.code,{className:\\"language-bash\\",children:`docker run \\\\\\\\\\n -e OTEL_EXPORTER_OTLP_ENDPOINT=\\"\\" \\\\\\\\\\n -e OTEL_EXPORTER_OTLP_HEADERS=\\"Authorization=Bearer \\" \\\\\\\\\\n -e OTEL_RESOURCE_ATTRIBUTES=\\"service.version=1.0,deployment.environment=production\\" \\\\\\\\\\n -e OTEL_SERVICE_NAME=\\"node-server-otel-auto\\" \\\\\\\\\\n -p 3001:3001 \\\\\\\\\\n node-server-otel-auto\\n`})}),`\\n`,(0,n.jsx)(e.p,{children:\\"You can now issue a few requests in order to generate trace data. 
Note that these requests are expected to return an error, as this service relies on some downstream services that you may not have running on your machine.\\"}),`\\n`,(0,n.jsx)(e.pre,{children:(0,n.jsx)(e.code,{className:\\"language-bash\\",children:`curl localhost:3001/api/login\\ncurl localhost:3001/api/favorites\\n\\n# or alternatively issue a request every second\\n\\nwhile true; do curl \\"localhost:3001/api/favorites\\"; sleep 1; done;\\n`})}),`\\n`,(0,n.jsx)(e.h3,{id:\\"step-3-explore-traces-metrics-and-logs-in-elastic-apm\\",children:\\"Step 3: Explore traces, metrics, and logs in Elastic APM\\"}),`\\n`,(0,n.jsx)(e.p,{children:\\"Exploring the Services section in Elastic APM, you\\\\u2019ll see the Node service displayed.\\"}),`\\n`,(0,n.jsx)(e.p,{children:(0,n.jsx)(e.img,{src:\\"/assets/images/auto-instrument-nodejs-apps-opentelemetry/elastic-blog-4-services.png\\",alt:\\"services\\",width:\\"1999\\",height:\\"944\\"})}),`\\n`,(0,n.jsx)(e.p,{children:\\"Clicking on the node-server-otel-auto service, you can see that it is ingesting telemetry data using OpenTelemetry.\\"}),`\\n`,(0,n.jsx)(e.h2,{id:\\"summary\\",children:\\"Summary\\"}),`\\n`,(0,n.jsx)(e.p,{children:\\"In this blog, we discussed the following:\\"}),`\\n`,(0,n.jsxs)(e.ul,{children:[`\\n`,(0,n.jsx)(e.li,{children:\\"How to auto-instrument Node.js with OpenTelemetry\\"}),`\\n`,(0,n.jsx)(e.li,{children:\\"Using standard commands in a Dockerfile, auto-instrumentation was done efficiently and without adding code in multiple places enabling manageability\\"}),`\\n`]}),`\\n`,(0,n.jsx)(e.p,{children:\\"Since Elastic can support a mix of methods for ingesting data, whether it be using auto-instrumentation of open-source OpenTelemetry or manual instrumentation with its native APM agents, you can plan your migration to OTel by focusing on a few applications first and then using OpenTelemety across your applications later on in a manner that best fits your business needs.\\"}),`\\n`,(0,n.jsxs)(e.blockquote,{children:[`\\n`,(0,n.jsx)(e.p,{children:\\"Developer resources:\\"}),`\\n`,(0,n.jsxs)(e.ul,{children:[`\\n`,(0,n.jsxs)(e.li,{children:[(0,n.jsx)(e.a,{href:\\"https://www.elastic.co/blog/getting-started-opentelemetry-instrumentation-sample-app\\",rel:\\"nofollow\\",children:\\"Elastiflix application\\"}),\\", a guide to instrument different languages with OpenTelemetry\\"]}),`\\n`,(0,n.jsxs)(e.li,{children:[\\"Python: \\",(0,n.jsx)(e.a,{href:\\"https://www.elastic.co/blog/auto-instrumentation-of-python-applications-opentelemetry\\",rel:\\"nofollow\\",children:\\"Auto-instrumentation\\"}),\\", \\",(0,n.jsx)(e.a,{href:\\"https://www.elastic.co/blog/manual-instrumentation-of-python-applications-opentelemetry\\",rel:\\"nofollow\\",children:\\"Manual-instrumentation\\"})]}),`\\n`,(0,n.jsxs)(e.li,{children:[\\"Java: \\",(0,n.jsx)(e.a,{href:\\"https://www.elastic.co/blog/auto-instrumentation-of-java-applications-opentelemetry\\",rel:\\"nofollow\\",children:\\"Auto-instrumentation\\"}),\\", \\",(0,n.jsx)(e.a,{href:\\"https://www.elastic.co/blog/manual-instrumentation-of-java-applications-opentelemetry\\",rel:\\"nofollow\\",children:\\"Manual-instrumentation\\"})]}),`\\n`,(0,n.jsxs)(e.li,{children:[\\"Node.js: \\",(0,n.jsx)(e.a,{href:\\"https://www.elastic.co/blog/auto-instrument-nodejs-apps-opentelemetry\\",rel:\\"nofollow\\",children:\\"Auto-instrumentation\\"}),\\", 
\\",(0,n.jsx)(e.a,{href:\\"https://www.elastic.co/blog/manual-instrumentation-of-nodejs-applications-opentelemetry\\",rel:\\"nofollow\\",children:\\"Manual-instrumentation\\"})]}),`\\n`,(0,n.jsxs)(e.li,{children:[\\".NET: \\",(0,n.jsx)(e.a,{href:\\"https://www.elastic.co/blog/auto-instrumentation-of-net-applications-opentelemetry\\",rel:\\"nofollow\\",children:\\"Auto-instrumentation\\"}),\\", \\",(0,n.jsx)(e.a,{href:\\"https://www.elastic.co/blog/manual-instrumentation-of-net-applications-opentelemetry\\",rel:\\"nofollow\\",children:\\"Manual-instrumentation\\"})]}),`\\n`,(0,n.jsxs)(e.li,{children:[\\"Go: \\",(0,n.jsx)(e.a,{href:\\"https://elastic.co/blog/manual-instrumentation-of-go-applications-opentelemetry\\",rel:\\"nofollow\\",children:\\"Manual-instrumentation\\"})]}),`\\n`,(0,n.jsx)(e.li,{children:(0,n.jsx)(e.a,{href:\\"https://www.elastic.co/blog/best-practices-instrumenting-opentelemetry\\",rel:\\"nofollow\\",children:\\"Best practices for instrumenting OpenTelemetry\\"})}),`\\n`]}),`\\n`,(0,n.jsx)(e.p,{children:\\"General configuration and use case resources:\\"}),`\\n`,(0,n.jsxs)(e.ul,{children:[`\\n`,(0,n.jsx)(e.li,{children:(0,n.jsx)(e.a,{href:\\"https://www.elastic.co/blog/opentelemetry-observability\\",rel:\\"nofollow\\",children:\\"Independence with OpenTelemetry on Elastic\\"})}),`\\n`,(0,n.jsx)(e.li,{children:(0,n.jsx)(e.a,{href:\\"https://www.elastic.co/blog/implementing-kubernetes-observability-security-opentelemetry\\",rel:\\"nofollow\\",children:\\"Modern observability and security on Kubernetes with Elastic and OpenTelemetry\\"})}),`\\n`,(0,n.jsx)(e.li,{children:(0,n.jsx)(e.a,{href:\\"https://www.elastic.co/blog/3-models-logging-opentelemetry-elastic\\",rel:\\"nofollow\\",children:\\"3 models for logging with OpenTelemetry and Elastic\\"})}),`\\n`,(0,n.jsx)(e.li,{children:(0,n.jsx)(e.a,{href:\\"https://www.elastic.co/blog/adding-free-and-open-elastic-apm-as-part-of-your-elastic-observability-deployment\\",rel:\\"nofollow\\",children:\\"Adding free and open Elastic APM as part of your Elastic Observability deployment\\"})}),`\\n`,(0,n.jsx)(e.li,{children:(0,n.jsx)(e.a,{href:\\"https://www.elastic.co/blog/custom-metrics-app-code-java-agent-plugin\\",rel:\\"nofollow\\",children:\\"Capturing custom metrics through OpenTelemetry API in code with Elastic\\"})}),`\\n`,(0,n.jsx)(e.li,{children:(0,n.jsx)(e.a,{href:\\"https://www.elastic.co/virtual-events/future-proof-your-observability-platform-with-opentelemetry-and-elastic\\",rel:\\"nofollow\\",children:\\"Future-proof your observability platform with OpenTelemetry and Elastic\\"})}),`\\n`,(0,n.jsx)(e.li,{children:(0,n.jsx)(e.a,{href:\\"https://www.elastic.co/blog/kubernetes-k8s-observability-elasticsearch-cncf\\",rel:\\"nofollow\\",children:\\"Elastic Observability: Built for open technologies like Kubernetes, OpenTelemetry, Prometheus, Istio, and more\\"})}),`\\n`]}),`\\n`]}),`\\n`,(0,n.jsxs)(e.p,{children:[\\"Don\\\\u2019t have an Elastic Cloud account yet? \\",(0,n.jsx)(e.a,{href:\\"https://cloud.elastic.co/registration\\",rel:\\"nofollow\\",children:\\"Sign up for Elastic Cloud\\"}),\\" and try out the auto-instrumentation capabilities that I discussed above. I would be interested in getting your feedback about your experience in gaining visibility into your application stack with Elastic.\\"]}),`\\n`,(0,n.jsx)(e.p,{children:(0,n.jsx)(e.em,{children:\\"The release and timing of any features or functionality described in this post remain at Elastic\'s sole discretion. 
Any features or functionality not currently available may not be delivered on time or at all.\\"})})]})}function d(t={}){let{wrapper:e}=t.components||{};return e?(0,n.jsx)(e,{...t,children:(0,n.jsx)(h,{...t})}):h(t)}return v(T);})();\\n;return Component;"},"_id":"articles/auto-instrument-nodejs-applications-opentelemetry.mdx","_raw":{"sourceFilePath":"articles/auto-instrument-nodejs-applications-opentelemetry.mdx","sourceFileName":"auto-instrument-nodejs-applications-opentelemetry.mdx","sourceFileDir":"articles","contentType":"mdx","flattenedPath":"articles/auto-instrument-nodejs-applications-opentelemetry"},"type":"Article","imageUrl":"/assets/images/auto-instrument-nodejs-apps-opentelemetry/observability-launch-series-1-node-js-auto_(1).jpg","readingTime":"7 min read","url":"/auto-instrument-nodejs-apps-opentelemetry","headings":[{"level":2,"title":"Application, prerequisites, and config","href":"#application-prerequisites-and-config"},{"level":3,"title":"Prerequisites","href":"#prerequisites"},{"level":3,"title":"View the example source code","href":"#view-the-example-source-code"},{"level":2,"title":"Step-by-step guide","href":"#step-by-step-guide"},{"level":3,"title":"Step 0. Log in to your Elastic Cloud account","href":"#step-0-log-in-to-your-elastic-cloud-account"},{"level":3,"title":"Step 1. Configure auto-instrumentation for the Node.js Service","href":"#step-1-configure-auto-instrumentation-for-the-nodejs-service"},{"level":3,"title":"Step 2. Running the Docker image with environment variables","href":"#step-2-running-the-docker-image-with-environment-variables"},{"level":3,"title":"Step 3: Explore traces, metrics, and logs in Elastic APM","href":"#step-3-explore-traces-metrics-and-logs-in-elastic-apm"},{"level":2,"title":"Summary","href":"#summary"}]},{"title":"Auto-instrumentation of Java applications with OpenTelemetry","slug":"auto-instrumentation-java-applications-opentelemetry","date":"2023-08-31","description":"Instrumenting Java applications with OpenTelemetry provides insights into application performance, dependencies, and errors. We\'ll show you how to automatically instrument a Java application using Docker, with no changes to your application code.","image":"observability-launch-series-3-java-auto.jpg","author":[{"slug":"david-hope","type":"Author","_raw":{}}],"tags":[{"slug":"opentelemetry","type":"Tag","_raw":{}},{"slug":"java","type":"Tag","_raw":{}},{"slug":"apm","type":"Tag","_raw":{}},{"slug":"instrumentation","type":"Tag","_raw":{}}],"body":{"raw":"\\nIn the fast-paced universe of software development, especially in the cloud-native realm, DevOps and SRE teams are increasingly emerging as essential partners in application stability and growth.\\n\\nDevOps engineers continuously optimize software delivery, while SRE teams act as the stewards of application reliability, scalability, and top-tier performance. The challenge? These teams require a cutting-edge observability solution, one that encompasses full-stack insights, empowering them to rapidly manage, monitor, and rectify potential disruptions before they culminate into operational challenges.\\n\\nObservability in our modern distributed software ecosystem goes beyond mere monitoring — it demands limitless data collection, precision in processing, and the correlation of this data into actionable insights. 
However, the road to achieving this holistic view is paved with obstacles, from navigating version incompatibilities to wrestling with restrictive proprietary code.\\n\\nEnter [OpenTelemetry (OTel)](https://opentelemetry.io/), with the following benefits for those who adopt it:\\n\\n- Escape vendor constraints with OTel, freeing yourself from vendor lock-in and ensuring top-notch observability.\\n- See the harmony of unified logs, metrics, and traces come together to provide a complete system view.\\n- Improve your application oversight through richer and enhanced instrumentations.\\n- Embrace the benefits of backward compatibility to protect your prior instrumentation investments.\\n- Embark on the OpenTelemetry journey with an easy learning curve, simplifying onboarding and scalability.\\n- Rely on a proven, future-ready standard to boost your confidence in every investment.\\n\\nIn this blog, we will explore how you can use [automatic instrumentation](https://opentelemetry.io/docs/instrumentation/java/automatic/) in your Java application using Docker, without the need to refactor any part of your application code. We will use an [application called Elastiflix](https://github.com/elastic/observability-examples), which helps highlight auto-instrumentation in a simple way.\\n\\n## Application, prerequisites, and config\\n\\nThe application that we use for this blog is called [Elastiflix](https://github.com/elastic/observability-examples), a movie-streaming application. It consists of several microservices written in .NET, NodeJS, Go, and Python.\\n\\nBefore we instrument our sample application, we will first need to understand how Elastic can receive the telemetry data.\\n\\n![Elastic configuration options for OpenTelemetry](/assets/images/auto-instrumentation-java-applications-opentelemetry/elastic-blog-1-config.png)\\n\\nAll of Elastic Observability’s APM capabilities are available with OTel data. Some of these include:\\n\\n- Service maps\\n- Service details (latency, throughput, failed transactions)\\n- Dependencies between services, distributed tracing\\n- Transactions (traces)\\n- Machine learning (ML) correlations\\n- Log correlation\\n\\nIn addition to Elastic’s APM and a unified view of the telemetry data, you will also be able to use Elastic’s powerful machine learning capabilities to reduce analysis time, as well as alerting to help reduce MTTR.\\n\\n### Prerequisites\\n\\n- An Elastic Cloud account — [sign up now](https://cloud.elastic.co/).\\n- A clone of the [Elastiflix demo application](https://github.com/elastic/observability-examples), or your own Java application\\n- Basic understanding of Docker — potentially install [Docker Desktop](https://www.docker.com/products/docker-desktop/)\\n- Basic understanding of Java\\n\\n### View the example source code\\n\\nThe full source code, including the Dockerfile used in this blog, can be found on [GitHub](https://github.com/elastic/observability-examples/tree/main/Elastiflix/java-favorite-otel-auto). The repository also contains the [same application without instrumentation](https://github.com/elastic/observability-examples/tree/main/Elastiflix/java-favorite). This allows you to compare each file and see the differences.\\n\\nThe following steps will show you how to instrument this application and run it on the command line or in Docker. 
If you are interested in a more complete OTel example, take a look at the docker-compose file [here](https://github.com/elastic/observability-examples/tree/main#start-the-app), which will bring up the full project.\\n\\n## Step-by-step guide\\n\\n### Step 0. Log in to your Elastic Cloud account\\n\\nThis blog assumes you have an Elastic Cloud account — if not, follow the [instructions to get started on Elastic Cloud](https://cloud.elastic.co/registration?elektra=en-cloud-page).\\n\\n![free trial](/assets/images/auto-instrumentation-java-applications-opentelemetry/elastic-blog-2-trial.png)\\n\\n### Step 1. Configure auto-instrumentation for the Java service\\n\\nWe are going to use automatic instrumentation with the Java service from the [Elastiflix demo application](https://github.com/elastic/observability-examples/tree/main/Elastiflix/java-favorite-otel-auto).\\n\\nWe will be using the following service from Elastiflix:\\n\\n```bash\\nElastiflix/java-favorite-otel-auto\\n```\\n\\nPer the [OpenTelemetry Automatic Instrumentation for Java documentation](https://opentelemetry.io/docs/instrumentation/java/automatic/), you simply download the OpenTelemetry Java agent; no application packages need to change.\\n\\nCreate a local OTel directory and download opentelemetry-javaagent.jar into it:\\n\\n```bash\\nmkdir /otel\\n\\ncurl -L https://github.com/open-telemetry/opentelemetry-java-instrumentation/releases/latest/download/opentelemetry-javaagent.jar --output /otel/opentelemetry-javaagent.jar\\n```\\n\\nIf you are going to run the service on the command line, then you can use the following command:\\n\\n```bash\\njava -javaagent:/otel/opentelemetry-javaagent.jar \\\\\\n-jar /usr/src/app/target/favorite-0.0.1-SNAPSHOT.jar --server.port=5000\\n```\\n\\nFor our application, we will do this as part of the Dockerfile.\\n\\n**Dockerfile**\\n\\n```dockerfile\\n# Start with a base image containing the Java runtime\\nFROM maven:3.8.2-openjdk-17-slim as build\\n\\n# Make port 5000 available to the world outside this container\\nEXPOSE 5000\\n\\n# Change to the app directory\\nWORKDIR /usr/src/app\\n\\n# Copy the local code to the container\\nCOPY . .\\n\\n# Build the application\\nRUN mvn clean install\\n\\nUSER root\\nRUN apt-get update && apt-get install -y zip curl\\nRUN mkdir /otel\\nRUN curl -L -o /otel/opentelemetry-javaagent.jar https://github.com/open-telemetry/opentelemetry-java-instrumentation/releases/download/v1.28.0/opentelemetry-javaagent.jar\\n\\nCOPY start.sh /start.sh\\nRUN chmod +x /start.sh\\n\\nENTRYPOINT [\\"/start.sh\\"]\\n```\\n\\n
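The Dockerfile\'s entrypoint is a small start.sh script whose contents are not shown in this post. Based on the environment variables passed in Step 2 below, it presumably assembles the OTLP authorization header from the secret token and then launches the jar with the agent attached. A hypothetical sketch, not the verbatim script from the repository:\\n\\n```bash\\n#!/bin/sh\\n# Hypothetical start.sh: build the OTLP auth header from the secret token\\n# provided via docker run (see Step 2), then start the app with the agent.\\nexport OTEL_EXPORTER_OTLP_HEADERS=\\"Authorization=Bearer ${ELASTIC_APM_SECRET_TOKEN}\\"\\n\\nexec java -javaagent:/otel/opentelemetry-javaagent.jar \\\\\\n -jar /usr/src/app/target/favorite-0.0.1-SNAPSHOT.jar --server.port=5000\\n```\\n\\n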
### Step 2. Running the Docker image with environment variables\\n\\nAs specified in the [OTel Java documentation](https://opentelemetry.io/docs/instrumentation/java/automatic/), we will use environment variables and pass in the configuration values to enable it to connect with [Elastic Observability’s APM server](https://www.elastic.co/guide/en/observability/current/apm-open-telemetry.html).\\n\\nBecause Elastic accepts OTLP natively, we just need to provide the endpoint and authentication details that tell the OTel exporter where to send the data, as well as some other environment variables.\\n\\n**Getting Elastic Cloud variables** \\nYou can copy the endpoints and token from Kibana under the path `/app/home#/tutorial/apm`.\\n\\n![apm agents](/assets/images/auto-instrumentation-java-applications-opentelemetry/elastic-blog-3-apm-agents.png)\\n\\nYou will need to copy the following environment variables:\\n\\n```bash\\nOTEL_EXPORTER_OTLP_ENDPOINT\\nOTEL_EXPORTER_OTLP_HEADERS\\n```\\n\\n**Build the Docker image**\\n\\n```bash\\ndocker build -t java-otel-auto-image .\\n```\\n\\n**Run the Docker image**\\n\\n```bash\\ndocker run \\\\\\n -e OTEL_EXPORTER_OTLP_ENDPOINT=\\"REPLACE WITH OTEL_EXPORTER_OTLP_ENDPOINT\\" \\\\\\n -e ELASTIC_APM_SECRET_TOKEN=\\"REPLACE WITH THE BIT AFTER Authorization=Bearer \\" \\\\\\n -e OTEL_RESOURCE_ATTRIBUTES=\\"service.version=1.0,deployment.environment=production\\" \\\\\\n -e OTEL_SERVICE_NAME=\\"java-favorite-otel-auto\\" \\\\\\n -p 5000:5000 \\\\\\n java-otel-auto-image\\n```\\n\\nYou can now issue a few requests in order to generate trace data. Note that these requests are expected to return an error, as this service relies on a connection to Redis that you don’t currently have running. As mentioned before, you can find a more complete example using docker-compose [here](https://github.com/elastic/observability-examples/tree/main/Elastiflix).\\n\\n```bash\\ncurl localhost:5000/favorites\\n\\n# or alternatively issue a request every second\\n\\nwhile true; do curl \\"localhost:5000/favorites\\"; sleep 1; done;\\n```\\n\\n### Step 3: Explore traces and logs in Elastic APM\\n\\nOnce you have this up and running, you can ping the endpoint for your instrumented service (in our case, this is /favorites), and you should see the app appear in Elastic APM, as shown below:\\n\\n![services](/assets/images/auto-instrumentation-java-applications-opentelemetry/elastic-blog-4-services.png)\\n\\nIt will begin by tracking throughput and latency, critical metrics for SREs to pay attention to.\\n\\nDigging in, we can see an overview of all our transactions.\\n\\n![services-2](/assets/images/auto-instrumentation-java-applications-opentelemetry/elastic-blog-5-services2.png)\\n\\nAnd look at specific transactions:\\n\\n![graph colored lines](/assets/images/auto-instrumentation-java-applications-opentelemetry/elastic-blog-6-graph-colored.png)\\n\\nClick on **Logs**, and we see that logs are also brought over. The OTel Agent will automatically bring in logs and correlate them with traces for you:\\n\\n![graph-no-colors](/assets/images/auto-instrumentation-java-applications-opentelemetry/elastic-blog-7-graph-no-colors.png)\\n\\nThis gives you complete visibility across logs, metrics, and traces!\\n\\n## Basic concepts: How APM works with Java\\n\\nBefore we continue, let\'s first understand a few basic concepts and terms.\\n\\n- **Java Agent:** This is a tool that can be used to instrument (or modify) the bytecode of class files in the Java Virtual Machine (JVM). 
Java agents are used for many purposes like performance monitoring, logging, security, and more.\\n- **Bytecode:** This is the intermediary code generated by the Java compiler from your Java source code. This code is interpreted or compiled on the fly by the JVM to produce machine code that can be executed.\\n- **Byte Buddy:** Byte Buddy is a code generation and manipulation library for Java. It is used to create, modify, or adapt Java classes at runtime. In the context of a Java Agent, Byte Buddy provides a powerful and flexible way to modify bytecode. **Both the Elastic APM Agent and the OpenTelemetry Agent use Byte Buddy under the covers.**\\n\\n**Now, let\'s talk about how automatic instrumentation works with Byte Buddy:**\\n\\nAutomatic instrumentation is the process by which an agent modifies the bytecode of your application\'s classes, often to insert monitoring code. The agent doesn\'t modify the source code directly, but rather the bytecode that is loaded into the JVM. This is done while the JVM is loading the classes, so the modifications are in effect during runtime.\\n\\nHere\'s a simplified explanation of the process (a minimal code sketch follows the list):\\n\\n1. **Start the JVM with the agent:** When starting your Java application, you specify the Java agent with the `-javaagent` command-line option. This instructs the JVM to load your agent before the main method of your application is invoked. At this point, the agent has the opportunity to set up class transformers.\\n\\n2. **Register a class file transformer with Byte Buddy:** Your agent will register a class file transformer with Byte Buddy. A transformer is a piece of code that is invoked every time a class is loaded into the JVM. This transformer receives the bytecode of the class, and it can modify this bytecode before the class is actually used.\\n\\n3. **Transform the bytecode:** When your transformer is invoked, it will use Byte Buddy\'s API to modify the bytecode. Byte Buddy allows you to specify your transformations in a high-level, expressive way rather than manually writing complex bytecode. For example, you could specify a certain class and method within that class that you want to instrument and provide an \\"interceptor\\" that will add new behavior to that method.\\n\\n4. **Use the transformed classes:** Once the agent has set up its transformers, the JVM continues to load classes as usual. Each time a class is loaded, your transformers are invoked, allowing them to modify the bytecode. Your application then uses these transformed classes as if they were the original ones, but they now have the extra behavior that you\'ve injected through your interceptor.\\n\\n![flowchart](/assets/images/auto-instrumentation-java-applications-opentelemetry/elastic-blog-8-flowchart.png)\\n\\nIn essence, automatic instrumentation with Byte Buddy is about modifying the behavior of your Java classes at runtime, without needing to alter the source code directly. This is especially useful for cross-cutting concerns like logging, monitoring, or security, as it allows you to centralize this code in your Java Agent, rather than scattering it throughout your application.\\n\\n
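To make the four steps concrete, here is a deliberately tiny, hypothetical agent built directly on Byte Buddy. The class and method names are invented for illustration, and the sketch assumes a Byte Buddy version with the four-argument `Transformer` lambda (newer releases add a fifth `ProtectionDomain` parameter):\\n\\n```java\\nimport java.lang.instrument.Instrumentation;\\n\\nimport net.bytebuddy.agent.builder.AgentBuilder;\\nimport net.bytebuddy.asm.Advice;\\nimport net.bytebuddy.matcher.ElementMatchers;\\n\\npublic class TimingAgent {\\n\\n    // Step 1: the JVM calls premain() before main() because of -javaagent.\\n    public static void premain(String arguments, Instrumentation instrumentation) {\\n        // Step 2: register a class file transformer for a hypothetical class.\\n        new AgentBuilder.Default()\\n            .type(ElementMatchers.named(\\"com.example.FavoriteService\\"))\\n            // Step 3: rewrite the bytecode of getFavorites() with our advice.\\n            .transform((builder, typeDescription, classLoader, module) ->\\n                builder.visit(Advice.to(TimingAdvice.class)\\n                    .on(ElementMatchers.named(\\"getFavorites\\"))))\\n            .installOn(instrumentation);\\n        // Step 4: classes now load as usual, but with the extra behavior.\\n    }\\n\\n    public static class TimingAdvice {\\n        @Advice.OnMethodEnter\\n        static long enter() {\\n            return System.nanoTime();\\n        }\\n\\n        @Advice.OnMethodExit\\n        static void exit(@Advice.Enter long startNanos) {\\n            System.out.println(\\"getFavorites took \\"\\n                + (System.nanoTime() - startNanos) / 1_000_000 + \\" ms\\");\\n        }\\n    }\\n}\\n```\\n\\nIn practice you rarely write this by hand: the OpenTelemetry Java agent ships transformations like this prepackaged for hundreds of libraries, which is why the Dockerfile above only needs to download the jar and pass `-javaagent`.\\n\\n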
## Summary\\n\\nWith this Dockerfile, you\'ve transformed your simple Java application into one that\'s automatically instrumented with OpenTelemetry. This will aid greatly in understanding application performance, tracing errors, and gaining insights into how users interact with your software.\\n\\nRemember, observability is a crucial aspect of modern application development, especially in distributed systems. With tools like OpenTelemetry, understanding complex systems becomes a tad bit easier.\\n\\nIn this blog, we discussed the following:\\n\\n- How to auto-instrument Java with OpenTelemetry.\\n- Using standard commands in a Dockerfile, auto-instrumentation was done efficiently and without adding code in multiple places, improving manageability.\\n- Using OpenTelemetry and its support for multiple languages, DevOps and SRE teams can auto-instrument their applications with ease, gaining immediate insights into the health of the entire application stack and reducing mean time to resolution (MTTR).\\n\\nSince Elastic can support a mix of methods for ingesting data, whether it be auto-instrumentation with open-source OpenTelemetry or manual instrumentation with its native APM agents, you can plan your migration to OTel by focusing on a few applications first and then using OpenTelemetry across your applications later on in a manner that best fits your business needs.\\n\\n> Developer resources:\\n>\\n> - [Elastiflix application](https://www.elastic.co/blog/getting-started-opentelemetry-instrumentation-sample-app), a guide to instrument different languages with OpenTelemetry\\n> - Python: [Auto-instrumentation](https://www.elastic.co/blog/auto-instrumentation-of-python-applications-opentelemetry), [Manual-instrumentation](https://www.elastic.co/blog/manual-instrumentation-of-python-applications-opentelemetry)\\n> - Java: [Auto-instrumentation](https://www.elastic.co/blog/auto-instrumentation-java-applications-opentelemetry), [Manual-instrumentation](https://www.elastic.co/blog/manual-instrumentation-of-java-applications-opentelemetry)\\n> - Node.js: [Auto-instrumentation](https://www.elastic.co/blog/auto-instrument-nodejs-applications-opentelemetry), [Manual-instrumentation](https://www.elastic.co/blog/manual-instrumentation-of-nodejs-applications-opentelemetry)\\n> - .NET: [Auto-instrumentation](https://www.elastic.co/blog/auto-instrumentation-of-net-applications-opentelemetry), [Manual-instrumentation](https://www.elastic.co/blog/manual-instrumentation-of-net-applications-opentelemetry)\\n> - Go: [Manual-instrumentation](https://elastic.co/blog/manual-instrumentation-of-go-applications-opentelemetry)\\n> - [Best practices for instrumenting OpenTelemetry](https://www.elastic.co/blog/best-practices-instrumenting-opentelemetry)\\n>\\n> General configuration and use case resources:\\n>\\n> - [Independence with OpenTelemetry on Elastic](https://www.elastic.co/blog/opentelemetry-observability)\\n> - [Modern observability and security on Kubernetes with Elastic and OpenTelemetry](https://www.elastic.co/blog/implementing-kubernetes-observability-security-opentelemetry)\\n> - [3 models for logging with OpenTelemetry and Elastic](https://www.elastic.co/blog/3-models-logging-opentelemetry-elastic)\\n> - [Adding free and open Elastic APM as part of your Elastic Observability deployment](https://www.elastic.co/blog/adding-free-and-open-elastic-apm-as-part-of-your-elastic-observability-deployment)\\n> - [Capturing custom metrics through OpenTelemetry API in code with Elastic](https://www.elastic.co/blog/custom-metrics-app-code-java-agent-plugin)\\n> - [Future-proof your observability platform with OpenTelemetry and 
Elastic](https://www.elastic.co/virtual-events/future-proof-your-observability-platform-with-opentelemetry-and-elastic)\\n> - [Elastic Observability: Built for open technologies like Kubernetes, OpenTelemetry, Prometheus, Istio, and more](https://www.elastic.co/blog/kubernetes-k8s-observability-elasticsearch-cncf)\\n\\nDon’t have an Elastic Cloud account yet? [Sign up for Elastic Cloud](https://cloud.elastic.co/registration) and try out the auto-instrumentation capabilities that I discussed above. I would be interested in getting your feedback about your experience in gaining visibility into your application stack with Elastic.\\n\\n_The release and timing of any features or functionality described in this post remain at Elastic\'s sole discretion. Any features or functionality not currently available may not be delivered on time or at all._\\n","code":"var Component=(()=>{var p=Object.create;var o=Object.defineProperty;var u=Object.getOwnPropertyDescriptor;var m=Object.getOwnPropertyNames;var g=Object.getPrototypeOf,y=Object.prototype.hasOwnProperty;var f=(n,e)=>()=>(e||n((e={exports:{}}).exports,e),e.exports),w=(n,e)=>{for(var i in e)o(n,i,{get:e[i],enumerable:!0})},l=(n,e,i,r)=>{if(e&&typeof e==\\"object\\"||typeof e==\\"function\\")for(let a of m(e))!y.call(n,a)&&a!==i&&o(n,a,{get:()=>e[a],enumerable:!(r=u(e,a))||r.enumerable});return n};var v=(n,e,i)=>(i=n!=null?p(g(n)):{},l(e||!n||!n.__esModule?o(i,\\"default\\",{value:n,enumerable:!0}):i,n)),b=n=>l(o({},\\"__esModule\\",{value:!0}),n);var c=f((j,s)=>{s.exports=_jsx_runtime});var T={};w(T,{default:()=>d,frontmatter:()=>E});var t=v(c()),E={title:\\"Auto-instrumentation of Java applications with OpenTelemetry\\",slug:\\"auto-instrumentation-java-applications-opentelemetry\\",date:\\"2023-08-31\\",description:\\"Instrumenting Java applications with OpenTelemetry provides insights into application performance, dependencies, and errors. We\'ll show you how to automatically instrument a Java application using Docker, with no changes to your application code.\\",author:[{slug:\\"david-hope\\"}],image:\\"observability-launch-series-3-java-auto.jpg\\",tags:[{slug:\\"opentelemetry\\"},{slug:\\"java\\"},{slug:\\"apm\\"},{slug:\\"instrumentation\\"}]};function h(n){let e={a:\\"a\\",blockquote:\\"blockquote\\",br:\\"br\\",code:\\"code\\",em:\\"em\\",h2:\\"h2\\",h3:\\"h3\\",img:\\"img\\",li:\\"li\\",ol:\\"ol\\",p:\\"p\\",pre:\\"pre\\",strong:\\"strong\\",ul:\\"ul\\",...n.components};return(0,t.jsxs)(t.Fragment,{children:[(0,t.jsx)(e.p,{children:\\"In the fast-paced universe of software development, especially in the cloud-native realm, DevOps and SRE teams are increasingly emerging as essential partners in application stability and growth.\\"}),`\\n`,(0,t.jsx)(e.p,{children:\\"DevOps engineers continuously optimize software delivery, while SRE teams act as the stewards of application reliability, scalability, and top-tier performance. The challenge? These teams require a cutting-edge observability solution, one that encompasses full-stack insights, empowering them to rapidly manage, monitor, and rectify potential disruptions before they culminate into operational challenges.\\"}),`\\n`,(0,t.jsx)(e.p,{children:\\"Observability in our modern distributed software ecosystem goes beyond mere monitoring \\\\u2014 it demands limitless data collection, precision in processing, and the correlation of this data into actionable insights. 
However, the road to achieving this holistic view is paved with obstacles, from navigating version incompatibilities to wrestling with restrictive proprietary code.\\"}),`\\n`,(0,t.jsxs)(e.p,{children:[\\"Enter \\",(0,t.jsx)(e.a,{href:\\"https://opentelemetry.io/\\",rel:\\"nofollow\\",children:\\"OpenTelemetry (OTel)\\"}),\\", with the following benefits for those who adopt it:\\"]}),`\\n`,(0,t.jsxs)(e.ul,{children:[`\\n`,(0,t.jsx)(e.li,{children:\\"Escape vendor constraints with OTel, freeing yourself from vendor lock-in and ensuring top-notch observability.\\"}),`\\n`,(0,t.jsx)(e.li,{children:\\"See the harmony of unified logs, metrics, and traces come together to provide a complete system view.\\"}),`\\n`,(0,t.jsx)(e.li,{children:\\"Improve your application oversight through richer and enhanced instrumentations.\\"}),`\\n`,(0,t.jsx)(e.li,{children:\\"Embrace the benefits of backward compatibility to protect your prior instrumentation investments.\\"}),`\\n`,(0,t.jsx)(e.li,{children:\\"Embark on the OpenTelemetry journey with an easy learning curve, simplifying onboarding and scalability.\\"}),`\\n`,(0,t.jsx)(e.li,{children:\\"Rely on a proven, future-ready standard to boost your confidence in every investment.\\"}),`\\n`]}),`\\n`,(0,t.jsxs)(e.p,{children:[\\"In this blog, we will explore how you can use \\",(0,t.jsx)(e.a,{href:\\"https://opentelemetry.io/docs/instrumentation/java/automatic/\\",rel:\\"nofollow\\",children:\\"automatic instrumentation in your Java\\"}),\\" application using Docker, without the need to refactor any part of your application code. We will use an \\",(0,t.jsx)(e.a,{href:\\"https://github.com/elastic/observability-examples\\",rel:\\"nofollow\\",children:\\"application called Elastiflix\\"}),\\", which helps highlight auto-instrumentation in a simple way.\\"]}),`\\n`,(0,t.jsx)(e.h2,{id:\\"application-prerequisites-and-config\\",children:\\"Application, prerequisites, and config\\"}),`\\n`,(0,t.jsxs)(e.p,{children:[\\"The application that we use for this blog is called \\",(0,t.jsx)(e.a,{href:\\"https://github.com/elastic/observability-examples\\",rel:\\"nofollow\\",children:\\"Elastiflix\\"}),\\", a movie-streaming application. It consists of several micro-services written in .NET, NodeJS, Go, and Python.\\"]}),`\\n`,(0,t.jsx)(e.p,{children:\\"Before we instrument our sample application, we will first need to understand how Elastic can receive the telemetry data.\\"}),`\\n`,(0,t.jsx)(e.p,{children:(0,t.jsx)(e.img,{src:\\"/assets/images/auto-instrumentation-java-applications-opentelemetry/elastic-blog-1-config.png\\",alt:\\"Elastic configuration options for OpenTelemetry\\",width:\\"1365\\",height:\\"687\\"})}),`\\n`,(0,t.jsx)(e.p,{children:\\"All of Elastic Observability\\\\u2019s APM capabilities are available with OTel data. 
Some of these include:\\"}),`\\n`,(0,t.jsxs)(e.ul,{children:[`\\n`,(0,t.jsx)(e.li,{children:\\"Service maps\\"}),`\\n`,(0,t.jsx)(e.li,{children:\\"Service details (latency, throughput, failed transactions)\\"}),`\\n`,(0,t.jsx)(e.li,{children:\\"Dependencies between services, distributed tracing\\"}),`\\n`,(0,t.jsx)(e.li,{children:\\"Transactions (traces)\\"}),`\\n`,(0,t.jsx)(e.li,{children:\\"Machine learning (ML) correlations\\"}),`\\n`,(0,t.jsx)(e.li,{children:\\"Log correlation\\"}),`\\n`]}),`\\n`,(0,t.jsx)(e.p,{children:\\"In addition to Elastic\\\\u2019s APM and a unified view of the telemetry data, you will also be able to use Elastic\\\\u2019s powerful machine learning capabilities to reduce the analysis, and alerting to help reduce MTTR.\\"}),`\\n`,(0,t.jsx)(e.h3,{id:\\"prerequisites\\",children:\\"Prerequisites\\"}),`\\n`,(0,t.jsxs)(e.ul,{children:[`\\n`,(0,t.jsxs)(e.li,{children:[\\"An Elastic Cloud account \\\\u2014 \\",(0,t.jsx)(e.a,{href:\\"https://cloud.elastic.co/\\",rel:\\"nofollow\\",children:\\"sign up now\\"}),\\".\\"]}),`\\n`,(0,t.jsxs)(e.li,{children:[\\"A clone of the \\",(0,t.jsx)(e.a,{href:\\"https://github.com/elastic/observability-examples\\",rel:\\"nofollow\\",children:\\"Elastiflix demo application\\"}),\\", or your own Java application\\"]}),`\\n`,(0,t.jsxs)(e.li,{children:[\\"Basic understanding of Docker \\\\u2014 potentially install \\",(0,t.jsx)(e.a,{href:\\"https://www.docker.com/products/docker-desktop/\\",rel:\\"nofollow\\",children:\\"Docker Desktop\\"})]}),`\\n`,(0,t.jsx)(e.li,{children:\\"Basic understanding of Java\\"}),`\\n`]}),`\\n`,(0,t.jsx)(e.h3,{id:\\"view-the-example-source-code\\",children:\\"View the example source code\\"}),`\\n`,(0,t.jsxs)(e.p,{children:[\\"The full source code, including the Dockerfile used in this blog, can be found on \\",(0,t.jsx)(e.a,{href:\\"https://github.com/elastic/observability-examples/tree/main/Elastiflix/python-favorite-otel-auto\\",rel:\\"nofollow\\",children:\\"GitHub\\"}),\\". The repository also contains the \\",(0,t.jsx)(e.a,{href:\\"https://github.com/elastic/observability-examples/tree/main/Elastiflix/python-favorite\\",rel:\\"nofollow\\",children:\\"same application without instrumentation\\"}),\\". This allows you to compare each file and see the differences.\\"]}),`\\n`,(0,t.jsxs)(e.p,{children:[\\"The following steps will show you how to instrument this application and run it on the command line or in Docker. If you are interested in a more complete OTel example, take a look at the docker-compose file \\",(0,t.jsx)(e.a,{href:\\"https://github.com/elastic/observability-examples/tree/main#start-the-app\\",rel:\\"nofollow\\",children:\\"here\\"}),\\", which will bring up the full project.\\"]}),`\\n`,(0,t.jsx)(e.h2,{id:\\"step-by-step-guide\\",children:\\"Step-by-step guide\\"}),`\\n`,(0,t.jsx)(e.h3,{id:\\"step-0-log-in-to-your-elastic-cloud-account\\",children:\\"Step 0. 
Log in to your Elastic Cloud account\\"}),`\\n`,(0,t.jsxs)(e.p,{children:[\\"This blog assumes you have an Elastic Cloud account \\\\u2014 if not, follow the \\",(0,t.jsx)(e.a,{href:\\"https://cloud.elastic.co/registration?elektra=en-cloud-page\\",rel:\\"nofollow\\",children:\\"instructions to get started on Elastic Cloud\\"}),\\".\\"]}),`\\n`,(0,t.jsx)(e.p,{children:(0,t.jsx)(e.img,{src:\\"/assets/images/auto-instrumentation-java-applications-opentelemetry/elastic-blog-2-trial.png\\",alt:\\"free trial\\",width:\\"884\\",height:\\"654\\"})}),`\\n`,(0,t.jsx)(e.h3,{id:\\"step-1-configure-auto-instrumentation-for-the-java-service\\",children:\\"Step 1. Configure auto-instrumentation for the Java service\\"}),`\\n`,(0,t.jsxs)(e.p,{children:[\\"We are going to use automatic instrumentation with the Java service from the \\",(0,t.jsx)(e.a,{href:\\"https://github.com/elastic/observability-examples/tree/main/Elastiflix/java-favorite-otel-auto\\",rel:\\"nofollow\\",children:\\"Elastiflix demo application\\"}),\\".\\"]}),`\\n`,(0,t.jsx)(e.p,{children:\\"We will be using the following service from Elastiflix:\\"}),`\\n`,(0,t.jsx)(e.pre,{children:(0,t.jsx)(e.code,{className:\\"language-bash\\",children:`Elastiflix/java-favorite-otel-auto\\n`})}),`\\n`,(0,t.jsxs)(e.p,{children:[\\"Per the \\",(0,t.jsx)(e.a,{href:\\"https://opentelemetry.io/docs/instrumentation/java/automatic/\\",rel:\\"nofollow\\",children:\\"OpenTelemetry Automatic Instrumentation for Java documentation\\"}),\\", you will simply install the appropriate Java packages.\\"]}),`\\n`,(0,t.jsx)(e.p,{children:\\"Create a local OTel directory, then download the OpenTelemetry Java agent, opentelemetry-javaagent.jar, into it.\\"}),`\\n`,(0,t.jsx)(e.pre,{children:(0,t.jsx)(e.code,{className:\\"language-bash\\",children:`mkdir /otel\\n\\ncurl -L https://github.com/open-telemetry/opentelemetry-java-instrumentation/releases/latest/download/opentelemetry-javaagent.jar --output /otel/opentelemetry-javaagent.jar\\n`})}),`\\n`,(0,t.jsx)(e.p,{children:\\"If you are going to run the service on the command line, then you can use the following command:\\"}),`\\n`,(0,t.jsx)(e.pre,{children:(0,t.jsx)(e.code,{className:\\"language-bash\\",children:`java -javaagent:/otel/opentelemetry-javaagent.jar \\\\\\\\\\n-jar /usr/src/app/target/favorite-0.0.1-SNAPSHOT.jar --server.port=5000\\n`})}),`\\n`,(0,t.jsx)(e.p,{children:\\"For our application, we will do this as part of the Dockerfile.\\"}),`\\n`,(0,t.jsx)(e.p,{children:(0,t.jsx)(e.strong,{children:\\"Dockerfile\\"})}),`\\n`,(0,t.jsx)(e.pre,{children:(0,t.jsx)(e.code,{className:\\"language-dockerfile\\",children:`# Start with a base image containing Java runtime\\nFROM maven:3.8.2-openjdk-17-slim as build\\n\\n# Make port 5000 available to the world outside this container\\nEXPOSE 5000\\n\\n# Change to the app directory\\nWORKDIR /usr/src/app\\n\\n# Copy the local code to the container\\nCOPY . .\\n\\n# Build the application\\nRUN mvn clean install\\n\\nUSER root\\nRUN apt-get update && apt-get install -y zip curl\\nRUN mkdir /otel\\nRUN curl -L -o /otel/opentelemetry-javaagent.jar https://github.com/open-telemetry/opentelemetry-java-instrumentation/releases/download/v1.28.0/opentelemetry-javaagent.jar\\n\\nCOPY start.sh /start.sh\\nRUN chmod +x /start.sh\\n\\nENTRYPOINT [\\"/start.sh\\"]\\n`})}),`\\n`,
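(0,t.jsx)(e.p,{children:\\"The Dockerfile hands control to start.sh. The actual script lives in the repository; as a minimal sketch, it might look like the following, building the OTLP authorization header from the secret token before starting the JVM with the agent attached (the real script may differ):\\"}),`\\n`,(0,t.jsx)(e.pre,{children:(0,t.jsx)(e.code,{className:\\"language-bash\\",children:`#!/bin/bash\\n# Sketch of a minimal start.sh; the script in the repository may differ.\\n# Build the OTLP authorization header from the secret token, if provided.\\nif [ -n \\"\\\\${ELASTIC_APM_SECRET_TOKEN}\\" ]; then\\n  export OTEL_EXPORTER_OTLP_HEADERS=\\"Authorization=Bearer \\\\${ELASTIC_APM_SECRET_TOKEN}\\"\\nfi\\n\\n# Start the JVM with the OpenTelemetry Java agent attached.\\nexec java -javaagent:/otel/opentelemetry-javaagent.jar \\\\\\\\\\n  -jar /usr/src/app/target/favorite-0.0.1-SNAPSHOT.jar --server.port=5000\\n`})}),`\\n`,(0,t.jsx)(e.h3,{id:\\"step-2-running-the-docker-image-with-environment-variables\\",children:\\"Step 2. 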
Running the Docker Image with environment variables\\"}),`\\n`,(0,t.jsxs)(e.p,{children:[\\"As specified in the \\",(0,t.jsx)(e.a,{href:\\"https://opentelemetry.io/docs/instrumentation/java/automatic/\\",rel:\\"nofollow\\",children:\\"OTel Java documentation\\"}),\\", we will use environment variables and pass in the configuration values to enable it to connect with \\",(0,t.jsx)(e.a,{href:\\"https://www.elastic.co/guide/en/observability/current/apm-open-telemetry.html\\",rel:\\"nofollow\\",children:\\"Elastic Observability\\\\u2019s APM server\\"}),\\".\\"]}),`\\n`,(0,t.jsx)(e.p,{children:\\"Because Elastic accepts OTLP natively, we just need to provide the endpoint and authentication details that the OTel exporter needs to send the data, as well as some other environment variables.\\"}),`\\n`,(0,t.jsxs)(e.p,{children:[(0,t.jsx)(e.strong,{children:\\"Getting Elastic Cloud variables\\"}),(0,t.jsx)(e.br,{}),`\\n`,\\"You can copy the endpoints and token from Kibana under the path \\",(0,t.jsx)(e.code,{children:\\"/app/home#/tutorial/apm\\"}),\\".\\"]}),`\\n`,(0,t.jsx)(e.p,{children:(0,t.jsx)(e.img,{src:\\"/assets/images/auto-instrumentation-java-applications-opentelemetry/elastic-blog-3-apm-agents.png\\",alt:\\"apm agents\\",width:\\"1924\\",height:\\"1304\\"})}),`\\n`,(0,t.jsx)(e.p,{children:\\"You will need to copy the following environment variables:\\"}),`\\n`,(0,t.jsx)(e.pre,{children:(0,t.jsx)(e.code,{className:\\"language-bash\\",children:`OTEL_EXPORTER_OTLP_ENDPOINT\\nOTEL_EXPORTER_OTLP_HEADERS\\n`})}),`\\n`,
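(0,t.jsx)(e.p,{children:\\"For reference, the copied values typically look something like the following (hypothetical placeholders, not real credentials):\\"}),`\\n`,(0,t.jsx)(e.pre,{children:(0,t.jsx)(e.code,{className:\\"language-bash\\",children:`OTEL_EXPORTER_OTLP_ENDPOINT=\\"https://my-deployment.apm.us-central1.gcp.cloud.es.io:443\\"\\nOTEL_EXPORTER_OTLP_HEADERS=\\"Authorization=Bearer my-secret-token\\"\\n`})}),`\\n`,(0,t.jsx)(e.p,{children:(0,t.jsx)(e.strong,{children:\\"Build the Docker image\\"})}),`\\n`,(0,t.jsx)(e.pre,{children:(0,t.jsx)(e.code,{className:\\"language-bash\\",children:`docker build -t java-otel-auto-image .\\n`})}),`\\n`,(0,t.jsx)(e.p,{children:(0,t.jsx)(e.strong,{children:\\"Run the Docker image\\"})}),`\\n`,(0,t.jsx)(e.pre,{children:(0,t.jsx)(e.code,{className:\\"language-bash\\",children:`docker run \\\\\\\\\\n -e OTEL_EXPORTER_OTLP_ENDPOINT=\\"REPLACE WITH OTEL_EXPORTER_OTLP_ENDPOINT\\" \\\\\\\\\\n -e ELASTIC_APM_SECRET_TOKEN=\\"REPLACE WITH THE BIT AFTER Authorization=Bearer \\" \\\\\\\\\\n -e OTEL_RESOURCE_ATTRIBUTES=\\"service.version=1.0,deployment.environment=production\\" \\\\\\\\\\n -e OTEL_SERVICE_NAME=\\"java-favorite-otel-auto\\" \\\\\\\\\\n -p 5000:5000 \\\\\\\\\\n java-otel-auto-image\\n`})}),`\\n`,(0,t.jsxs)(e.p,{children:[\\"You can now issue a few requests in order to generate trace data. Note that these requests are expected to return an error, as this service relies on a connection to Redis that you don\\\\u2019t currently have running. 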
As mentioned before, you can find a more complete example using docker-compose \\",(0,t.jsx)(e.a,{href:\\"https://github.com/elastic/observability-examples/tree/main/Elastiflix\\",rel:\\"nofollow\\",children:\\"here\\"}),\\".\\"]}),`\\n`,(0,t.jsx)(e.pre,{children:(0,t.jsx)(e.code,{className:\\"language-bash\\",children:`curl localhost:5000/favorites\\n\\n# or alternatively issue a request every second\\n\\nwhile true; do curl \\"localhost:5000/favorites\\"; sleep 1; done;\\n`})}),`\\n`,(0,t.jsx)(e.h3,{id:\\"step-3-explore-traces-and-logs-in-elastic-apm\\",children:\\"Step 3: Explore traces and logs in Elastic APM\\"}),`\\n`,(0,t.jsx)(e.p,{children:\\"Once you have this up and running, you can ping the endpoint for your instrumented service (in our case, this is /favorites), and you should see the app appear in Elastic APM, as shown below:\\"}),`\\n`,(0,t.jsx)(e.p,{children:(0,t.jsx)(e.img,{src:\\"/assets/images/auto-instrumentation-java-applications-opentelemetry/elastic-blog-4-services.png\\",alt:\\"services\\",width:\\"1157\\",height:\\"893\\"})}),`\\n`,(0,t.jsx)(e.p,{children:\\"It will begin by tracking throughput and latency, critical metrics for SREs to pay attention to.\\"}),`\\n`,(0,t.jsx)(e.p,{children:\\"Digging in, we can see an overview of all our Transactions.\\"}),`\\n`,(0,t.jsx)(e.p,{children:(0,t.jsx)(e.img,{src:\\"/assets/images/auto-instrumentation-java-applications-opentelemetry/elastic-blog-5-services2.png\\",alt:\\"services-2\\",width:\\"1999\\",height:\\"1127\\"})}),`\\n`,(0,t.jsx)(e.p,{children:\\"And look at specific transactions:\\"}),`\\n`,(0,t.jsx)(e.p,{children:(0,t.jsx)(e.img,{src:\\"/assets/images/auto-instrumentation-java-applications-opentelemetry/elastic-blog-6-graph-colored.png\\",alt:\\"graph colored lines\\",width:\\"1999\\",height:\\"1127\\"})}),`\\n`,(0,t.jsxs)(e.p,{children:[\\"Click on \\",(0,t.jsx)(e.strong,{children:\\"Logs\\"}),\\", and we see that logs are also brought over. The OTel Agent will automatically bring in logs and correlate them with traces for you:\\"]}),`\\n`,(0,t.jsx)(e.p,{children:(0,t.jsx)(e.img,{src:\\"/assets/images/auto-instrumentation-java-applications-opentelemetry/elastic-blog-7-graph-no-colors.png\\",alt:\\"graph-no-colors\\",width:\\"1999\\",height:\\"1127\\"})}),`\\n`,(0,t.jsx)(e.p,{children:\\"This gives you complete visibility across logs, metrics, and traces!\\"}),`\\n`,(0,t.jsx)(e.h2,{id:\\"basic-concepts-how-apm-works-with-java\\",children:\\"Basic concepts: How APM works with Java\\"}),`\\n`,(0,t.jsx)(e.p,{children:\\"Before we continue, let\'s first understand a few basic concepts and terms.\\"}),`\\n`,(0,t.jsxs)(e.ul,{children:[`\\n`,(0,t.jsxs)(e.li,{children:[(0,t.jsx)(e.strong,{children:\\"Java Agent:\\"}),\\" This is a tool that can be used to instrument (or modify) the bytecode of class files in the Java Virtual Machine (JVM). Java agents are used for many purposes like performance monitoring, logging, security, and more.\\"]}),`\\n`,(0,t.jsxs)(e.li,{children:[(0,t.jsx)(e.strong,{children:\\"Bytecode:\\"}),\\" This is the intermediary code generated by the Java compiler from your Java source code. This code is interpreted or compiled on the fly by the JVM to produce machine code that can be executed.\\"]}),`\\n`,(0,t.jsxs)(e.li,{children:[(0,t.jsx)(e.strong,{children:\\"Byte Buddy:\\"}),\\" Byte Buddy is a code generation and manipulation library for Java. It is used to create, modify, or adapt Java classes at runtime. 
In the context of a Java Agent, Byte Buddy provides a powerful and flexible way to modify bytecode. \\",(0,t.jsx)(e.strong,{children:\\"Both the Elastic APM Agent and the OpenTelemetry Agent use Byte Buddy under the covers.\\"})]}),`\\n`]}),`\\n`,(0,t.jsx)(e.p,{children:(0,t.jsx)(e.strong,{children:\\"Now, let\'s talk about how automatic instrumentation works with Byte Buddy:\\"})}),`\\n`,(0,t.jsx)(e.p,{children:\\"Automatic instrumentation is the process by which an agent modifies the bytecode of your application\'s classes, often to insert monitoring code. The agent doesn\'t modify the source code directly, but rather the bytecode that is loaded into the JVM. This is done while the JVM is loading the classes, so the modifications are in effect during runtime.\\"}),`\\n`,(0,t.jsx)(e.p,{children:\\"Here\'s a simplified explanation of the process:\\"}),`\\n`,(0,t.jsxs)(e.ol,{children:[`\\n`,(0,t.jsxs)(e.li,{children:[`\\n`,(0,t.jsxs)(e.p,{children:[(0,t.jsx)(e.strong,{children:\\"Start the JVM with the agent:\\"}),\\" When starting your Java application, you specify the Java agent with the -javaagent command line option. This instructs the JVM to load your agent before the main method of your application is invoked. At this point, the agent has the opportunity to set up class transformers.\\"]}),`\\n`]}),`\\n`,(0,t.jsxs)(e.li,{children:[`\\n`,(0,t.jsxs)(e.p,{children:[(0,t.jsx)(e.strong,{children:\\"Register a class file transformer with Byte Buddy:\\"}),\\" Your agent will register a class file transformer with Byte Buddy. A transformer is a piece of code that is invoked every time a class is loaded into the JVM. This transformer receives the bytecode of the class, and it can modify this bytecode before the class is actually used.\\"]}),`\\n`]}),`\\n`,(0,t.jsxs)(e.li,{children:[`\\n`,(0,t.jsxs)(e.p,{children:[(0,t.jsx)(e.strong,{children:\\"Transform the bytecode:\\"}),` When your transformer is invoked, it will use Byte Buddy\'s API to modify the bytecode. Byte Buddy allows you to specify your transformations in a high-level, expressive way rather than manually writing complex bytecode. For example, you could specify a certain class and method within that class that you want to instrument and provide an \\"interceptor\\" that will add new behavior to that method.`]}),`\\n`]}),`\\n`,(0,t.jsxs)(e.li,{children:[`\\n`,(0,t.jsxs)(e.p,{children:[(0,t.jsx)(e.strong,{children:\\"Use the transformed classes:\\"}),\\" Once the agent has set up its transformers, the JVM continues to load classes as usual. Each time a class is loaded, your transformers are invoked, allowing them to modify the bytecode. Your application then uses these transformed classes as if they were the original ones, but they now have the extra behavior that you\'ve injected through your interceptor.\\"]}),`\\n`]}),`\\n`]}),`\\n`,
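(0,t.jsx)(e.p,{children:\\"To make this concrete, here is a minimal sketch of a timing agent built with Byte Buddy. It is illustrative only: the package matcher and class names are hypothetical, and the exact Transformer lambda signature varies between Byte Buddy versions.\\"}),`\\n`,(0,t.jsx)(e.pre,{children:(0,t.jsx)(e.code,{className:\\"language-java\\",children:`import java.lang.instrument.Instrumentation;\\nimport net.bytebuddy.agent.builder.AgentBuilder;\\nimport net.bytebuddy.asm.Advice;\\nimport net.bytebuddy.matcher.ElementMatchers;\\n\\npublic class TimingAgent {\\n  // Called by the JVM before main() because of the -javaagent option.\\n  public static void premain(String arguments, Instrumentation instrumentation) {\\n    new AgentBuilder.Default()\\n        // Only transform classes in a (hypothetical) application package.\\n        .type(ElementMatchers.nameStartsWith(\\"com.example.favorite\\"))\\n        .transform((builder, type, classLoader, module) ->\\n            // Weave the timing advice into every method of matched classes.\\n            builder.visit(Advice.to(TimingAdvice.class).on(ElementMatchers.isMethod())))\\n        .installOn(instrumentation);\\n  }\\n\\n  public static class TimingAdvice {\\n    @Advice.OnMethodEnter\\n    public static long enter() {\\n      return System.nanoTime();\\n    }\\n\\n    @Advice.OnMethodExit\\n    public static void exit(@Advice.Enter long startTime) {\\n      System.out.println(\\"method took \\" + (System.nanoTime() - startTime) + \\" ns\\");\\n    }\\n  }\\n}\\n`})}),`\\n`,(0,t.jsx)(e.p,{children:(0,t.jsx)(e.img,{src:\\"/assets/images/auto-instrumentation-java-applications-opentelemetry/elastic-blog-8-flowchart.png\\",alt:\\"flowchart\\",width:\\"736\\",height:\\"289\\"})}),`\\n`,(0,t.jsx)(e.p,{children:\\"In essence, automatic instrumentation with Byte Buddy is about modifying the behavior of your Java classes at runtime, without needing to alter the source code directly. 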
This is especially useful for cross-cutting concerns like logging, monitoring, or security, as it allows you to centralize this code in your Java Agent, rather than scattering it throughout your application.\\"}),`\\n`,(0,t.jsx)(e.h2,{id:\\"summary\\",children:\\"Summary\\"}),`\\n`,(0,t.jsx)(e.p,{children:\\"With this Dockerfile, you\'ve transformed your simple Java application into one that\'s automatically instrumented with OpenTelemetry. This will aid greatly in understanding application performance, tracing errors, and gaining insights into how users interact with your software.\\"}),`\\n`,(0,t.jsx)(e.p,{children:\\"Remember, observability is a crucial aspect of modern application development, especially in distributed systems. With tools like OpenTelemetry, understanding complex systems becomes a tad bit easier.\\"}),`\\n`,(0,t.jsx)(e.p,{children:\\"In this blog, we discussed the following:\\"}),`\\n`,(0,t.jsxs)(e.ul,{children:[`\\n`,(0,t.jsx)(e.li,{children:\\"How to auto-instrument Java with OpenTelemetry.\\"}),`\\n`,(0,t.jsx)(e.li,{children:\\"Using standard commands in a Dockerfile, auto-instrumentation was done efficiently, without adding code in multiple places, which keeps the setup manageable.\\"}),`\\n`,(0,t.jsx)(e.li,{children:\\"Using OpenTelemetry and its support for multiple languages, DevOps and SRE teams can auto-instrument their applications with ease, gaining immediate insights into the health of the entire application stack and reducing mean time to resolution (MTTR).\\"}),`\\n`]}),`\\n`,(0,t.jsx)(e.p,{children:\\"Since Elastic can support a mix of methods for ingesting data, whether it be using auto-instrumentation of open-source OpenTelemetry or manual instrumentation with its native APM agents, you can plan your migration to OTel by focusing on a few applications first and then using OpenTelemetry across your applications later on in a manner that best fits your business needs.\\"}),`\\n`,(0,t.jsxs)(e.blockquote,{children:[`\\n`,(0,t.jsx)(e.p,{children:\\"Developer resources:\\"}),`\\n`,(0,t.jsxs)(e.ul,{children:[`\\n`,(0,t.jsxs)(e.li,{children:[(0,t.jsx)(e.a,{href:\\"https://www.elastic.co/blog/getting-started-opentelemetry-instrumentation-sample-app\\",rel:\\"nofollow\\",children:\\"Elastiflix application\\"}),\\", a guide to instrument different languages with OpenTelemetry\\"]}),`\\n`,(0,t.jsxs)(e.li,{children:[\\"Python: \\",(0,t.jsx)(e.a,{href:\\"https://www.elastic.co/blog/auto-instrumentation-of-python-applications-opentelemetry\\",rel:\\"nofollow\\",children:\\"Auto-instrumentation\\"}),\\", \\",(0,t.jsx)(e.a,{href:\\"https://www.elastic.co/blog/manual-instrumentation-of-python-applications-opentelemetry\\",rel:\\"nofollow\\",children:\\"Manual-instrumentation\\"})]}),`\\n`,(0,t.jsxs)(e.li,{children:[\\"Java: \\",(0,t.jsx)(e.a,{href:\\"https://www.elastic.co/blog/auto-instrumentation-java-applications-opentelemetry\\",rel:\\"nofollow\\",children:\\"Auto-instrumentation\\"}),\\", \\",(0,t.jsx)(e.a,{href:\\"https://www.elastic.co/blog/manual-instrumentation-of-java-applications-opentelemetry\\",rel:\\"nofollow\\",children:\\"Manual-instrumentation\\"})]}),`\\n`,(0,t.jsxs)(e.li,{children:[\\"Node.js: \\",(0,t.jsx)(e.a,{href:\\"https://www.elastic.co/blog/auto-instrument-nodejs-applications-opentelemetry\\",rel:\\"nofollow\\",children:\\"Auto-instrumentation\\"}),\\", 
\\",(0,t.jsx)(e.a,{href:\\"https://www.elastic.co/blog/manual-instrumentation-of-nodejs-applications-opentelemetry\\",rel:\\"nofollow\\",children:\\"Manual-instrumentation\\"})]}),`\\n`,(0,t.jsxs)(e.li,{children:[\\".NET: \\",(0,t.jsx)(e.a,{href:\\"https://www.elastic.co/blog/auto-instrumentation-of-net-applications-opentelemetry\\",rel:\\"nofollow\\",children:\\"Auto-instrumentation\\"}),\\", \\",(0,t.jsx)(e.a,{href:\\"https://www.elastic.co/blog/manual-instrumentation-of-net-applications-opentelemetry\\",rel:\\"nofollow\\",children:\\"Manual-instrumentation\\"})]}),`\\n`,(0,t.jsxs)(e.li,{children:[\\"Go: \\",(0,t.jsx)(e.a,{href:\\"https://elastic.co/blog/manual-instrumentation-of-go-applications-opentelemetry\\",rel:\\"nofollow\\",children:\\"Manual-instrumentation\\"})]}),`\\n`,(0,t.jsx)(e.li,{children:(0,t.jsx)(e.a,{href:\\"https://www.elastic.co/blog/best-practices-instrumenting-opentelemetry\\",rel:\\"nofollow\\",children:\\"Best practices for instrumenting OpenTelemetry\\"})}),`\\n`]}),`\\n`,(0,t.jsx)(e.p,{children:\\"General configuration and use case resources:\\"}),`\\n`,(0,t.jsxs)(e.ul,{children:[`\\n`,(0,t.jsx)(e.li,{children:(0,t.jsx)(e.a,{href:\\"https://www.elastic.co/blog/opentelemetry-observability\\",rel:\\"nofollow\\",children:\\"Independence with OpenTelemetry on Elastic\\"})}),`\\n`,(0,t.jsx)(e.li,{children:(0,t.jsx)(e.a,{href:\\"https://www.elastic.co/blog/implementing-kubernetes-observability-security-opentelemetry\\",rel:\\"nofollow\\",children:\\"Modern observability and security on Kubernetes with Elastic and OpenTelemetry\\"})}),`\\n`,(0,t.jsx)(e.li,{children:(0,t.jsx)(e.a,{href:\\"https://www.elastic.co/blog/3-models-logging-opentelemetry-elastic\\",rel:\\"nofollow\\",children:\\"3 models for logging with OpenTelemetry and Elastic\\"})}),`\\n`,(0,t.jsx)(e.li,{children:(0,t.jsx)(e.a,{href:\\"https://www.elastic.co/blog/adding-free-and-open-elastic-apm-as-part-of-your-elastic-observability-deployment\\",rel:\\"nofollow\\",children:\\"Adding free and open Elastic APM as part of your Elastic Observability deployment\\"})}),`\\n`,(0,t.jsx)(e.li,{children:(0,t.jsx)(e.a,{href:\\"https://www.elastic.co/blog/custom-metrics-app-code-java-agent-plugin\\",rel:\\"nofollow\\",children:\\"Capturing custom metrics through OpenTelemetry API in code with Elastic\\"})}),`\\n`,(0,t.jsx)(e.li,{children:(0,t.jsx)(e.a,{href:\\"https://www.elastic.co/virtual-events/future-proof-your-observability-platform-with-opentelemetry-and-elastic\\",rel:\\"nofollow\\",children:\\"Future-proof your observability platform with OpenTelemetry and Elastic\\"})}),`\\n`,(0,t.jsx)(e.li,{children:(0,t.jsx)(e.a,{href:\\"https://www.elastic.co/blog/kubernetes-k8s-observability-elasticsearch-cncf\\",rel:\\"nofollow\\",children:\\"Elastic Observability: Built for open technologies like Kubernetes, OpenTelemetry, Prometheus, Istio, and more\\"})}),`\\n`]}),`\\n`]}),`\\n`,(0,t.jsxs)(e.p,{children:[\\"Don\\\\u2019t have an Elastic Cloud account yet? \\",(0,t.jsx)(e.a,{href:\\"https://cloud.elastic.co/registration\\",rel:\\"nofollow\\",children:\\"Sign up for Elastic Cloud\\"}),\\" and try out the auto-instrumentation capabilities that I discussed above. I would be interested in getting your feedback about your experience in gaining visibility into your application stack with Elastic.\\"]}),`\\n`,(0,t.jsx)(e.p,{children:(0,t.jsx)(e.em,{children:\\"The release and timing of any features or functionality described in this post remain at Elastic\'s sole discretion. 
Any features or functionality not currently available may not be delivered on time or at all.\\"})})]})}function d(n={}){let{wrapper:e}=n.components||{};return e?(0,t.jsx)(e,{...n,children:(0,t.jsx)(h,{...n})}):h(n)}return b(T);})();\\n;return Component;"},"_id":"articles/auto-instrumentation-of-java-applications-opentelemetry.mdx","_raw":{"sourceFilePath":"articles/auto-instrumentation-of-java-applications-opentelemetry.mdx","sourceFileName":"auto-instrumentation-of-java-applications-opentelemetry.mdx","sourceFileDir":"articles","contentType":"mdx","flattenedPath":"articles/auto-instrumentation-of-java-applications-opentelemetry"},"type":"Article","imageUrl":"/assets/images/auto-instrumentation-java-applications-opentelemetry/observability-launch-series-3-java-auto.jpg","readingTime":"11 min read","url":"/auto-instrumentation-java-applications-opentelemetry","headings":[{"level":2,"title":"Application, prerequisites, and config","href":"#application-prerequisites-and-config"},{"level":3,"title":"Prerequisites","href":"#prerequisites"},{"level":3,"title":"View the example source code","href":"#view-the-example-source-code"},{"level":2,"title":"Step-by-step guide","href":"#step-by-step-guide"},{"level":3,"title":"Step 0. Log in to your Elastic Cloud account","href":"#step-0-log-in-to-your-elastic-cloud-account"},{"level":3,"title":"Step 1. Configure auto-instrumentation for the Java service","href":"#step-1-configure-auto-instrumentation-for-the-java-service"},{"level":3,"title":"Step 2. Running the Docker Image with environment variables","href":"#step-2-running-the-docker-image-with-environment-variables"},{"level":3,"title":"Step 3: Explore traces and logs in Elastic APM","href":"#step-3-explore-traces-and-logs-in-elastic-apm"},{"level":2,"title":"Basic concepts: How APM works with Java","href":"#basic-concepts-how-apm-works-with-java"},{"level":2,"title":"Summary","href":"#summary"}]},{"title":"Auto-instrumentation of .NET applications with OpenTelemetry","slug":"auto-instrumentation-net-applications-opentelemetry","date":"2023-09-01","description":"OpenTelemetry provides an observability framework for cloud-native software, allowing us to trace, monitor, and debug applications seamlessly. In this post, we\'ll explore how to automatically instrument a .NET application using OpenTelemetry.","image":"observability-launch-series-4-net-auto.jpg","author":[{"slug":"david-hope","type":"Author","_raw":{}}],"tags":[{"slug":"opentelemetry","type":"Tag","_raw":{}},{"slug":"apm","type":"Tag","_raw":{}},{"slug":"net","type":"Tag","_raw":{}},{"slug":"instrumentation","type":"Tag","_raw":{}}],"body":{"raw":"\\nIn the fast-paced universe of software development, especially in the cloud-native realm, DevOps and SRE teams are increasingly emerging as essential partners in application stability and growth.\\n\\nDevOps engineers continuously optimize software delivery, while SRE teams act as the stewards of application reliability, scalability, and top-tier performance. The challenge? These teams require a cutting-edge observability solution, one that encompasses full-stack insights, empowering them to rapidly manage, monitor, and rectify potential disruptions before they culminate into operational challenges.\\n\\nObservability in our modern distributed software ecosystem goes beyond mere monitoring — it demands limitless data collection, precision in processing, and the correlation of this data into actionable insights. 
However, the road to achieving this holistic view is paved with obstacles, from navigating version incompatibilities to wrestling with restrictive proprietary code.\\n\\nEnter [OpenTelemetry (OTel)](https://opentelemetry.io/), with the following benefits for those who adopt it:\\n\\n- Escape vendor constraints with OTel, freeing yourself from vendor lock-in and ensuring top-notch observability.\\n- See the harmony of unified logs, metrics, and traces come together to provide a complete system view.\\n- Improve your application oversight through richer and enhanced instrumentations.\\n- Embrace the benefits of backward compatibility to protect your prior instrumentation investments.\\n- Embark on the OpenTelemetry journey with an easy learning curve, simplifying onboarding and scalability.\\n- Rely on a proven, future-ready standard to boost your confidence in every investment.\\n- Explore manual instrumentation, enabling customized data collection to fit your unique needs.\\n- Ensure monitoring consistency across layers with a standardized observability data framework.\\n- Decouple development from operations, driving peak efficiency for both.\\n\\nGiven this context, OpenTelemetry emerges as an unmatched observability solution for cloud-native software, seamlessly enabling tracing, monitoring, and debugging. One of its strengths is the ability to auto-instrument applications, allowing developers the luxury of collecting invaluable telemetry without delving into code modifications.\\n\\nIn this post, we will dive into the methodology to instrument a .NET application using Docker, blending the best of both worlds: powerful observability without the code hassles.\\n\\n## What\'s covered?\\n\\n- How APM works with .NET using CLR Profiler functionality\\n- Creating a Docker image for a .NET application with the OpenTelemetry instrumentation baked in\\n- Installing and running the OpenTelemetry .NET Profiler for automatic instrumentation\\n\\n## How APM works with .NET using CLR Profiler functionality\\n\\nBefore we delve into the details, let\'s clear up some confusion around .NET Profilers and CPU Profilers like Elastic\xae’s Universal Profiling tool — we don’t want to get these two things mixed up, as they have very different purposes.\\n\\nWhen discussing profiling tools, especially in the context of .NET, it\'s not uncommon to encounter confusion between a \\".NET profiler\\" and a \\"CPU profiler.\\" Though both are used to diagnose and optimize applications, they serve different primary purposes and operate at different levels. Let\'s clarify the distinction:\\n\\n### .NET Profiler\\n\\n1. **Scope:** Specifically targets .NET applications. It is designed to work with the .NET runtime (i.e., the Common Language Runtime (CLR)).\\n\\n2. **Functionality:** Hooks into the CLR through its Profiler API to observe runtime events such as class loading, JIT compilation, garbage collection, and exceptions, and it can rewrite IL to inject instrumentation.\\n\\n3. **Use cases:** APM auto-instrumentation, allocation and garbage-collection analysis, and diagnosing issues specific to the .NET runtime.\\n\\n### CPU Profiler\\n\\n1. **Scope:** More general than a .NET profiler. It can profile any application, irrespective of the language or runtime, as long as it runs on the CPU being profiled.\\n\\n2. **Functionality:** Periodically samples call stacks to attribute CPU time to the functions consuming it, independent of language or runtime.\\n\\n3. **Use cases:** Finding CPU hot spots and optimizing compute-bound code paths across the whole system.\\n\\nWhile both .NET profilers and CPU profilers aid in optimizing and diagnosing application performance, their approach and depth differ. A .NET profiler offers deep insights specifically into the .NET ecosystem, allowing for fine-grained analysis and instrumentation. 
In contrast, a CPU profiler provides a broader view, focusing on CPU usage patterns across any application, regardless of its development platform.\\n\\nIt\'s worth noting that for comprehensive profiling of a .NET application, you might use both: the .NET profiler to understand code-level behaviors specific to .NET and the CPU profiler to get an overview of CPU resource utilization.\\n\\nNow that we\'ve cleared that up, let\'s focus on the .NET Profiler, which we are discussing in this blog for automatic instrumentation of .NET applications. First, let\'s familiarize ourselves with some foundational concepts and terminologies relevant to a .NET Profiler:\\n\\n- **CLR (Common Language Runtime):** CLR is a core component of the .NET framework, acting as the execution engine for .NET apps. It provides key services like memory management, exception handling, and type safety.\\n- **Profiler API:** .NET provides a set of APIs for profiling applications. These APIs let tools and developers monitor or manipulate .NET applications during runtime.\\n- **IL (Intermediate Language):** After compiling, .NET source code turns into IL, a low-level, platform-agnostic representation. This IL code is then compiled just-in-time (JIT) into machine code by the CLR during application execution.\\n- **JIT compilation:** JIT stands for just-in-time. In .NET, the CLR compiles IL to native code just before its execution.\\n\\nNow, let\'s explore how automatic instrumentation works using CLR Profiler.\\n\\nAutomatic instrumentation in .NET, much like Java\'s bytecode instrumentation, revolves around modifying the behavior of your application\'s methods during runtime, without changing the actual source code.\\n\\nHere’s a step-by-step breakdown:\\n\\n1. **Attach the profiler:** When launching your .NET application, you\'ll have to specify that the profiler should be loaded. The CLR checks for the presence of a profiler by reading environment variables. If it finds one, the CLR initializes the profiler before any user code is executed.\\n\\n2. **Use Profiler API to monitor events:** The Profiler API allows a profiler to monitor various events. For instance, method JIT compilation events can be tracked. When a method is about to be JIT compiled, the profiler gets notified.\\n\\n3. **Manipulate IL code:** Upon getting notified of a JIT compilation, the profiler can manipulate the IL code of the method. Using the Profiler API, the profiler can insert, delete, or replace IL instructions. This is analogous to how Java agents modify bytecode. For example, if you want to measure a method\'s execution time, you\'d modify the IL to insert calls to start and stop a timer at the beginning and end of the method, respectively.\\n\\n4. **Execution of transformed code:** Once the IL has been modified, the JIT compiler will translate it into machine code. The application will then execute this machine code, which includes the additions made by the profiler.\\n\\n5. **Gather and report data:** The added instrumentation can collect various data, such as method execution times or call counts. This data can then be relayed to an application performance management (APM) tool, which can provide insights, visualizations, and alerts based on the data.\\n\\n
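To illustrate step 1, the following sketch shows the kind of environment variables the CLR reads in order to attach a profiler. The values are hypothetical placeholders; in our setup, the instrument.sh script installed by OpenTelemetry exports the real ones.\\n\\n```bash\\n# Illustrative only: how a profiler gets attached to the CLR.\\n# The GUID and library path below are placeholders, not real values.\\nexport CORECLR_ENABLE_PROFILING=1\\nexport CORECLR_PROFILER=\\"{00000000-0000-0000-0000-000000000000}\\"\\nexport CORECLR_PROFILER_PATH=\\"/otel/profiler.so\\"\\ndotnet login.dll\\n```\\n\\nIn essence, automatic instrumentation with CLR Profiler is about modifying the behavior of your .NET methods at runtime. 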
This is invaluable for monitoring, diagnosing, and fine-tuning the performance of .NET applications without intruding on the application\'s actual source code.\\n\\n## Prerequisites\\n\\n- A basic understanding of Docker and .NET\\n- Elastic Cloud\\n- Docker installed on your machine (we recommend Docker Desktop)\\n\\n## View the example source code\\n\\nThe full source code, including the Dockerfile used in this blog, can be found on [GitHub](https://github.com/elastic/observability-examples/tree/main/Elastiflix/dotnet-login-otel-manual). The repository also contains the [same application without instrumentation](https://github.com/elastic/observability-examples/tree/main/Elastiflix/dotnet-login). This allows you to compare each file and see the differences.\\n\\nThe following steps will show you how to instrument this application and run it on the command line or in Docker. If you are interested in a more complete OTel example, take a look at the docker-compose file [here](https://github.com/elastic/observability-examples/tree/main#start-the-app), which will bring up the full project.\\n\\n## Step-by-step guide\\n\\nThis blog assumes you have an Elastic Cloud account — if not, follow the [instructions to get started on Elastic Cloud](https://cloud.elastic.co/registration?elektra=en-cloud-page).\\n\\n![](/assets/images/auto-instrumentation-net-applications-opentelemetry/elastic-blog-2-free-trial.png)\\n\\n## Step 1. Base image setup\\n\\nStart with the .NET runtime image for the base layer of our Dockerfile:\\n\\n```dockerfile\\nFROM ${ARCH}mcr.microsoft.com/dotnet/aspnet:7.0 AS base\\nWORKDIR /app\\nEXPOSE 8000\\n```\\n\\nHere, we\'re setting up the application\'s runtime environment.\\n\\n## Step 2. Building the .NET application\\n\\nThis feature of Docker is just the best. Here, we compile our .NET application using the SDK image. In the bad old days, we used to build on a different platform and then put the compiled code into the Docker container. This way, we are much more confident our build will replicate from a developer’s desktop and into production by using Docker all the way through.\\n\\n```dockerfile\\nFROM --platform=$BUILDPLATFORM mcr.microsoft.com/dotnet/sdk:8.0-preview AS build\\nARG TARGETPLATFORM\\n\\nWORKDIR /src\\nCOPY [\\"login.csproj\\", \\"./\\"]\\nRUN dotnet restore \\"./login.csproj\\"\\nCOPY . .\\nWORKDIR \\"/src/.\\"\\nRUN dotnet build \\"login.csproj\\" -c Release -o /app/build\\n```\\n\\nThis section ensures that our .NET code is properly restored and compiled.\\n\\n## Step 3. Publishing the application\\n\\nOnce built, we\'ll publish the app:\\n\\n```dockerfile\\nFROM build AS publish\\nRUN dotnet publish \\"login.csproj\\" -c Release -o /app/publish\\n```\\n\\n## Step 4. Preparing the final image\\n\\nNow, let\'s set up the final runtime image:\\n\\n```dockerfile\\nFROM base AS final\\nWORKDIR /app\\nCOPY --from=publish /app/publish .\\n```\\n\\n## Step 5. Installing OpenTelemetry\\n\\nWe\'ll install dependencies and download the OpenTelemetry auto-instrumentation script:\\n\\n```dockerfile\\nRUN apt-get update && apt-get install -y zip curl\\nRUN mkdir /otel\\nRUN curl -L -o /otel/otel-dotnet-install.sh https://github.com/open-telemetry/opentelemetry-dotnet-instrumentation/releases/download/v0.7.0/otel-dotnet-auto-install.sh\\nRUN chmod +x /otel/otel-dotnet-install.sh\\n```\\n\\n## Step 6. Configure OpenTelemetry\\n\\nDesignate where OpenTelemetry should reside and execute the installation script. 
Note that the ENV instruction OTEL_DOTNET_AUTO_HOME is required, as the script looks for it:\\n\\n```dockerfile\\nENV OTEL_DOTNET_AUTO_HOME=/otel\\nRUN /bin/bash /otel/otel-dotnet-install.sh\\n```\\n\\n## Step 7. Additional configuration\\n\\nMake sure the auto-instrumentation and platform detection scripts are executable and run the platform detection script.\\n\\n```dockerfile\\nCOPY platform-detection.sh /otel/\\nRUN chmod +x /otel/instrument.sh\\nRUN chmod +x /otel/platform-detection.sh && /otel/platform-detection.sh\\n```\\n\\nThis platform detection script will check if the Docker build is for ARM64 and implement a workaround to get the OpenTelemetry instrumentation to work on macOS. If you happen to be running locally on macOS with an M1 or M2 processor, you will be grateful for this script.\\n\\n## Step 8. Entry point setup\\n\\nLastly, set the Docker image\'s entry point to first source the OpenTelemetry instrumentation script, which sets up the environment variables required to bootstrap the .NET Profiler, and then start our .NET application:\\n\\n```dockerfile\\nENTRYPOINT [\\"/bin/bash\\", \\"-c\\", \\"source /otel/instrument.sh && dotnet login.dll\\"]\\n```\\n\\n## Step 9. Running the Docker image with environment variables\\n\\nTo build and run the Docker image, you\'d typically follow these steps:\\n\\n### Build the Docker image\\n\\nFirst, you\'d want to build the Docker image from your Dockerfile. Let\'s assume the Dockerfile is in the current directory, and you\'d like to name/tag your image dotnet-login-otel-image.\\n\\n```bash\\ndocker build -t dotnet-login-otel-image .\\n```\\n\\n### Run the Docker image\\n\\nAfter building the image, you\'d run it with the specified environment variables. For this, the docker **run** command is used with the -e flag for each environment variable.\\n\\n```bash\\ndocker run \\\\\\n -e OTEL_EXPORTER_OTLP_HEADERS=\\"Authorization=Bearer ${ELASTIC_APM_SECRET_TOKEN}\\" \\\\\\n -e OTEL_EXPORTER_OTLP_ENDPOINT=\\"${ELASTIC_APM_SERVER_URL}\\" \\\\\\n -e OTEL_METRICS_EXPORTER=\\"otlp\\" \\\\\\n -e OTEL_RESOURCE_ATTRIBUTES=\\"service.version=1.0,deployment.environment=production\\" \\\\\\n -e OTEL_SERVICE_NAME=\\"dotnet-login-otel-auto\\" \\\\\\n -e OTEL_TRACES_EXPORTER=\\"otlp\\" \\\\\\n dotnet-login-otel-image\\n```\\n\\nMake sure that `${ELASTIC_APM_SECRET_TOKEN}` and `${ELASTIC_APM_SERVER_URL}` are set in your shell environment, using the actual values from the cloud as shown below. 
\\n**Getting Elastic Cloud variables**\\n\\nYou can copy the endpoints and token from Kibana\xae under the path `/app/home#/tutorial/apm`.\\n\\n![apm agents](/assets/images/auto-instrumentation-net-applications-opentelemetry/elastic-blog-3-apm-agents.png)\\n\\nYou can also use an environment file with `docker run --env-file` to make the command less verbose if you have multiple environment variables.\\n\\n
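For example, with hypothetical values (your endpoint and token will differ), the environment file and the resulting command might look like this:\\n\\n```bash\\n# otel.env (illustrative values only)\\nOTEL_EXPORTER_OTLP_HEADERS=Authorization=Bearer my-secret-token\\nOTEL_EXPORTER_OTLP_ENDPOINT=https://my-deployment.apm.us-central1.gcp.cloud.es.io:443\\nOTEL_METRICS_EXPORTER=otlp\\nOTEL_TRACES_EXPORTER=otlp\\nOTEL_RESOURCE_ATTRIBUTES=service.version=1.0,deployment.environment=production\\nOTEL_SERVICE_NAME=dotnet-login-otel-auto\\n```\\n\\n```bash\\ndocker run --env-file otel.env dotnet-login-otel-image\\n```\\n\\nOnce you have this up and running, you can ping the endpoint for your instrumented service (in our case, this is /login), and you should see the app appear in Elastic APM, as shown below:\\n\\n![services](/assets/images/auto-instrumentation-net-applications-opentelemetry/services-3.png)\\n\\nIt will begin by tracking throughput and latency, critical metrics for SREs to pay attention to.\\n\\n![dotnet-login-otel-auto-1](/assets/images/auto-instrumentation-net-applications-opentelemetry/dotnet-login-otel-auto-1.png)\\n\\nDigging in, we can see an overview of all our Transactions.\\n\\n![dotnet-login-otel-auto-2](/assets/images/auto-instrumentation-net-applications-opentelemetry/dotnet-login-otel-auto-2.png)\\n\\nAnd look at specific transactions:\\n\\n![specific transactions](/assets/images/auto-instrumentation-net-applications-opentelemetry/specific_transactions.png)\\n\\nThere is clearly an outlier here, where one transaction took over 200ms. This is likely to be due to the .NET CLR warming up. Click on **Logs**, and we see that logs are also brought over. The OTel Agent will automatically bring in logs and correlate them with traces for you:\\n\\n![otel agent](/assets/images/auto-instrumentation-net-applications-opentelemetry/otel_agent.png)\\n\\n## Wrapping up\\n\\nWith this Dockerfile, you\'ve transformed your simple .NET application into one that\'s automatically instrumented with OpenTelemetry. This will aid greatly in understanding application performance, tracing errors, and gaining insights into how users interact with your software.\\n\\nRemember, observability is a crucial aspect of modern application development, especially in distributed systems. 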
With tools like OpenTelemetry, understanding complex systems becomes a tad bit easier.\\n\\nIn this blog, we discussed the following:\\n\\n- How to auto-instrument .NET with OpenTelemetry.\\n- Using standard commands in a Dockerfile, auto-instrumentation was done efficiently, without adding code in multiple places, which keeps the setup manageable.\\n- Using OpenTelemetry and its support for multiple languages, DevOps and SRE teams can auto-instrument their applications with ease, gaining immediate insights into the health of the entire application stack and reducing mean time to resolution (MTTR).\\n\\nSince Elastic can support a mix of methods for ingesting data, whether it be using auto-instrumentation of open-source OpenTelemetry or manual instrumentation with its native APM agents, you can plan your migration to OTel by focusing on a few applications first and then using OpenTelemetry across your applications later on in a manner that best fits your business needs.\\n\\n> Developer resources:\\n>\\n> - [Elastiflix application](https://www.elastic.co/blog/getting-started-opentelemetry-instrumentation-sample-app), a guide to instrument different languages with OpenTelemetry\\n> - Python: [Auto-instrumentation](https://www.elastic.co/blog/auto-instrumentation-of-python-applications-opentelemetry), [Manual-instrumentation](https://www.elastic.co/blog/manual-instrumentation-of-python-applications-opentelemetry)\\n> - Java: [Auto-instrumentation](https://www.elastic.co/blog/auto-instrumentation-java-applications-opentelemetry), [Manual-instrumentation](https://www.elastic.co/blog/manual-instrumentation-of-java-applications-opentelemetry)\\n> - Node.js: [Auto-instrumentation](https://www.elastic.co/blog/auto-instrument-nodejs-applications-opentelemetry), [Manual-instrumentation](https://www.elastic.co/blog/manual-instrumentation-of-nodejs-applications-opentelemetry)\\n> - .NET: [Auto-instrumentation](https://www.elastic.co/blog/auto-instrumentation-of-net-applications-opentelemetry), [Manual-instrumentation](https://www.elastic.co/blog/manual-instrumentation-of-net-applications-opentelemetry)\\n> - Go: [Manual-instrumentation](https://elastic.co/blog/manual-instrumentation-of-go-applications-opentelemetry)\\n> - [Best practices for instrumenting OpenTelemetry](https://www.elastic.co/blog/best-practices-instrumenting-opentelemetry)\\n>\\n> General configuration and use case resources:\\n>\\n> - [Independence with OpenTelemetry on Elastic](https://www.elastic.co/blog/opentelemetry-observability)\\n> - [Modern observability and security on Kubernetes with Elastic and OpenTelemetry](https://www.elastic.co/blog/implementing-kubernetes-observability-security-opentelemetry)\\n> - [3 models for logging with OpenTelemetry and Elastic](https://www.elastic.co/blog/3-models-logging-opentelemetry-elastic)\\n> - [Adding free and open Elastic APM as part of your Elastic Observability deployment](https://www.elastic.co/blog/adding-free-and-open-elastic-apm-as-part-of-your-elastic-observability-deployment)\\n> - [Capturing custom metrics through OpenTelemetry API in code with Elastic](https://www.elastic.co/blog/custom-metrics-app-code-java-agent-plugin)\\n> - [Future-proof your observability platform with OpenTelemetry and Elastic](https://www.elastic.co/virtual-events/future-proof-your-observability-platform-with-opentelemetry-and-elastic)\\n> - [Elastic Observability: Built for open technologies like Kubernetes, OpenTelemetry, Prometheus, Istio, and 
more](https://www.elastic.co/blog/kubernetes-k8s-observability-elasticsearch-cncf)\\n\\nDon’t have an Elastic Cloud account yet? [Sign up for Elastic Cloud](https://cloud.elastic.co/registration) and try out the auto-instrumentation capabilities that I discussed above. I would be interested in getting your feedback about your experience in gaining visibility into your application stack with Elastic.\\n\\n_The release and timing of any features or functionality described in this post remain at Elastic\'s sole discretion. Any features or functionality not currently available may not be delivered on time or at all._\\n","code":"var Component=(()=>{var p=Object.create;var r=Object.defineProperty;var u=Object.getOwnPropertyDescriptor;var m=Object.getOwnPropertyNames;var g=Object.getPrototypeOf,f=Object.prototype.hasOwnProperty;var y=(t,e)=>()=>(e||t((e={exports:{}}).exports,e),e.exports),w=(t,e)=>{for(var i in e)r(t,i,{get:e[i],enumerable:!0})},a=(t,e,i,l)=>{if(e&&typeof e==\\"object\\"||typeof e==\\"function\\")for(let o of m(e))!f.call(t,o)&&o!==i&&r(t,o,{get:()=>e[o],enumerable:!(l=u(e,o))||l.enumerable});return t};var b=(t,e,i)=>(i=t!=null?p(g(t)):{},a(e||!t||!t.__esModule?r(i,\\"default\\",{value:t,enumerable:!0}):i,t)),T=t=>a(r({},\\"__esModule\\",{value:!0}),t);var c=y((O,s)=>{s.exports=_jsx_runtime});var E={};w(E,{default:()=>d,frontmatter:()=>v});var n=b(c()),v={title:\\"Auto-instrumentation of .NET applications with OpenTelemetry\\",slug:\\"auto-instrumentation-net-applications-opentelemetry\\",date:\\"2023-09-01\\",description:\\"OpenTelemetry provides an observability framework for cloud-native software, allowing us to trace, monitor, and debug applications seamlessly. In this post, we\'ll explore how to automatically instrument a .NET application using OpenTelemetry.\\",author:[{slug:\\"david-hope\\"}],image:\\"observability-launch-series-4-net-auto.jpg\\",tags:[{slug:\\"opentelemetry\\"},{slug:\\"apm\\"},{slug:\\"net\\"},{slug:\\"instrumentation\\"}]};function h(t){let e={a:\\"a\\",blockquote:\\"blockquote\\",br:\\"br\\",code:\\"code\\",em:\\"em\\",h2:\\"h2\\",h3:\\"h3\\",img:\\"img\\",li:\\"li\\",ol:\\"ol\\",p:\\"p\\",pre:\\"pre\\",strong:\\"strong\\",ul:\\"ul\\",...t.components};return(0,n.jsxs)(n.Fragment,{children:[(0,n.jsx)(e.p,{children:\\"In the fast-paced universe of software development, especially in the cloud-native realm, DevOps and SRE teams are increasingly emerging as essential partners in application stability and growth.\\"}),`\\n`,(0,n.jsx)(e.p,{children:\\"DevOps engineers continuously optimize software delivery, while SRE teams act as the stewards of application reliability, scalability, and top-tier performance. The challenge? These teams require a cutting-edge observability solution, one that encompasses full-stack insights, empowering them to rapidly manage, monitor, and rectify potential disruptions before they culminate into operational challenges.\\"}),`\\n`,(0,n.jsx)(e.p,{children:\\"Observability in our modern distributed software ecosystem goes beyond mere monitoring \\\\u2014 it demands limitless data collection, precision in processing, and the correlation of this data into actionable insights. 
However, the road to achieving this holistic view is paved with obstacles, from navigating version incompatibilities to wrestling with restrictive proprietary code.\\"}),`\\n`,(0,n.jsxs)(e.p,{children:[\\"Enter \\",(0,n.jsx)(e.a,{href:\\"https://opentelemetry.io/\\",rel:\\"nofollow\\",children:\\"OpenTelemetry (OTel)\\"}),\\", with the following benefits for those who adopt it:\\"]}),`\\n`,(0,n.jsxs)(e.ul,{children:[`\\n`,(0,n.jsx)(e.li,{children:\\"Escape vendor constraints with OTel, freeing yourself from vendor lock-in and ensuring top-notch observability.\\"}),`\\n`,(0,n.jsx)(e.li,{children:\\"See the harmony of unified logs, metrics, and traces come together to provide a complete system view.\\"}),`\\n`,(0,n.jsx)(e.li,{children:\\"Improve your application oversight through richer and enhanced instrumentations.\\"}),`\\n`,(0,n.jsx)(e.li,{children:\\"Embrace the benefits of backward compatibility to protect your prior instrumentation investments.\\"}),`\\n`,(0,n.jsx)(e.li,{children:\\"Embark on the OpenTelemetry journey with an easy learning curve, simplifying onboarding and scalability.\\"}),`\\n`,(0,n.jsx)(e.li,{children:\\"Rely on a proven, future-ready standard to boost your confidence in every investment.\\"}),`\\n`,(0,n.jsx)(e.li,{children:\\"Explore manual instrumentation, enabling customized data collection to fit your unique needs.\\"}),`\\n`,(0,n.jsx)(e.li,{children:\\"Ensure monitoring consistency across layers with a standardized observability data framework.\\"}),`\\n`,(0,n.jsx)(e.li,{children:\\"Decouple development from operations, driving peak efficiency for both.\\"}),`\\n`]}),`\\n`,(0,n.jsx)(e.p,{children:\\"Given this context, OpenTelemetry emerges as an unmatched observability solution for cloud-native software, seamlessly enabling tracing, monitoring, and debugging. One of its strengths is the ability to auto-instrument applications, allowing developers the luxury of collecting invaluable telemetry without delving into code modifications.\\"}),`\\n`,(0,n.jsx)(e.p,{children:\\"In this post, we will dive into the methodology to instrument a .NET application using Docker, blending the best of both worlds: powerful observability without the code hassles.\\"}),`\\n`,(0,n.jsx)(e.h2,{id:\\"whats-covered\\",children:\\"What\'s covered?\\"}),`\\n`,(0,n.jsxs)(e.ul,{children:[`\\n`,(0,n.jsx)(e.li,{children:\\"How APM works with .NET using CLR Profiler functionality\\"}),`\\n`,(0,n.jsx)(e.li,{children:\\"Creating a Docker image for a .NET application with the OpenTelemetry instrumentation baked in\\"}),`\\n`,(0,n.jsx)(e.li,{children:\\"Installing and running the OpenTelemetry .NET Profiler for automatic instrumentation\\"}),`\\n`]}),`\\n`,(0,n.jsx)(e.h2,{id:\\"how-apm-works-with-net-using-clr-profiler-functionality\\",children:\\"How APM works with .NET using CLR Profiler functionality\\"}),`\\n`,(0,n.jsxs)(e.p,{children:[\\"Before we delve into the details, let\'s clear up some confusion around .NET Profilers and CPU Profilers like Elastic\\",(0,n.jsx)(\\"sup\\",{children:\\"\\\\xAE\\"}),\\"\\\\u2019s Universal Profiling tool \\\\u2014 we don\\\\u2019t want to get these two things mixed up, as they have very different purposes.\\"]}),`\\n`,(0,n.jsx)(e.p,{children:`When discussing profiling tools, especially in the context of .NET, it\'s not uncommon to encounter confusion between a \\".NET profiler\\" and a \\"CPU profiler.\\" Though both are used to diagnose and optimize applications, they serve different primary purposes and operate at different levels. 
Let\'s clarify the distinction:`}),`\\n`,(0,n.jsx)(e.h3,{id:\\"net-profiler\\",children:\\".NET Profiler\\"}),`\\n`,(0,n.jsxs)(e.ol,{children:[`\\n`,(0,n.jsxs)(e.li,{children:[`\\n`,(0,n.jsxs)(e.p,{children:[(0,n.jsx)(e.strong,{children:\\"Scope:\\"}),\\" Specifically targets .NET applications. It is designed to work with the .NET runtime (i.e., the Common Language Runtime (CLR)).\\"]}),`\\n`]}),`\\n`,(0,n.jsxs)(e.li,{children:[`\\n`,(0,n.jsxs)(e.p,{children:[(0,n.jsx)(e.strong,{children:\\"Functionality:\\"}),\\" Hooks into the CLR through its Profiler API to observe runtime events such as class loading, JIT compilation, garbage collection, and exceptions, and it can rewrite IL to inject instrumentation.\\"]}),`\\n`]}),`\\n`,(0,n.jsxs)(e.li,{children:[`\\n`,(0,n.jsxs)(e.p,{children:[(0,n.jsx)(e.strong,{children:\\"Use cases:\\"}),\\" APM auto-instrumentation, allocation and garbage-collection analysis, and diagnosing issues specific to the .NET runtime.\\"]}),`\\n`]}),`\\n`]}),`\\n`,(0,n.jsx)(e.h3,{id:\\"cpu-profiler\\",children:\\"CPU Profiler\\"}),`\\n`,(0,n.jsxs)(e.ol,{children:[`\\n`,(0,n.jsxs)(e.li,{children:[`\\n`,(0,n.jsxs)(e.p,{children:[(0,n.jsx)(e.strong,{children:\\"Scope:\\"}),\\" More general than a .NET profiler. It can profile any application, irrespective of the language or runtime, as long as it runs on the CPU being profiled.\\"]}),`\\n`]}),`\\n`,(0,n.jsxs)(e.li,{children:[`\\n`,(0,n.jsxs)(e.p,{children:[(0,n.jsx)(e.strong,{children:\\"Functionality:\\"}),\\" Periodically samples call stacks to attribute CPU time to the functions consuming it, independent of language or runtime.\\"]}),`\\n`]}),`\\n`,(0,n.jsxs)(e.li,{children:[`\\n`,(0,n.jsxs)(e.p,{children:[(0,n.jsx)(e.strong,{children:\\"Use cases:\\"}),\\" Finding CPU hot spots and optimizing compute-bound code paths across the whole system.\\"]}),`\\n`]}),`\\n`]}),`\\n`,(0,n.jsx)(e.p,{children:\\"While both .NET profilers and CPU profilers aid in optimizing and diagnosing application performance, their approach and depth differ. A .NET profiler offers deep insights specifically into the .NET ecosystem, allowing for fine-grained analysis and instrumentation. In contrast, a CPU profiler provides a broader view, focusing on CPU usage patterns across any application, regardless of its development platform.\\"}),`\\n`,(0,n.jsx)(e.p,{children:\\"It\'s worth noting that for comprehensive profiling of a .NET application, you might use both: the .NET profiler to understand code-level behaviors specific to .NET and the CPU profiler to get an overview of CPU resource utilization.\\"}),`\\n`,(0,n.jsx)(e.p,{children:\\"Now that we\'ve cleared that up, let\'s focus on the .NET Profiler, which we are discussing in this blog for automatic instrumentation of .NET applications. First, let\'s familiarize ourselves with some foundational concepts and terminologies relevant to a .NET Profiler:\\"}),`\\n`,(0,n.jsxs)(e.ul,{children:[`\\n`,(0,n.jsxs)(e.li,{children:[(0,n.jsx)(e.strong,{children:\\"CLR (Common Language Runtime):\\"}),\\" CLR is a core component of the .NET framework, acting as the execution engine for .NET apps. It provides key services like memory management, exception handling, and type safety.\\"]}),`\\n`,(0,n.jsxs)(e.li,{children:[(0,n.jsx)(e.strong,{children:\\"Profiler API:\\"}),\\" .NET provides a set of APIs for profiling applications. These APIs let tools and developers monitor or manipulate .NET applications during runtime.\\"]}),`\\n`,(0,n.jsxs)(e.li,{children:[(0,n.jsx)(e.strong,{children:\\"IL (Intermediate Language):\\"}),\\" After compiling, .NET source code turns into IL, a low-level, platform-agnostic representation. This IL code is then compiled just-in-time (JIT) into machine code by the CLR during application execution.\\"]}),`\\n`,(0,n.jsxs)(e.li,{children:[(0,n.jsx)(e.strong,{children:\\"JIT compilation:\\"}),\\" JIT stands for just-in-time. 
In .NET, the CLR compiles IL to native code just before its execution.\\"]}),`\\n`]}),`\\n`,(0,n.jsx)(e.p,{children:\\"Now, let\'s explore how automatic instrumentation works using CLR Profiler.\\"}),`\\n`,(0,n.jsx)(e.p,{children:\\"Automatic instrumentation in .NET, much like Java\'s bytecode instrumentation, revolves around modifying the behavior of your application\'s methods during runtime, without changing the actual source code.\\"}),`\\n`,(0,n.jsx)(e.p,{children:\\"Here\\\\u2019s a step-by-step breakdown:\\"}),`\\n`,(0,n.jsxs)(e.ol,{children:[`\\n`,(0,n.jsxs)(e.li,{children:[`\\n`,(0,n.jsxs)(e.p,{children:[(0,n.jsx)(e.strong,{children:\\"Attach the profiler:\\"}),\\" When launching your .NET application, you\'ll have to specify to load the profiler. The CLR checks for the presence of a profiler by reading environment variables. If it finds one, the CLR initializes the profiler before any user code is executed.\\"]}),`\\n`]}),`\\n`,(0,n.jsxs)(e.li,{children:[`\\n`,(0,n.jsxs)(e.p,{children:[(0,n.jsx)(e.strong,{children:\\"Use Profiler API to monitor events:\\"}),\\" The Profiler API allows a profiler to monitor various events. For instance, method JIT compilation events can be tracked. When a method is about to be JIT compiled, the profiler gets notified.\\"]}),`\\n`]}),`\\n`,(0,n.jsxs)(e.li,{children:[`\\n`,(0,n.jsxs)(e.p,{children:[(0,n.jsx)(e.strong,{children:\\"Manipulate IL code:\\"}),\\" Upon getting notified of a JIT compilation, the profiler can manipulate the IL code of the method. Using the Profiler API, the profiler can insert, delete, or replace IL instructions. This is analogous to how Java agents modify bytecode. For example, if you want to measure a method\'s execution time, you\'d modify the IL to insert calls to start and stop a timer at the beginning and end of the method, respectively.\\"]}),`\\n`]}),`\\n`,(0,n.jsxs)(e.li,{children:[`\\n`,(0,n.jsxs)(e.p,{children:[(0,n.jsx)(e.strong,{children:\\"Execution of transformed code:\\"}),\\" Once the IL has been modified, the JIT compiler will translate it into machine code. The application will then execute this machine code, which includes the additions made by the profiler.\\"]}),`\\n`]}),`\\n`,(0,n.jsxs)(e.li,{children:[`\\n`,(0,n.jsxs)(e.p,{children:[(0,n.jsx)(e.strong,{children:\\"Gather and report data:\\"}),\\" The added instrumentation can collect various data, such as method execution times or call counts. This data can then be relayed to an application performance management (APM) tool, which can provide insights, visualizations, and alerts based on the data.\\"]}),`\\n`]}),`\\n`]}),`\\n`,(0,n.jsx)(e.p,{children:\\"In essence, automatic instrumentation with CLR Profiler is about modifying the behavior of your .NET methods at runtime. 
This is invaluable for monitoring, diagnosing, and fine-tuning the performance of .NET applications without intruding on the application\'s actual source code.\\"}),`\\n`,(0,n.jsx)(e.h2,{id:\\"prerequisites\\",children:\\"Prerequisites\\"}),`\\n`,(0,n.jsxs)(e.ul,{children:[`\\n`,(0,n.jsx)(e.li,{children:\\"A basic understanding of Docker and .NET\\"}),`\\n`,(0,n.jsx)(e.li,{children:\\"Elastic Cloud\\"}),`\\n`,(0,n.jsx)(e.li,{children:\\"Docker installed on your machine (we recommend Docker Desktop)\\"}),`\\n`]}),`\\n`,(0,n.jsx)(e.h2,{id:\\"view-the-example-source-code\\",children:\\"View the example source code\\"}),`\\n`,(0,n.jsxs)(e.p,{children:[\\"The full source code, including the Dockerfile used in this blog, can be found on \\",(0,n.jsx)(e.a,{href:\\"https://github.com/elastic/observability-examples/tree/main/Elastiflix/dotnet-login-otel-manual\\",rel:\\"nofollow\\",children:\\"GitHub\\"}),\\". The repository also contains the \\",(0,n.jsx)(e.a,{href:\\"https://github.com/elastic/observability-examples/tree/main/Elastiflix/dotnet-login\\",rel:\\"nofollow\\",children:\\"same application without instrumentation\\"}),\\". This allows you to compare each file and see the differences.\\"]}),`\\n`,(0,n.jsxs)(e.p,{children:[\\"The following steps will show you how to instrument this application and run it on the command line or in Docker. If you are interested in a more complete OTel example, take a look at the docker-compose file \\",(0,n.jsx)(e.a,{href:\\"https://github.com/elastic/observability-examples/tree/main#start-the-app\\",rel:\\"nofollow\\",children:\\"here\\"}),\\", which will bring up the full project.\\"]}),`\\n`,(0,n.jsx)(e.h2,{id:\\"step-by-step-guide\\",children:\\"Step-by-step guide\\"}),`\\n`,(0,n.jsxs)(e.p,{children:[\\"This blog assumes you have an Elastic Cloud account \\\\u2014 if not, follow the \\",(0,n.jsx)(e.a,{href:\\"https://cloud.elastic.co/registration?elektra=en-cloud-page\\",rel:\\"nofollow\\",children:\\"instructions to get started on Elastic Cloud\\"}),\\".\\"]}),`\\n`,(0,n.jsx)(e.p,{children:(0,n.jsx)(e.img,{src:\\"/assets/images/auto-instrumentation-net-applications-opentelemetry/elastic-blog-2-free-trial.png\\",alt:\\"\\",width:\\"884\\",height:\\"654\\"})}),`\\n`,(0,n.jsx)(e.h2,{id:\\"step-1-base-image-setup\\",children:\\"Step 1. Base image setup\\"}),`\\n`,(0,n.jsx)(e.p,{children:\\"Start with the .NET runtime image for the base layer of our Dockerfile:\\"}),`\\n`,(0,n.jsx)(e.pre,{children:(0,n.jsx)(e.code,{className:\\"language-dockerfile\\",children:`FROM \\\\${ARCH}mcr.microsoft.com/dotnet/aspnet:7.0 AS base\\nWORKDIR /app\\nEXPOSE 8000\\n`})}),`\\n`,(0,n.jsx)(e.p,{children:\\"Here, we\'re setting up the application\'s runtime environment.\\"}),`\\n`,(0,n.jsx)(e.h2,{id:\\"step-2-building-the-net-application\\",children:\\"Step 2. Building the .NET application\\"}),`\\n`,(0,n.jsx)(e.p,{children:\\"This feature of Docker is just the best. Here, we compile our .NET application using the SDK image. In the bad old days, we used to build on a different platform and then put the compiled code into the Docker container. 
(0,n.jsx)(e.h2,{id:\\"prerequisites\\",children:\\"Prerequisites\\"}),`\\n`,(0,n.jsxs)(e.ul,{children:[`\\n`,(0,n.jsx)(e.li,{children:\\"A basic understanding of Docker and .NET\\"}),`\\n`,(0,n.jsx)(e.li,{children:\\"An Elastic Cloud account\\"}),`\\n`,(0,n.jsx)(e.li,{children:\\"Docker installed on your machine (we recommend Docker Desktop)\\"}),`\\n`]}),`\\n`,(0,n.jsx)(e.h2,{id:\\"view-the-example-source-code\\",children:\\"View the example source code\\"}),`\\n`,(0,n.jsxs)(e.p,{children:[\\"The full source code, including the Dockerfile used in this blog, can be found on \\",(0,n.jsx)(e.a,{href:\\"https://github.com/elastic/observability-examples/tree/main/Elastiflix/dotnet-login-otel-manual\\",rel:\\"nofollow\\",children:\\"GitHub\\"}),\\". The repository also contains the \\",(0,n.jsx)(e.a,{href:\\"https://github.com/elastic/observability-examples/tree/main/Elastiflix/dotnet-login\\",rel:\\"nofollow\\",children:\\"same application without instrumentation\\"}),\\". This allows you to compare each file and see the differences.\\"]}),`\\n`,(0,n.jsxs)(e.p,{children:[\\"The following steps will show you how to instrument this application and run it on the command line or in Docker. If you are interested in a more complete OTel example, take a look at the docker-compose file \\",(0,n.jsx)(e.a,{href:\\"https://github.com/elastic/observability-examples/tree/main#start-the-app\\",rel:\\"nofollow\\",children:\\"here\\"}),\\", which will bring up the full project.\\"]}),`\\n`,(0,n.jsx)(e.h2,{id:\\"step-by-step-guide\\",children:\\"Step-by-step guide\\"}),`\\n`,(0,n.jsxs)(e.p,{children:[\\"This blog assumes you have an Elastic Cloud account \\\\u2014 if not, follow the \\",(0,n.jsx)(e.a,{href:\\"https://cloud.elastic.co/registration?elektra=en-cloud-page\\",rel:\\"nofollow\\",children:\\"instructions to get started on Elastic Cloud\\"}),\\".\\"]}),`\\n`,(0,n.jsx)(e.p,{children:(0,n.jsx)(e.img,{src:\\"/assets/images/auto-instrumentation-net-applications-opentelemetry/elastic-blog-2-free-trial.png\\",alt:\\"\\",width:\\"884\\",height:\\"654\\"})}),`\\n`,(0,n.jsx)(e.h2,{id:\\"step-1-base-image-setup\\",children:\\"Step 1. Base image setup\\"}),`\\n`,(0,n.jsx)(e.p,{children:\\"Start with the .NET runtime image for the base layer of our Dockerfile:\\"}),`\\n`,(0,n.jsx)(e.pre,{children:(0,n.jsx)(e.code,{className:\\"language-dockerfile\\",children:`FROM \\\\${ARCH}mcr.microsoft.com/dotnet/aspnet:7.0 AS base\\nWORKDIR /app\\nEXPOSE 8000\\n`})}),`\\n`,(0,n.jsx)(e.p,{children:\\"Here, we\'re setting up the application\'s runtime environment.\\"}),`\\n`,(0,n.jsx)(e.h2,{id:\\"step-2-building-the-net-application\\",children:\\"Step 2. Building the .NET application\\"}),`\\n`,(0,n.jsx)(e.p,{children:\\"This is where Docker\'s multi-stage builds shine: we compile our .NET application using the SDK image. In the past, teams often built on a different platform and then copied the compiled code into the Docker container. By using Docker all the way through, we can be much more confident that our build will replicate from a developer\\\\u2019s desktop into production.\\"}),`\\n`,(0,n.jsx)(e.pre,{children:(0,n.jsx)(e.code,{className:\\"language-dockerfile\\",children:`FROM --platform=$BUILDPLATFORM mcr.microsoft.com/dotnet/sdk:8.0-preview AS build\\nARG TARGETPLATFORM\\n\\nWORKDIR /src\\nCOPY [\\"login.csproj\\", \\"./\\"]\\nRUN dotnet restore \\"./login.csproj\\"\\nCOPY . .\\nWORKDIR \\"/src/.\\"\\nRUN dotnet build \\"login.csproj\\" -c Release -o /app/build\\n`})}),`\\n`,(0,n.jsx)(e.p,{children:\\"This section ensures that our .NET code is properly restored and compiled.\\"}),`\\n`,(0,n.jsx)(e.h2,{id:\\"step-3-publishing-the-application\\",children:\\"Step 3. Publishing the application\\"}),`\\n`,(0,n.jsx)(e.p,{children:\\"Once built, we\'ll publish the app:\\"}),`\\n`,(0,n.jsx)(e.pre,{children:(0,n.jsx)(e.code,{className:\\"language-dockerfile\\",children:`FROM build AS publish\\nRUN dotnet publish \\"login.csproj\\" -c Release -o /app/publish\\n`})}),`\\n`,(0,n.jsx)(e.h2,{id:\\"step-4-preparing-the-final-image\\",children:\\"Step 4. Preparing the final image\\"}),`\\n`,(0,n.jsx)(e.p,{children:\\"Now, let\'s set up the final runtime image:\\"}),`\\n`,(0,n.jsx)(e.pre,{children:(0,n.jsx)(e.code,{className:\\"language-dockerfile\\",children:`FROM base AS final\\nWORKDIR /app\\nCOPY --from=publish /app/publish .\\n`})}),`\\n`,(0,n.jsx)(e.h2,{id:\\"step-5-installing-opentelemetry\\",children:\\"Step 5. Installing OpenTelemetry\\"}),`\\n`,(0,n.jsx)(e.p,{children:\\"We\'ll install dependencies and download the OpenTelemetry auto-instrumentation script:\\"}),`\\n`,(0,n.jsx)(e.pre,{children:(0,n.jsx)(e.code,{className:\\"language-dockerfile\\",children:`RUN apt-get update && apt-get install -y zip curl\\nRUN mkdir /otel\\nRUN curl -L -o /otel/otel-dotnet-install.sh https://github.com/open-telemetry/opentelemetry-dotnet-instrumentation/releases/download/v0.7.0/otel-dotnet-auto-install.sh\\nRUN chmod +x /otel/otel-dotnet-install.sh\\n`})}),`\\n`,(0,n.jsx)(e.h2,{id:\\"step-6-configure-opentelemetry\\",children:\\"Step 6. Configure OpenTelemetry\\"}),`\\n`,(0,n.jsx)(e.p,{children:\\"Designate where OpenTelemetry should reside and execute the installation script. Note that the OTEL_DOTNET_AUTO_HOME environment variable is required, as the script looks for it:\\"}),`\\n`,(0,n.jsx)(e.pre,{children:(0,n.jsx)(e.code,{className:\\"language-dockerfile\\",children:`ENV OTEL_DOTNET_AUTO_HOME=/otel\\nRUN /bin/bash /otel/otel-dotnet-install.sh\\n`})}),`\\n`,(0,n.jsx)(e.h2,{id:\\"step-7-additional-configuration\\",children:\\"Step 7. Additional configuration\\"}),`\\n`,(0,n.jsx)(e.p,{children:\\"Make sure the auto-instrumentation and platform detection scripts are executable, and run the platform detection script:\\"}),`\\n`,(0,n.jsx)(e.pre,{children:(0,n.jsx)(e.code,{className:\\"language-dockerfile\\",children:`COPY platform-detection.sh /otel/\\nRUN chmod +x /otel/instrument.sh\\nRUN chmod +x /otel/platform-detection.sh && /otel/platform-detection.sh\\n`})}),`\\n`,(0,n.jsx)(e.p,{children:\\"This platform detection script will check if the Docker build is for ARM64 and implement a workaround to get the OpenTelemetry instrumentation to work on macOS. If you happen to be running locally on a Mac with an M1 or M2 processor, you will be grateful for this script.\\"}),`\\n`,
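(0,n.jsx)(e.p,{children:\\"For a rough idea of what such a script does, here is an illustrative sketch (this is not the actual platform-detection.sh; see the GitHub repository linked above for the real workaround):\\"}),`\\n`,(0,n.jsx)(e.pre,{children:(0,n.jsx)(e.code,{className:\\"language-bash\\",children:`#!/bin/bash\\n# Illustrative sketch: detect an ARM64 build and flag the workaround\\nif [ \\"$(uname -m)\\" = \\"aarch64\\" ] || [ \\"$(uname -m)\\" = \\"arm64\\" ]; then\\n  echo \\"ARM64 build detected; applying instrumentation workaround\\"\\n  # ...the ARM64-specific adjustments live here in the real script...\\nfi\\n`})}),`\\n`,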
(0,n.jsx)(e.h2,{id:\\"step-8-entry-point-setup\\",children:\\"Step 8. Entry point setup\\"}),`\\n`,(0,n.jsx)(e.p,{children:\\"Lastly, set the Docker image\'s entry point to source the OpenTelemetry instrumentation script, which sets up the environment variables required to bootstrap the .NET profiler, and then start our .NET application:\\"}),`\\n`,(0,n.jsx)(e.pre,{children:(0,n.jsx)(e.code,{className:\\"language-dockerfile\\",children:`ENTRYPOINT [\\"/bin/bash\\", \\"-c\\", \\"source /otel/instrument.sh && dotnet login.dll\\"]\\n`})}),`\\n`,(0,n.jsx)(e.h2,{id:\\"step-9-running-the-docker-image-with-environment-variables\\",children:\\"Step 9. Running the Docker image with environment variables\\"}),`\\n`,(0,n.jsx)(e.p,{children:\\"To build and run the Docker image, you\'d typically follow these steps:\\"}),`\\n`,(0,n.jsx)(e.h3,{id:\\"build-the-docker-image\\",children:\\"Build the Docker image\\"}),`\\n`,(0,n.jsx)(e.p,{children:\\"First, you\'d want to build the Docker image from your Dockerfile. Let\'s assume the Dockerfile is in the current directory, and you\'d like to name/tag your image dotnet-login-otel-image.\\"}),`\\n`,(0,n.jsx)(e.pre,{children:(0,n.jsx)(e.code,{className:\\"language-bash\\",children:`docker build -t dotnet-login-otel-image .\\n`})}),`\\n`,(0,n.jsx)(e.h3,{id:\\"run-the-docker-image\\",children:\\"Run the Docker image\\"}),`\\n`,(0,n.jsxs)(e.p,{children:[\\"After building the image, you\'d run it with the specified environment variables. For this, the docker \\",(0,n.jsx)(e.strong,{children:\\"run\\"}),\\" command is used with the -e flag for each environment variable.\\"]}),`\\n`,(0,n.jsx)(e.pre,{children:(0,n.jsx)(e.code,{className:\\"language-bash\\",children:`docker run \\\\\\\\\\n -e OTEL_EXPORTER_OTLP_HEADERS=\\"Authorization=Bearer \\\\${ELASTIC_APM_SECRET_TOKEN}\\" \\\\\\\\\\n -e OTEL_EXPORTER_OTLP_ENDPOINT=\\"\\\\${ELASTIC_APM_SERVER_URL}\\" \\\\\\\\\\n -e OTEL_METRICS_EXPORTER=\\"otlp\\" \\\\\\\\\\n -e OTEL_RESOURCE_ATTRIBUTES=\\"service.version=1.0,deployment.environment=production\\" \\\\\\\\\\n -e OTEL_SERVICE_NAME=\\"dotnet-login-otel-auto\\" \\\\\\\\\\n -e OTEL_TRACES_EXPORTER=\\"otlp\\" \\\\\\\\\\n dotnet-login-otel-image\\n`})}),`\\n`,(0,n.jsxs)(e.p,{children:[\\"Make sure that \\",(0,n.jsx)(e.code,{children:\\"${ELASTIC_APM_SECRET_TOKEN}\\"}),\\" and \\",(0,n.jsx)(e.code,{children:\\"${ELASTIC_APM_SERVER_URL}\\"}),\\" are set in your shell environment with the actual values from your cloud deployment, as shown below.\\",(0,n.jsx)(e.br,{}),`\\n`,(0,n.jsx)(e.strong,{children:\\"Getting Elastic Cloud variables\\"})]}),`\\n`,(0,n.jsxs)(e.p,{children:[\\"You can copy the endpoints and token from Kibana\\",(0,n.jsx)(\\"sup\\",{children:\\"\\\\xAE\\"}),\\" under the path \\",(0,n.jsx)(e.code,{children:\\"/app/home#/tutorial/apm\\"}),\\".\\"]}),`\\n`,(0,n.jsx)(e.p,{children:(0,n.jsx)(e.img,{src:\\"/assets/images/auto-instrumentation-net-applications-opentelemetry/elastic-blog-3-apm-agents.png\\",alt:\\"apm agents\\",width:\\"1924\\",height:\\"1304\\"})}),`\\n`,(0,n.jsx)(e.p,{children:\\"You can also use an environment file with docker run --env-file to make the command less verbose if you have multiple environment variables.\\"}),`\\n`,
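(0,n.jsx)(e.p,{children:\\"For example, here is a sketch of that approach (otel.env is an arbitrary file name; the values are the same ones shown above):\\"}),`\\n`,(0,n.jsx)(e.pre,{children:(0,n.jsx)(e.code,{className:\\"language-bash\\",children:`# otel.env: one VAR=value per line; docker takes values literally (no quotes)\\nOTEL_EXPORTER_OTLP_HEADERS=Authorization=Bearer <your-secret-token>\\nOTEL_EXPORTER_OTLP_ENDPOINT=<your-apm-server-url>\\nOTEL_METRICS_EXPORTER=otlp\\nOTEL_TRACES_EXPORTER=otlp\\nOTEL_RESOURCE_ATTRIBUTES=service.version=1.0,deployment.environment=production\\nOTEL_SERVICE_NAME=dotnet-login-otel-auto\\n\\n# Then run the container with the file instead of repeated -e flags:\\ndocker run --env-file otel.env dotnet-login-otel-image\\n`})}),`\\n`,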
(0,n.jsx)(e.p,{children:\\"Once you have this up and running, you can ping the endpoint for your instrumented service (in our case, this is /login).\\"}),`\\n`,
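(0,n.jsx)(e.p,{children:\\"For instance, with curl (a sketch; this assumes you published the container\'s port when running it, e.g. with -p 8000:8000 to match the EXPOSE 8000 in the Dockerfile):\\"}),`\\n`,(0,n.jsx)(e.pre,{children:(0,n.jsx)(e.code,{className:\\"language-bash\\",children:`# Hit the instrumented endpoint a few times to generate traces\\ncurl http://localhost:8000/login\\n`})}),`\\n`,(0,n.jsx)(e.p,{children:\\"You should then see the app appear in Elastic APM, as shown below:\\"}),`\\n`,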
(0,n.jsx)(e.p,{children:(0,n.jsx)(e.img,{src:\\"/assets/images/auto-instrumentation-net-applications-opentelemetry/services-3.png\\",alt:\\"services\\",width:\\"1458\\",height:\\"997\\"})}),`\\n`,(0,n.jsx)(e.p,{children:\\"It will begin by tracking throughput and latency, critical metrics for SREs to pay attention to.\\"}),`\\n`,(0,n.jsx)(e.p,{children:(0,n.jsx)(e.img,{src:\\"/assets/images/auto-instrumentation-net-applications-opentelemetry/dotnet-login-otel-auto-1.png\\",alt:\\"dotnet-login-otel-auto-1\\",width:\\"1437\\",height:\\"997\\"})}),`\\n`,(0,n.jsx)(e.p,{children:\\"Digging in, we can see an overview of all our transactions.\\"}),`\\n`,(0,n.jsx)(e.p,{children:(0,n.jsx)(e.img,{src:\\"/assets/images/auto-instrumentation-net-applications-opentelemetry/dotnet-login-otel-auto-2.png\\",alt:\\"dotnet-login-otel-auto-2\\",width:\\"1438\\",height:\\"984\\"})}),`\\n`,(0,n.jsx)(e.p,{children:\\"And look at specific transactions:\\"}),`\\n`,(0,n.jsx)(e.p,{children:(0,n.jsx)(e.img,{src:\\"/assets/images/auto-instrumentation-net-applications-opentelemetry/specific_transactions.png\\",alt:\\"specific transactions\\",width:\\"1439\\",height:\\"994\\"})}),`\\n`,(0,n.jsxs)(e.p,{children:[\\"There is clearly an outlier here, where one transaction took over 200ms. This is likely to be due to the .NET CLR warming up. Click on \\",(0,n.jsx)(e.strong,{children:\\"Logs\\"}),\\", and we see that logs are also brought over. The OTel Agent will automatically bring in logs and correlate them with traces for you:\\"]}),`\\n`,(0,n.jsx)(e.p,{children:(0,n.jsx)(e.img,{src:\\"/assets/images/auto-instrumentation-net-applications-opentelemetry/otel_agent.png\\",alt:\\"otel agent\\",width:\\"1433\\",height:\\"994\\"})}),`\\n`,(0,n.jsx)(e.h2,{id:\\"wrapping-up\\",children:\\"Wrapping up\\"}),`\\n`,(0,n.jsx)(e.p,{children:\\"With this Dockerfile, you\'ve transformed your simple .NET application into one that\'s automatically instrumented with OpenTelemetry. This will aid greatly in understanding application performance, tracing errors, and gaining insights into how users interact with your software.\\"}),`\\n`,(0,n.jsx)(e.p,{children:\\"Remember, observability is a crucial aspect of modern application development, especially in distributed systems. With tools like OpenTelemetry, understanding complex systems becomes a tad bit easier.\\"}),`\\n`,(0,n.jsx)(e.p,{children:\\"In this blog, we discussed the following:\\"}),`\\n`,(0,n.jsxs)(e.ul,{children:[`\\n`,(0,n.jsx)(e.li,{children:\\"How to auto-instrument .NET with OpenTelemetry.\\"}),`\\n`,(0,n.jsx)(e.li,{children:\\"Using standard commands in a Dockerfile, auto-instrumentation was done efficiently and without adding code in multiple places, enabling manageability.\\"}),`\\n`,(0,n.jsx)(e.li,{children:\\"Using OpenTelemetry and its support for multiple languages, DevOps and SRE teams can auto-instrument their applications with ease, gaining immediate insights into the health of the entire application stack and reducing mean time to resolution (MTTR).\\"}),`\\n`]}),`\\n`,(0,n.jsx)(e.p,{children:\\"Since Elastic can support a mix of methods for ingesting data, whether it be using auto-instrumentation of open-source OpenTelemetry or manual instrumentation with its native APM agents, you can plan your migration to OTel by focusing on a few applications first and then using OpenTelemetry across your applications later on in a manner that best fits your business needs.\\"}),`\\n`,(0,n.jsxs)(e.blockquote,{children:[`\\n`,(0,n.jsx)(e.p,{children:\\"Developer resources:\\"}),`\\n`,(0,n.jsxs)(e.ul,{children:[`\\n`,(0,n.jsxs)(e.li,{children:[(0,n.jsx)(e.a,{href:\\"https://www.elastic.co/blog/getting-started-opentelemetry-instrumentation-sample-app\\",rel:\\"nofollow\\",children:\\"Elastiflix application\\"}),\\", a guide to instrument different languages with OpenTelemetry\\"]}),`\\n`,(0,n.jsxs)(e.li,{children:[\\"Python: \\",(0,n.jsx)(e.a,{href:\\"https://www.elastic.co/blog/auto-instrumentation-of-python-applications-opentelemetry\\",rel:\\"nofollow\\",children:\\"Auto-instrumentation\\"}),\\", \\",(0,n.jsx)(e.a,{href:\\"https://www.elastic.co/blog/manual-instrumentation-of-python-applications-opentelemetry\\",rel:\\"nofollow\\",children:\\"Manual-instrumentation\\"})]}),`\\n`,(0,n.jsxs)(e.li,{children:[\\"Java: \\",(0,n.jsx)(e.a,{href:\\"https://www.elastic.co/blog/auto-instrumentation-of-java-applications-opentelemetry\\",rel:\\"nofollow\\",children:\\"Auto-instrumentation\\"}),\\", \\",(0,n.jsx)(e.a,{href:\\"https://www.elastic.co/blog/manual-instrumentation-of-java-applications-opentelemetry\\",rel:\\"nofollow\\",children:\\"Manual-instrumentation\\"})]}),`\\n`,(0,n.jsxs)(e.li,{children:[\\"Node.js: \\",(0,n.jsx)(e.a,{href:\\"https://www.elastic.co/blog/auto-instrument-nodejs-applications-opentelemetry\\",rel:\\"nofollow\\",children:\\"Auto-instrumentation\\"}),\\", \\",(0,n.jsx)(e.a,{href:\\"https://www.elastic.co/blog/manual-instrumentation-of-nodejs-applications-opentelemetry\\",rel:\\"nofollow\\",children:\\"Manual-instrumentation\\"})]}),`\\n`,(0,n.jsxs)(e.li,{children:[\\".NET: \\",(0,n.jsx)(e.a,{href:\\"https://www.elastic.co/blog/auto-instrumentation-net-applications-opentelemetry\\",rel:\\"nofollow\\",children:\\"Auto-instrumentation\\"}),\\", \\",(0,n.jsx)(e.a,{href:\\"https://www.elastic.co/blog/manual-instrumentation-of-net-applications-opentelemetry\\",rel:\\"nofollow\\",children:\\"Manual-instrumentation\\"})]}),`\\n`,(0,n.jsxs)(e.li,{children:[\\"Go: 
\\",(0,n.jsx)(e.a,{href:\\"https://elastic.co/blog/manual-instrumentation-of-go-applications-opentelemetry\\",rel:\\"nofollow\\",children:\\"Manual-instrumentation\\"})]}),`\\n`,(0,n.jsx)(e.li,{children:(0,n.jsx)(e.a,{href:\\"https://www.elastic.co/blog/best-practices-instrumenting-opentelemetry\\",rel:\\"nofollow\\",children:\\"Best practices for instrumenting OpenTelemetry\\"})}),`\\n`]}),`\\n`,(0,n.jsx)(e.p,{children:\\"General configuration and use case resources:\\"}),`\\n`,(0,n.jsxs)(e.ul,{children:[`\\n`,(0,n.jsx)(e.li,{children:(0,n.jsx)(e.a,{href:\\"https://www.elastic.co/blog/opentelemetry-observability\\",rel:\\"nofollow\\",children:\\"Independence with OpenTelemetry on Elastic\\"})}),`\\n`,(0,n.jsx)(e.li,{children:(0,n.jsx)(e.a,{href:\\"https://www.elastic.co/blog/implementing-kubernetes-observability-security-opentelemetry\\",rel:\\"nofollow\\",children:\\"Modern observability and security on Kubernetes with Elastic and OpenTelemetry\\"})}),`\\n`,(0,n.jsx)(e.li,{children:(0,n.jsx)(e.a,{href:\\"https://www.elastic.co/blog/3-models-logging-opentelemetry-elastic\\",rel:\\"nofollow\\",children:\\"3 models for logging with OpenTelemetry and Elastic\\"})}),`\\n`,(0,n.jsx)(e.li,{children:(0,n.jsx)(e.a,{href:\\"https://www.elastic.co/blog/adding-free-and-open-elastic-apm-as-part-of-your-elastic-observability-deployment\\",rel:\\"nofollow\\",children:\\"Adding free and open Elastic APM as part of your Elastic Observability deployment\\"})}),`\\n`,(0,n.jsx)(e.li,{children:(0,n.jsx)(e.a,{href:\\"https://www.elastic.co/blog/custom-metrics-app-code-java-agent-plugin\\",rel:\\"nofollow\\",children:\\"Capturing custom metrics through OpenTelemetry API in code with Elastic\\"})}),`\\n`,(0,n.jsx)(e.li,{children:(0,n.jsx)(e.a,{href:\\"https://www.elastic.co/virtual-events/future-proof-your-observability-platform-with-opentelemetry-and-elastic\\",rel:\\"nofollow\\",children:\\"Future-proof your observability platform with OpenTelemetry and Elastic\\"})}),`\\n`,(0,n.jsx)(e.li,{children:(0,n.jsx)(e.a,{href:\\"https://www.elastic.co/blog/kubernetes-k8s-observability-elasticsearch-cncf\\",rel:\\"nofollow\\",children:\\"Elastic Observability: Built for open technologies like Kubernetes, OpenTelemetry, Prometheus, Istio, and more\\"})}),`\\n`]}),`\\n`]}),`\\n`,(0,n.jsxs)(e.p,{children:[\\"Don\\\\u2019t have an Elastic Cloud account yet? \\",(0,n.jsx)(e.a,{href:\\"https://cloud.elastic.co/registration\\",rel:\\"nofollow\\",children:\\"Sign up for Elastic Cloud\\"}),\\" and try out the auto-instrumentation capabilities that I discussed above. I would be interested in getting your feedback about your experience in gaining visibility into your application stack with Elastic.\\"]}),`\\n`,(0,n.jsx)(e.p,{children:(0,n.jsx)(e.em,{children:\\"The release and timing of any features or functionality described in this post remain at Elastic\'s sole discretion. 
Any features or functionality not currently available may not be delivered on time or at all.\\"})})]})}function d(t={}){let{wrapper:e}=t.components||{};return e?(0,n.jsx)(e,{...t,children:(0,n.jsx)(h,{...t})}):h(t)}return T(E);})();\\n;return Component;"},"_id":"articles/auto-instrumentation-of-net-applications-opentelemetry.mdx","_raw":{"sourceFilePath":"articles/auto-instrumentation-of-net-applications-opentelemetry.mdx","sourceFileName":"auto-instrumentation-of-net-applications-opentelemetry.mdx","sourceFileDir":"articles","contentType":"mdx","flattenedPath":"articles/auto-instrumentation-of-net-applications-opentelemetry"},"type":"Article","imageUrl":"/assets/images/auto-instrumentation-net-applications-opentelemetry/observability-launch-series-4-net-auto.jpg","readingTime":"13 min read","url":"/auto-instrumentation-net-applications-opentelemetry","headings":[{"level":2,"title":"What\'s covered?","href":"#whats-covered"},{"level":2,"title":"How APM works with .NET using CLR Profiler functionality","href":"#how-apm-works-with-net-using-clr-profiler-functionality"},{"level":3,"title":".NET Profiler","href":"#net-profiler"},{"level":3,"title":"CPU Profiler","href":"#cpu-profiler"},{"level":2,"title":"Prerequisites","href":"#prerequisites"},{"level":2,"title":"View the example source code","href":"#view-the-example-source-code"},{"level":2,"title":"Step-by-step guide","href":"#step-by-step-guide"},{"level":2,"title":"Step 1. Base image setup","href":"#step-1-base-image-setup"},{"level":2,"title":"Step 2. Building the .NET application","href":"#step-2-building-the-net-application"},{"level":2,"title":"Step 3. Publishing the application","href":"#step-3-publishing-the-application"},{"level":2,"title":"Step 4. Preparing the final image","href":"#step-4-preparing-the-final-image"},{"level":2,"title":"Step 5. Installing OpenTelemetry","href":"#step-5-installing-opentelemetry"},{"level":2,"title":"Step 6. Configure OpenTelemetry","href":"#step-6-configure-opentelemetry"},{"level":2,"title":"Step 7. Additional configuration","href":"#step-7-additional-configuration"},{"level":2,"title":"Step 8. Entry point setup","href":"#step-8-entry-point-setup"},{"level":2,"title":"Step 9. Running the Docker image with environment variables","href":"#step-9-running-the-docker-image-with-environment-variables"},{"level":3,"title":"Build the Docker image","href":"#build-the-docker-image"},{"level":3,"title":"Run the Docker image","href":"#run-the-docker-image"},{"level":2,"title":"Wrapping up","href":"#wrapping-up"}]},{"title":"Automatic instrumentation with OpenTelemetry for Python applications","slug":"auto-instrumentation-python-applications-opentelemetry","date":"2023-08-31","description":"Learn how to auto-instrument Python applications using OpenTelemetry. With standard commands in a Docker file, applications can be instrumented quickly without writing code in multiple places, enabling rapid change, scale, and easier management.","image":"observability-launch-series-2-python-auto_(1).jpg","author":[{"slug":"bahubali-shetti","type":"Author","_raw":{}}],"tags":[{"slug":"opentelemetry","type":"Tag","_raw":{}},{"slug":"apm","type":"Tag","_raw":{}},{"slug":"python","type":"Tag","_raw":{}},{"slug":"instrumentation","type":"Tag","_raw":{}}],"body":{"raw":"\\nDevOps and SRE teams are transforming the process of software development. While DevOps engineers focus on efficient software applications and service delivery, SRE teams are key to ensuring reliability, scalability, and performance. 
These teams must rely on a full-stack observability solution that allows them to manage and monitor systems and ensure issues are resolved before they impact the business.\\n\\nObservability across the entire stack of modern distributed applications requires data collection, processing, and correlation often in the form of dashboards. Ingesting all system data requires installing agents across stacks, frameworks, and providers — a process that can be challenging and time-consuming for teams who have to deal with version changes, compatibility issues, and proprietary code that doesn\'t scale as systems change.\\n\\nThanks to [OpenTelemetry](https://opentelemetry.io) (OTel), DevOps and SRE teams now have a standard way to collect and send data that doesn\'t rely on proprietary code and has a large support community, reducing vendor lock-in.\\n\\nIn a [previous blog](https://www.elastic.co/blog/opentelemetry-observability), we also reviewed how to use the [OpenTelemetry demo](https://github.com/elastic/opentelemetry-demo) and connect it to Elastic\xae, as well as some of Elastic’s capabilities with [OpenTelemetry visualizations](https://www.elastic.co/observability/opentelemetry) and Kubernetes.\\n\\nIn this blog, we will show how to use [automatic instrumentation for OpenTelemetry](https://opentelemetry.io/docs/instrumentation/python/) with the Python service of our [application called Elastiflix](https://github.com/elastic/observability-examples), which helps highlight auto-instrumentation in a simple way.\\n\\nThe beauty of this is that there is **no need for the otel-collector**! This setup enables you to slowly and easily migrate an application to OTel with Elastic according to a timeline that best fits your business.\\n\\n## Application, prerequisites, and config\\n\\nThe application that we use for this blog is called [Elastiflix](https://github.com/elastic/observability-examples), a movie-streaming application. It consists of several microservices written in .NET, NodeJS, Go, and Python.\\n\\nBefore we instrument our sample application, we will first need to understand how Elastic can receive the telemetry data.\\n\\n![Elastic configuration options for OpenTelemetry](/assets/images/auto-instrumentation-python-applications-opentelemetry/elastic-blog-1-otel-config-options.png)\\n\\nAll of Elastic Observability’s APM capabilities are available with OTel data. Some of these include:\\n\\n- Service maps\\n- Service details (latency, throughput, failed transactions)\\n- Dependencies between services, distributed tracing\\n- Transactions (traces)\\n- Machine learning (ML) correlations\\n- Log correlation\\n\\nIn addition to Elastic’s APM and a unified view of the telemetry data, you will also be able to use Elastic’s powerful machine learning capabilities to reduce analysis effort, and alerting to help reduce MTTR.\\n\\n### Prerequisites\\n\\n- An Elastic Cloud account — [sign up now](https://cloud.elastic.co/)\\n- A clone of the [Elastiflix demo application](https://github.com/elastic/observability-examples), or your own Python application\\n- Basic understanding of Docker — potentially install [Docker Desktop](https://www.docker.com/products/docker-desktop/)\\n- Basic understanding of Python\\n\\n### View the example source code\\n\\nThe full source code, including the Dockerfile used in this blog, can be found on [GitHub](https://github.com/elastic/observability-examples/tree/main/Elastiflix/python-favorite-otel-auto). 
The repository also contains the [same application without instrumentation](https://github.com/elastic/observability-examples/tree/main/Elastiflix/python-favorite). This allows you to compare each file and see the differences.\\n\\nThe following steps will show you how to instrument this application and run it on the command line or in Docker. If you are interested in a more complete OTel example, take a look at the docker-compose file [here](https://github.com/elastic/observability-examples/tree/main#start-the-app), which will bring up the full project.\\n\\n## Step-by-step guide\\n\\n### Step 0. Log in to your Elastic Cloud account\\n\\nThis blog assumes you have an Elastic Cloud account — if not, follow the [instructions to get started on Elastic Cloud](https://cloud.elastic.co/registration?elektra=en-cloud-page).\\n\\n![free trial](/assets/images/auto-instrumentation-python-applications-opentelemetry/elastic-blog-2-free-trial.png)\\n\\n### Step 1. Configure auto-instrumentation for the Python Service\\n\\nWe are going to use automatic instrumentation with the Python service from the [Elastiflix demo application](https://github.com/elastic/observability-examples).\\n\\nWe will be using the following service from Elastiflix:\\n\\n```bash\\nElastiflix/python-favorite-otel-auto\\n```\\n\\nPer the [OpenTelemetry Automatic Instrumentation for Python documentation](https://opentelemetry.io/docs/instrumentation/python/automatic/), you will simply install the appropriate Python packages using pip install.\\n\\n```bash\\n>pip install opentelemetry-distro \\\\\\n\\topentelemetry-exporter-otlp\\n\\n>opentelemetry-bootstrap -a install\\n```\\n\\nIf you are running the Python service on the command line, then you can use the following command:\\n\\n```bash\\nopentelemetry-instrument python main.py\\n```\\n\\nFor our application, we do this as part of the Dockerfile.\\n\\n**Dockerfile**\\n\\n```dockerfile\\nFROM python:3.9-slim as base\\n\\n# get packages\\nCOPY requirements.txt .\\nRUN pip install -r requirements.txt\\nWORKDIR /favoriteservice\\n\\n#install opentelemetry packages\\nRUN pip install opentelemetry-distro \\\\\\n\\topentelemetry-exporter-otlp\\n\\nRUN opentelemetry-bootstrap -a install\\n\\n# Add the application\\nCOPY . .\\n\\nEXPOSE 5000\\nENTRYPOINT [ \\"opentelemetry-instrument\\", \\"python\\", \\"main.py\\"]\\n```\\n\\n### Step 2. 
Running the Docker image with environment variables\\n\\nAs specified in the [OTEL Python documentation](https://opentelemetry.io/docs/instrumentation/python/automatic/#configuring-the-agent), we will use environment variables and pass in the configuration values to enable it to connect with [Elastic Observability’s APM server](https://www.elastic.co/guide/en/apm/guide/current/open-telemetry.html).\\n\\nBecause Elastic accepts OTLP natively, we just need to provide the Endpoint and authentication where the OTEL Exporter needs to send the data, as well as some other environment variables.\\n\\n**Getting Elastic Cloud variables** \\nYou can copy the endpoints and token from Kibana\xae under the path /app/home#/tutorial/apm.\\n\\n![apm agents](/assets/images/auto-instrumentation-python-applications-opentelemetry/elastic-blog-3-apm-agents.png)\\n\\nYou will need to copy the following environment variables:\\n\\n```bash\\nOTEL_EXPORTER_OTLP_ENDPOINT\\nOTEL_EXPORTER_OTLP_HEADERS\\n```\\n\\n**Build the image**\\n\\n```bash\\ndocker build -t python-otel-auto-image .\\n```\\n\\n**Run the image**\\n\\n```bash\\ndocker run \\\\\\n -e OTEL_EXPORTER_OTLP_ENDPOINT=\\"\\" \\\\\\n -e OTEL_EXPORTER_OTLP_HEADERS=\\"Authorization=Bearer%20\\" \\\\\\n -e OTEL_RESOURCE_ATTRIBUTES=\\"service.version=1.0,deployment.environment=production\\" \\\\\\n -e OTEL_SERVICE_NAME=\\"python-favorite-otel-auto\\" \\\\\\n -p 5001:5001 \\\\\\n python-otel-auto-image\\n```\\n\\n**Important:** Note that the “OTEL_EXPORTER_OTLP_HEADERS” variable has the whitespace after Bearer escaped as “%20” — this is a requirement for Python.\\n\\nYou can now issue a few requests in order to generate trace data. Note that these requests are expected to return an error, as this service relies on a connection to Redis that you don’t currently have running. 
As mentioned before, you can find a more complete example using docker-compose [here](https://github.com/elastic/observability-examples/tree/main/Elastiflix).\\n\\n```bash\\ncurl localhost:5000/favorites\\n\\n# or alternatively issue a request every second\\n\\nwhile true; do curl \\"localhost:5000/favorites\\"; sleep 1; done;\\n```\\n\\n### Step 3: Explore traces, metrics, and logs in Elastic APM\\n\\nExploring the Services section in Elastic APM, you’ll see the Python service displayed.\\n\\n![services](/assets/images/auto-instrumentation-python-applications-opentelemetry/elastic-blog-4-services.png)\\n\\nClicking on the python-favorite-otel-auto service, you can see that it is ingesting telemetry data using OpenTelemetry.\\n\\n![graph view](/assets/images/auto-instrumentation-python-applications-opentelemetry/elastic-blog-5-graph-view.png)\\n\\nIn this blog, we discussed the following:\\n\\n- How to auto-instrument Python with OpenTelemetry\\n- Using standard commands in a Dockerfile, auto-instrumentation was done efficiently and without adding code in multiple places\\n\\nSince Elastic can support a mix of methods for ingesting data, whether it be using auto-instrumentation of open-source OpenTelemetry or manual instrumentation with its native APM agents, you can plan your migration to OTel by focusing on a few applications first and then using OpenTelemetry across your applications later on in a manner that best fits your business needs.\\n\\n> Developer resources:\\n>\\n> - [Elastiflix application](https://www.elastic.co/blog/getting-started-opentelemetry-instrumentation-sample-app), a guide to instrument different languages with OpenTelemetry\\n> - Python: [Auto-instrumentation](https://www.elastic.co/blog/auto-instrumentation-python-applications-opentelemetry), [Manual-instrumentation](https://www.elastic.co/blog/manual-instrumentation-of-python-applications-opentelemetry)\\n> - Java: [Auto-instrumentation](https://www.elastic.co/blog/auto-instrumentation-of-java-applications-opentelemetry), [Manual-instrumentation](https://www.elastic.co/blog/manual-instrumentation-of-java-applications-opentelemetry)\\n> - Node.js: [Auto-instrumentation](https://www.elastic.co/blog/auto-instrument-nodejs-applications-opentelemetry), [Manual-instrumentation](https://www.elastic.co/blog/manual-instrumentation-of-nodejs-applications-opentelemetry)\\n> - .NET: [Auto-instrumentation](https://www.elastic.co/blog/auto-instrumentation-of-net-applications-opentelemetry), [Manual-instrumentation](https://www.elastic.co/blog/manual-instrumentation-of-net-applications-opentelemetry)\\n> - Go: [Manual-instrumentation](https://elastic.co/blog/manual-instrumentation-of-go-applications-opentelemetry)\\n> - [Best practices for instrumenting OpenTelemetry](https://www.elastic.co/blog/best-practices-instrumenting-opentelemetry)\\n>\\n> General configuration and use case resources:\\n>\\n> - [Independence with OpenTelemetry on Elastic](https://www.elastic.co/blog/opentelemetry-observability)\\n> - [Modern observability and security on Kubernetes with Elastic and OpenTelemetry](https://www.elastic.co/blog/implementing-kubernetes-observability-security-opentelemetry)\\n> - [3 models for logging with OpenTelemetry and Elastic](https://www.elastic.co/blog/3-models-logging-opentelemetry-elastic)\\n> - [Adding free and open Elastic APM as part of your Elastic Observability deployment](https://www.elastic.co/blog/adding-free-and-open-elastic-apm-as-part-of-your-elastic-observability-deployment)\\n> - [Capturing custom 
metrics through OpenTelemetry API in code with Elastic](https://www.elastic.co/blog/custom-metrics-app-code-java-agent-plugin)\\n> - [Future-proof your observability platform with OpenTelemetry and Elastic](https://www.elastic.co/virtual-events/future-proof-your-observability-platform-with-opentelemetry-and-elastic)\\n> - [Elastic Observability: Built for open technologies like Kubernetes, OpenTelemetry, Prometheus, Istio, and more](https://www.elastic.co/blog/kubernetes-k8s-observability-elasticsearch-cncf)\\n\\nDon’t have an Elastic Cloud account yet? [Sign up for Elastic Cloud](https://cloud.elastic.co/registration) and try out the auto-instrumentation capabilities that I discussed above. I would be interested in getting your feedback about your experience in gaining visibility into your application stack with Elastic.\\n\\n_The release and timing of any features or functionality described in this post remain at Elastic\'s sole discretion. Any features or functionality not currently available may not be delivered on time or at all._\\n","code":"var Component=(()=>{var d=Object.create;var l=Object.defineProperty;var u=Object.getOwnPropertyDescriptor;var m=Object.getOwnPropertyNames;var g=Object.getPrototypeOf,f=Object.prototype.hasOwnProperty;var y=(n,e)=>()=>(e||n((e={exports:{}}).exports,e),e.exports),w=(n,e)=>{for(var i in e)l(n,i,{get:e[i],enumerable:!0})},r=(n,e,i,a)=>{if(e&&typeof e==\\"object\\"||typeof e==\\"function\\")for(let o of m(e))!f.call(n,o)&&o!==i&&l(n,o,{get:()=>e[o],enumerable:!(a=u(e,o))||a.enumerable});return n};var b=(n,e,i)=>(i=n!=null?d(g(n)):{},r(e||!n||!n.__esModule?l(i,\\"default\\",{value:n,enumerable:!0}):i,n)),v=n=>r(l({},\\"__esModule\\",{value:!0}),n);var c=y((P,s)=>{s.exports=_jsx_runtime});var T={};w(T,{default:()=>p,frontmatter:()=>E});var t=b(c()),E={title:\\"Automatic instrumentation with OpenTelemetry for Python applications\\",slug:\\"auto-instrumentation-python-applications-opentelemetry\\",date:\\"2023-08-31\\",description:\\"Learn how to auto-instrument Python applications using OpenTelemetry. With standard commands in a Docker file, applications can be instrumented quickly without writing code in multiple places, enabling rapid change, scale, and easier management.\\",author:[{slug:\\"bahubali-shetti\\"}],image:\\"observability-launch-series-2-python-auto_(1).jpg\\",tags:[{slug:\\"opentelemetry\\"},{slug:\\"apm\\"},{slug:\\"python\\"},{slug:\\"instrumentation\\"}]};function h(n){let e={a:\\"a\\",blockquote:\\"blockquote\\",br:\\"br\\",code:\\"code\\",em:\\"em\\",h2:\\"h2\\",h3:\\"h3\\",img:\\"img\\",li:\\"li\\",p:\\"p\\",pre:\\"pre\\",strong:\\"strong\\",ul:\\"ul\\",...n.components};return(0,t.jsxs)(t.Fragment,{children:[(0,t.jsx)(e.p,{children:\\"DevOps and SRE teams are transforming the process of software development. While DevOps engineers focus on efficient software applications and service delivery, SRE teams are key to ensuring reliability, scalability, and performance. These teams must rely on a full-stack observability solution that allows them to manage and monitor systems and ensure issues are resolved before they impact the business.\\"}),`\\n`,(0,t.jsx)(e.p,{children:\\"Observability across the entire stack of modern distributed applications requires data collection, processing, and correlation often in the form of dashboards. 
Ingesting all system data requires installing agents across stacks, frameworks, and providers \\\\u2014 a process that can be challenging and time-consuming for teams who have to deal with version changes, compatibility issues, and proprietary code that doesn\'t scale as systems change.\\"}),`\\n`,(0,t.jsxs)(e.p,{children:[\\"Thanks to \\",(0,t.jsx)(e.a,{href:\\"https://opentelemetry.io\\",rel:\\"nofollow\\",children:\\"OpenTelemetry\\"}),\\" (OTel), DevOps and SRE teams now have a standard way to collect and send data that doesn\'t rely on proprietary code and has a large support community, reducing vendor lock-in.\\"]}),`\\n`,(0,t.jsxs)(e.p,{children:[\\"In a \\",(0,t.jsx)(e.a,{href:\\"https://www.elastic.co/blog/opentelemetry-observability\\",rel:\\"nofollow\\",children:\\"previous blog\\"}),\\", we also reviewed how to use the \\",(0,t.jsx)(e.a,{href:\\"https://github.com/elastic/opentelemetry-demo\\",rel:\\"nofollow\\",children:\\"OpenTelemetry demo\\"}),\\" and connect it to Elastic\\",(0,t.jsx)(\\"sup\\",{children:\\"\\\\xAE\\"}),\\", as well as some of Elastic\\\\u2019s capabilities with \\",(0,t.jsx)(e.a,{href:\\"https://www.elastic.co/observability/opentelemetry\\",rel:\\"nofollow\\",children:\\"OpenTelemetry visualizations\\"}),\\" and Kubernetes.\\"]}),`\\n`,(0,t.jsxs)(e.p,{children:[\\"In this blog, we will show how to use \\",(0,t.jsx)(e.a,{href:\\"https://opentelemetry.io/docs/instrumentation/python/\\",rel:\\"nofollow\\",children:\\"automatic instrumentation for OpenTelemetry\\"}),\\" with the Python service of our \\",(0,t.jsx)(e.a,{href:\\"https://github.com/elastic/observability-examples\\",rel:\\"nofollow\\",children:\\"application called Elastiflix\\"}),\\", which helps highlight auto-instrumentation in a simple way.\\"]}),`\\n`,(0,t.jsxs)(e.p,{children:[\\"The beauty of this is that there is \\",(0,t.jsx)(e.strong,{children:\\"no need for the otel-collector\\"}),\\"! This setup enables you to slowly and easily migrate an application to OTel with Elastic according to a timeline that best fits your business.\\"]}),`\\n`,(0,t.jsx)(e.h2,{id:\\"application-prerequisites-and-config\\",children:\\"Application, prerequisites, and config\\"}),`\\n`,(0,t.jsxs)(e.p,{children:[\\"The application that we use for this blog is called \\",(0,t.jsx)(e.a,{href:\\"https://github.com/elastic/observability-examples\\",rel:\\"nofollow\\",children:\\"Elastiflix\\"}),\\", a movie-streaming application. It consists of several microservices written in .NET, NodeJS, Go, and Python.\\"]}),`\\n`,(0,t.jsx)(e.p,{children:\\"Before we instrument our sample application, we will first need to understand how Elastic can receive the telemetry data.\\"}),`\\n`,(0,t.jsx)(e.p,{children:(0,t.jsx)(e.img,{src:\\"/assets/images/auto-instrumentation-python-applications-opentelemetry/elastic-blog-1-otel-config-options.png\\",alt:\\"Elastic configuration options for OpenTelemetry\\",width:\\"1365\\",height:\\"687\\"})}),`\\n`,(0,t.jsx)(e.p,{children:\\"All of Elastic Observability\\\\u2019s APM capabilities are available with OTel data. 
Some of these include:\\"}),`\\n`,(0,t.jsxs)(e.ul,{children:[`\\n`,(0,t.jsx)(e.li,{children:\\"Service maps\\"}),`\\n`,(0,t.jsx)(e.li,{children:\\"Service details (latency, throughput, failed transactions)\\"}),`\\n`,(0,t.jsx)(e.li,{children:\\"Dependencies between services, distributed tracing\\"}),`\\n`,(0,t.jsx)(e.li,{children:\\"Transactions (traces)\\"}),`\\n`,(0,t.jsx)(e.li,{children:\\"Machine learning (ML) correlations\\"}),`\\n`,(0,t.jsx)(e.li,{children:\\"Log correlation\\"}),`\\n`]}),`\\n`,(0,t.jsx)(e.p,{children:\\"In addition to Elastic\\\\u2019s APM and a unified view of the telemetry data, you will also be able to use Elastic\\\\u2019s powerful machine learning capabilities to reduce analysis effort, and alerting to help reduce MTTR.\\"}),`\\n`,(0,t.jsx)(e.h3,{id:\\"prerequisites\\",children:\\"Prerequisites\\"}),`\\n`,(0,t.jsxs)(e.ul,{children:[`\\n`,(0,t.jsxs)(e.li,{children:[\\"An Elastic Cloud account \\\\u2014 \\",(0,t.jsx)(e.a,{href:\\"https://cloud.elastic.co/\\",rel:\\"nofollow\\",children:\\"sign up now\\"})]}),`\\n`,(0,t.jsxs)(e.li,{children:[\\"A clone of the \\",(0,t.jsx)(e.a,{href:\\"https://github.com/elastic/observability-examples\\",rel:\\"nofollow\\",children:\\"Elastiflix demo application\\"}),\\", or your own Python application\\"]}),`\\n`,(0,t.jsxs)(e.li,{children:[\\"Basic understanding of Docker \\\\u2014 potentially install \\",(0,t.jsx)(e.a,{href:\\"https://www.docker.com/products/docker-desktop/\\",rel:\\"nofollow\\",children:\\"Docker Desktop\\"})]}),`\\n`,(0,t.jsx)(e.li,{children:\\"Basic understanding of Python\\"}),`\\n`]}),`\\n`,(0,t.jsx)(e.h3,{id:\\"view-the-example-source-code\\",children:\\"View the example source code\\"}),`\\n`,(0,t.jsxs)(e.p,{children:[\\"The full source code, including the Dockerfile used in this blog, can be found on \\",(0,t.jsx)(e.a,{href:\\"https://github.com/elastic/observability-examples/tree/main/Elastiflix/python-favorite-otel-auto\\",rel:\\"nofollow\\",children:\\"GitHub\\"}),\\". The repository also contains the \\",(0,t.jsx)(e.a,{href:\\"https://github.com/elastic/observability-examples/tree/main/Elastiflix/python-favorite\\",rel:\\"nofollow\\",children:\\"same application without instrumentation\\"}),\\". This allows you to compare each file and see the differences.\\"]}),`\\n`,(0,t.jsxs)(e.p,{children:[\\"The following steps will show you how to instrument this application and run it on the command line or in Docker. If you are interested in a more complete OTel example, take a look at the docker-compose file \\",(0,t.jsx)(e.a,{href:\\"https://github.com/elastic/observability-examples/tree/main#start-the-app\\",rel:\\"nofollow\\",children:\\"here\\"}),\\", which will bring up the full project.\\"]}),`\\n`,(0,t.jsx)(e.h2,{id:\\"step-by-step-guide\\",children:\\"Step-by-step guide\\"}),`\\n`,(0,t.jsx)(e.h3,{id:\\"step-0-log-in-to-your-elastic-cloud-account\\",children:\\"Step 0. 
Log in to your Elastic Cloud account\\"}),`\\n`,(0,t.jsxs)(e.p,{children:[\\"This blog assumes you have an Elastic Cloud account \\\\u2014 if not, follow the \\",(0,t.jsx)(e.a,{href:\\"https://cloud.elastic.co/registration?elektra=en-cloud-page\\",rel:\\"nofollow\\",children:\\"instructions to get started on Elastic Cloud\\"}),\\".\\"]}),`\\n`,(0,t.jsx)(e.p,{children:(0,t.jsx)(e.img,{src:\\"/assets/images/auto-instrumentation-python-applications-opentelemetry/elastic-blog-2-free-trial.png\\",alt:\\"free trial\\",width:\\"884\\",height:\\"654\\"})}),`\\n`,(0,t.jsx)(e.h3,{id:\\"step-1-configure-auto-instrumentation-for-the-python-service\\",children:\\"Step 1. Configure auto-instrumentation for the Python Service\\"}),`\\n`,(0,t.jsxs)(e.p,{children:[\\"We are going to use automatic instrumentation with the Python service from the \\",(0,t.jsx)(e.a,{href:\\"https://github.com/elastic/observability-examples\\",rel:\\"nofollow\\",children:\\"Elastiflix demo application\\"}),\\".\\"]}),`\\n`,(0,t.jsx)(e.p,{children:\\"We will be using the following service from Elastiflix:\\"}),`\\n`,(0,t.jsx)(e.pre,{children:(0,t.jsx)(e.code,{className:\\"language-bash\\",children:`Elastiflix/python-favorite-otel-auto\\n`})}),`\\n`,(0,t.jsxs)(e.p,{children:[\\"Per the \\",(0,t.jsx)(e.a,{href:\\"https://opentelemetry.io/docs/instrumentation/python/automatic/\\",rel:\\"nofollow\\",children:\\"OpenTelemetry Automatic Instrumentation for Python documentation\\"}),\\", you will simply install the appropriate Python packages using pip install.\\"]}),`\\n`,(0,t.jsx)(e.pre,{children:(0,t.jsx)(e.code,{className:\\"language-bash\\",children:`>pip install opentelemetry-distro \\\\\\\\\\n\\topentelemetry-exporter-otlp\\n\\n>opentelemetry-bootstrap -a install\\n`})}),`\\n`,(0,t.jsx)(e.p,{children:\\"If you are running the Python service on the command line, then you can use the following command:\\"}),`\\n`,(0,t.jsx)(e.pre,{children:(0,t.jsx)(e.code,{className:\\"language-bash\\",children:`opentelemetry-instrument python main.py\\n`})}),`\\n`,(0,t.jsx)(e.p,{children:\\"For our application, we do this as part of the Dockerfile.\\"}),`\\n`,(0,t.jsx)(e.p,{children:(0,t.jsx)(e.strong,{children:\\"Dockerfile\\"})}),`\\n`,(0,t.jsx)(e.pre,{children:(0,t.jsx)(e.code,{className:\\"language-dockerfile\\",children:`FROM python:3.9-slim as base\\n\\n# get packages\\nCOPY requirements.txt .\\nRUN pip install -r requirements.txt\\nWORKDIR /favoriteservice\\n\\n#install opentelemetry packages\\nRUN pip install opentelemetry-distro \\\\\\\\\\n\\topentelemetry-exporter-otlp\\n\\nRUN opentelemetry-bootstrap -a install\\n\\n# Add the application\\nCOPY . .\\n\\nEXPOSE 5000\\nENTRYPOINT [ \\"opentelemetry-instrument\\", \\"python\\", \\"main.py\\"]\\n`})}),`\\n`,(0,t.jsx)(e.h3,{id:\\"step-2-running-the-docker-image-with-environment-variables\\",children:\\"Step 2. 
Running the Docker image with environment variables\\"}),`\\n`,(0,t.jsxs)(e.p,{children:[\\"As specified in the \\",(0,t.jsx)(e.a,{href:\\"https://opentelemetry.io/docs/instrumentation/python/automatic/#configuring-the-agent\\",rel:\\"nofollow\\",children:\\"OTEL Python documentation\\"}),\\", we will use environment variables and pass in the configuration values to enable it to connect with \\",(0,t.jsx)(e.a,{href:\\"https://www.elastic.co/guide/en/apm/guide/current/open-telemetry.html\\",rel:\\"nofollow\\",children:\\"Elastic Observability\\\\u2019s APM server\\"}),\\".\\"]}),`\\n`,(0,t.jsx)(e.p,{children:\\"Because Elastic accepts OTLP natively, we just need to provide the Endpoint and authentication where the OTEL Exporter needs to send the data, as well as some other environment variables.\\"}),`\\n`,(0,t.jsxs)(e.p,{children:[(0,t.jsx)(e.strong,{children:\\"Getting Elastic Cloud variables\\"}),(0,t.jsx)(e.br,{}),`\\n`,\\"You can copy the endpoints and token from Kibana\\",(0,t.jsx)(\\"sup\\",{children:\\"\\\\xAE\\"}),\\" under the path /app/home#/tutorial/apm.\\"]}),`\\n`,(0,t.jsx)(e.p,{children:(0,t.jsx)(e.img,{src:\\"/assets/images/auto-instrumentation-python-applications-opentelemetry/elastic-blog-3-apm-agents.png\\",alt:\\"apm agents\\",width:\\"1924\\",height:\\"1304\\"})}),`\\n`,(0,t.jsx)(e.p,{children:\\"You will need to copy the following environment variables:\\"}),`\\n`,(0,t.jsx)(e.pre,{children:(0,t.jsx)(e.code,{className:\\"language-bash\\",children:`OTEL_EXPORTER_OTLP_ENDPOINT\\nOTEL_EXPORTER_OTLP_HEADERS\\n`})}),`\\n`,(0,t.jsx)(e.p,{children:(0,t.jsx)(e.strong,{children:\\"Build the image\\"})}),`\\n`,(0,t.jsx)(e.pre,{children:(0,t.jsx)(e.code,{className:\\"language-bash\\",children:`docker build -t python-otel-auto-image .\\n`})}),`\\n`,(0,t.jsx)(e.p,{children:(0,t.jsx)(e.strong,{children:\\"Run the image\\"})}),`\\n`,(0,t.jsx)(e.pre,{children:(0,t.jsx)(e.code,{className:\\"language-bash\\",children:`docker run \\\\\\\\\\n -e OTEL_EXPORTER_OTLP_ENDPOINT=\\"\\" \\\\\\\\\\n -e OTEL_EXPORTER_OTLP_HEADERS=\\"Authorization=Bearer%20\\" \\\\\\\\\\n -e OTEL_RESOURCE_ATTRIBUTES=\\"service.version=1.0,deployment.environment=production\\" \\\\\\\\\\n -e OTEL_SERVICE_NAME=\\"python-favorite-otel-auto\\" \\\\\\\\\\n -p 5001:5001 \\\\\\\\\\n python-otel-auto-image\\n`})}),`\\n`,(0,t.jsxs)(e.p,{children:[(0,t.jsx)(e.strong,{children:\\"Important:\\"}),\\" Note that the \\\\u201COTEL_EXPORTER_OTLP_HEADERS\\\\u201D variable has the whitespace after Bearer escaped as \\\\u201C%20\\\\u201D \\\\u2014 this is a requirement for Python.\\"]}),`\\n`,(0,t.jsxs)(e.p,{children:[\\"You can now issue a few requests in order to generate trace data. Note that these requests are expected to return an error, as this service relies on a connection to Redis that you don\\\\u2019t currently have running. 
As mentioned before, you can find a more complete example using docker-compose \\",(0,t.jsx)(e.a,{href:\\"https://github.com/elastic/observability-examples/tree/main/Elastiflix\\",rel:\\"nofollow\\",children:\\"here\\"}),\\".\\"]}),`\\n`,(0,t.jsx)(e.pre,{children:(0,t.jsx)(e.code,{className:\\"language-bash\\",children:`curl localhost:5000/favorites\\n\\n# or alternatively issue a request every second\\n\\nwhile true; do curl \\"localhost:5000/favorites\\"; sleep 1; done;\\n`})}),`\\n`,(0,t.jsx)(e.h3,{id:\\"step-3-explore-traces-metrics-and-logs-in-elastic-apm\\",children:\\"Step 3: Explore traces, metrics, and logs in Elastic APM\\"}),`\\n`,(0,t.jsx)(e.p,{children:\\"Exploring the Services section in Elastic APM, you\\\\u2019ll see the Python service displayed.\\"}),`\\n`,(0,t.jsx)(e.p,{children:(0,t.jsx)(e.img,{src:\\"/assets/images/auto-instrumentation-python-applications-opentelemetry/elastic-blog-4-services.png\\",alt:\\"services\\",width:\\"1999\\",height:\\"1124\\"})}),`\\n`,(0,t.jsx)(e.p,{children:\\"Clicking on the python-favorite-otel-auto service, you can see that it is ingesting telemetry data using OpenTelemetry.\\"}),`\\n`,(0,t.jsx)(e.p,{children:(0,t.jsx)(e.img,{src:\\"/assets/images/auto-instrumentation-python-applications-opentelemetry/elastic-blog-5-graph-view.png\\",alt:\\"graph view\\",width:\\"1999\\",height:\\"1124\\"})}),`\\n`,(0,t.jsx)(e.p,{children:\\"In this blog, we discussed the following:\\"}),`\\n`,(0,t.jsxs)(e.ul,{children:[`\\n`,(0,t.jsx)(e.li,{children:\\"How to auto-instrument Python with OpenTelemetry\\"}),`\\n`,(0,t.jsx)(e.li,{children:\\"Using standard commands in a Dockerfile, auto-instrumentation was done efficiently and without adding code in multiple places\\"}),`\\n`]}),`\\n`,(0,t.jsx)(e.p,{children:\\"Since Elastic can support a mix of methods for ingesting data, whether it be using auto-instrumentation of open-source OpenTelemetry or manual instrumentation with its native APM agents, you can plan your migration to OTel by focusing on a few applications first and then using OpenTelemetry across your applications later on in a manner that best fits your business needs.\\"}),`\\n`,(0,t.jsxs)(e.blockquote,{children:[`\\n`,(0,t.jsx)(e.p,{children:\\"Developer resources:\\"}),`\\n`,(0,t.jsxs)(e.ul,{children:[`\\n`,(0,t.jsxs)(e.li,{children:[(0,t.jsx)(e.a,{href:\\"https://www.elastic.co/blog/getting-started-opentelemetry-instrumentation-sample-app\\",rel:\\"nofollow\\",children:\\"Elastiflix application\\"}),\\", a guide to instrument different languages with OpenTelemetry\\"]}),`\\n`,(0,t.jsxs)(e.li,{children:[\\"Python: \\",(0,t.jsx)(e.a,{href:\\"https://www.elastic.co/blog/auto-instrumentation-python-applications-opentelemetry\\",rel:\\"nofollow\\",children:\\"Auto-instrumentation\\"}),\\", \\",(0,t.jsx)(e.a,{href:\\"https://www.elastic.co/blog/manual-instrumentation-of-python-applications-opentelemetry\\",rel:\\"nofollow\\",children:\\"Manual-instrumentation\\"})]}),`\\n`,(0,t.jsxs)(e.li,{children:[\\"Java: \\",(0,t.jsx)(e.a,{href:\\"https://www.elastic.co/blog/auto-instrumentation-of-java-applications-opentelemetry\\",rel:\\"nofollow\\",children:\\"Auto-instrumentation\\"}),\\", \\",(0,t.jsx)(e.a,{href:\\"https://www.elastic.co/blog/manual-instrumentation-of-java-applications-opentelemetry\\",rel:\\"nofollow\\",children:\\"Manual-instrumentation\\"})]}),`\\n`,(0,t.jsxs)(e.li,{children:[\\"Node.js: 
\\",(0,t.jsx)(e.a,{href:\\"https://www.elastic.co/blog/auto-instrument-nodejs-applications-opentelemetry\\",rel:\\"nofollow\\",children:\\"Auto-instrumentation\\"}),\\", \\",(0,t.jsx)(e.a,{href:\\"https://www.elastic.co/blog/manual-instrumentation-of-nodejs-applications-opentelemetry\\",rel:\\"nofollow\\",children:\\"Manual-instrumentation\\"})]}),`\\n`,(0,t.jsxs)(e.li,{children:[\\".NET: \\",(0,t.jsx)(e.a,{href:\\"https://www.elastic.co/blog/auto-instrumentation-of-net-applications-opentelemetry\\",rel:\\"nofollow\\",children:\\"Auto-instrumentation\\"}),\\", \\",(0,t.jsx)(e.a,{href:\\"https://www.elastic.co/blog/manual-instrumentation-of-net-applications-opentelemetry\\",rel:\\"nofollow\\",children:\\"Manual-instrumentation\\"})]}),`\\n`,(0,t.jsxs)(e.li,{children:[\\"Go: \\",(0,t.jsx)(e.a,{href:\\"https://elastic.co/blog/manual-instrumentation-of-go-applications-opentelemetry\\",rel:\\"nofollow\\",children:\\"Manual-instrumentation\\"})]}),`\\n`,(0,t.jsx)(e.li,{children:(0,t.jsx)(e.a,{href:\\"https://www.elastic.co/blog/best-practices-instrumenting-opentelemetry\\",rel:\\"nofollow\\",children:\\"Best practices for instrumenting OpenTelemetry\\"})}),`\\n`]}),`\\n`,(0,t.jsx)(e.p,{children:\\"General configuration and use case resources:\\"}),`\\n`,(0,t.jsxs)(e.ul,{children:[`\\n`,(0,t.jsx)(e.li,{children:(0,t.jsx)(e.a,{href:\\"https://www.elastic.co/blog/opentelemetry-observability\\",rel:\\"nofollow\\",children:\\"Independence with OpenTelemetry on Elastic\\"})}),`\\n`,(0,t.jsx)(e.li,{children:(0,t.jsx)(e.a,{href:\\"https://www.elastic.co/blog/implementing-kubernetes-observability-security-opentelemetry\\",rel:\\"nofollow\\",children:\\"Modern observability and security on Kubernetes with Elastic and OpenTelemetry\\"})}),`\\n`,(0,t.jsx)(e.li,{children:(0,t.jsx)(e.a,{href:\\"https://www.elastic.co/blog/3-models-logging-opentelemetry-elastic\\",rel:\\"nofollow\\",children:\\"3 models for logging with OpenTelemetry and Elastic\\"})}),`\\n`,(0,t.jsx)(e.li,{children:(0,t.jsx)(e.a,{href:\\"https://www.elastic.co/blog/adding-free-and-open-elastic-apm-as-part-of-your-elastic-observability-deployment\\",rel:\\"nofollow\\",children:\\"Adding free and open Elastic APM as part of your Elastic Observability deployment\\"})}),`\\n`,(0,t.jsx)(e.li,{children:(0,t.jsx)(e.a,{href:\\"https://www.elastic.co/blog/custom-metrics-app-code-java-agent-plugin\\",rel:\\"nofollow\\",children:\\"Capturing custom metrics through OpenTelemetry API in code with Elastic\\"})}),`\\n`,(0,t.jsx)(e.li,{children:(0,t.jsx)(e.a,{href:\\"https://www.elastic.co/virtual-events/future-proof-your-observability-platform-with-opentelemetry-and-elastic\\",rel:\\"nofollow\\",children:\\"Future-proof your observability platform with OpenTelemetry and Elastic\\"})}),`\\n`,(0,t.jsx)(e.li,{children:(0,t.jsx)(e.a,{href:\\"https://www.elastic.co/blog/kubernetes-k8s-observability-elasticsearch-cncf\\",rel:\\"nofollow\\",children:\\"Elastic Observability: Built for open technologies like Kubernetes, OpenTelemetry, Prometheus, Istio, and more\\"})}),`\\n`]}),`\\n`]}),`\\n`,(0,t.jsxs)(e.p,{children:[\\"Don\\\\u2019t have an Elastic Cloud account yet? \\",(0,t.jsx)(e.a,{href:\\"https://cloud.elastic.co/registration\\",rel:\\"nofollow\\",children:\\"Sign up for Elastic Cloud\\"}),\\" and try out the auto-instrumentation capabilities that I discussed above. 
I would be interested in getting your feedback about your experience in gaining visibility into your application stack with Elastic.\\"]}),`\\n`,(0,t.jsx)(e.p,{children:(0,t.jsx)(e.em,{children:\\"The release and timing of any features or functionality described in this post remain at Elastic\'s sole discretion. Any features or functionality not currently available may not be delivered on time or at all.\\"})})]})}function p(n={}){let{wrapper:e}=n.components||{};return e?(0,t.jsx)(e,{...n,children:(0,t.jsx)(h,{...n})}):h(n)}return v(T);})();\\n;return Component;"},"_id":"articles/auto-instrumentation-of-python-applications-opentelemetry.mdx","_raw":{"sourceFilePath":"articles/auto-instrumentation-of-python-applications-opentelemetry.mdx","sourceFileName":"auto-instrumentation-of-python-applications-opentelemetry.mdx","sourceFileDir":"articles","contentType":"mdx","flattenedPath":"articles/auto-instrumentation-of-python-applications-opentelemetry"},"type":"Article","imageUrl":"/assets/images/auto-instrumentation-python-applications-opentelemetry/observability-launch-series-2-python-auto_(1).jpg","readingTime":"7 min read","url":"/auto-instrumentation-python-applications-opentelemetry","headings":[{"level":2,"title":"Application, prerequisites, and config","href":"#application-prerequisites-and-config"},{"level":3,"title":"Prerequisites","href":"#prerequisites"},{"level":3,"title":"View the example source code","href":"#view-the-example-source-code"},{"level":2,"title":"Step-by-step guide","href":"#step-by-step-guide"},{"level":3,"title":"Step 0. Log in to your Elastic Cloud account","href":"#step-0-log-in-to-your-elastic-cloud-account"},{"level":3,"title":"Step 1. Configure auto-instrumentation for the Python Service","href":"#step-1-configure-auto-instrumentation-for-the-python-service"},{"level":3,"title":"Step 2. Running the Docker image with environment variables","href":"#step-2-running-the-docker-image-with-environment-variables"},{"level":3,"title":"Step 3: Explore traces, metrics, and logs in Elastic APM","href":"#step-3-explore-traces-metrics-and-logs-in-elastic-apm"}]},{"title":"One-Step Ingest for CloudWatch Logs and Metrics into Elastic Observability with Amazon Data Firehose","slug":"aws-data-firehose-onboarding","date":"2024-11-26","description":"AWS users can now leverage the new guided onboarding workflow to ingest CloudWatch logs and metrics in Elastic Cloud and explore the usage and performance of over twenty AWS services within minutes, using the provided CloudFormation template.","image":"154567_Image 21.jpg","author":[{"slug":"akhilesh-pokhariyal","type":"Author","_raw":{}},{"slug":"mykola-harmash","type":"Author","_raw":{}},{"slug":"kaiyan-white","type":"Author","_raw":{}}],"tags":[{"slug":"aws","type":"Tag","_raw":{}},{"slug":"aws-kinesis-data-firehose","type":"Tag","_raw":{}}],"body":{"raw":"\\n## Overview of the new Quickstart guided workflow\\n\\nElastic Observability has been supporting AWS logs ingest with Amazon Data Firehose over the last few releases. To make configuration easier, we introduced, in 8.16, a one-step guided workflow to onboard all CloudWatch logs and metrics from a single region. The configuration uses a pre-populated CloudFormation template to automatically create an Amazon Data Firehose delivery stream and connect it to Elastic Observability. Additionally, all the relevant Elastic AWS Integrations are auto-installed. The configuration ensures ingestion of metrics from all namespaces and sets a policy to ingest logs from all existing log groups. 
Any new metric namespaces and log groups post setup will also be ingested automatically. Additionally, the CloudFormation template can also be customized and deployed in a production environment using infra-as-code.\\n\\nThis allows SREs to start monitoring the usage and health of their popular AWS services using pre-built dashboards within minutes. This blog reviews how to set up this quickstart workflow and the out-of-the-box dashboards that will be populated from it.\\n\\n## Onboarding data using Amazon Data Firehose\\n\\nIn order to utilize this guided workflow, a user needs the superuser built-in Kibana role. A deployment of the hosted Elasticsearch service of version 8.16 on [Elastic Cloud](https://cloud.elastic.co/login?redirectTo=%2Fhome) is required. Further, an active AWS account and the necessary permissions to create delivery streams, run CloudFormation, and create CloudWatch log group/metric streams are needed.\\n\\nLet’s walk through the steps required to onboard data using this workflow. There should be some CloudWatch logs and metrics already available in the customer account. The screenshot below shows an example where a number of CloudWatch metrics namespaces already exist.\\n\\n![CloudWatch metrics already present](/assets/images/aws-data-firehose-onboarding/AWS-CloudWatch-Metrics.png)\\n\\nSimilarly, a number of CloudWatch log groups are already present in this customer account, as shown below.\\n\\n![CloudWatch logs already present](/assets/images/aws-data-firehose-onboarding/AWS-CloudWatch-Log-Groups.png)\\n\\nThis guided workflow is accessible from the ‘Add data’ left navigation option in the Elastic Observability app. The user needs to select the ‘Cloud’ option and click on the ‘AWS’ tile. The Amazon Firehose quickstart onboarding workflow is available at the top left and is labeled as a Quickstart option, as shown below.\xa0\xa0\\n\\n![Firehose onboarding tile](/assets/images/aws-data-firehose-onboarding/Kibana-Onboarding-Firehose-Card.png)\\n\\n\\nThe Data Firehose delivery stream can be created either using the AWS CLI or the AWS console, as shown in step 2 of the guided workflow below.\xa0\\n\\n![Firehose onboarding step 1](/assets/images/aws-data-firehose-onboarding/Kibana-Firehose-Flow-Start.png)\\n\\n
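For reference, creating the stack from the AWS CLI might look like the following sketch; the template URL and parameter names here are placeholders rather than the exact values, so copy the real command from the AWS CLI option in the guided workflow:\\n\\n```bash\\n# Sketch only: the real template URL and parameter names come from the workflow\\naws cloudformation create-stack \\\\\\n  --stack-name elastic-firehose-quickstart \\\\\\n  --template-url <template-url-from-the-workflow> \\\\\\n  --parameters ParameterKey=ElasticEndpointURL,ParameterValue=<your-elasticsearch-endpoint> \\\\\\n               ParameterKey=ElasticAPIKey,ParameterValue=<your-api-key> \\\\\\n  --capabilities CAPABILITY_NAMED_IAM  # mirrors the console checkbox acknowledging IAM resources\\n```\\n\\n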
By default, the CF stack will consist of separate delivery streams for CloudWatch logs and metrics, as shown below.\xa0\\n\\n![Firehose streams](/assets/images/aws-data-firehose-onboarding/Firehose-Streams.png)\\n\\nIn Kibana, under step 3 ‘Visualize your data’ of the workflow, the incoming data starts to appear, categorized by AWS service type as shown below. The page refreshes automatically every 5 seconds, and the new services appear at the bottom of the list.\xa0\xa0\\n\\n![Services detected 01](/assets/images/aws-data-firehose-onboarding/Kibana-AWS-Services-Detected-1.png)\\n\\n![Services detected 02](/assets/images/aws-data-firehose-onboarding/Kibana-AWS-Services-Detected-2.png)\\n\\n\\nFor each detected AWS service, the workflow recommends 1-2 pre-built dashboards for exploring the health and usage of that service. For example, the pre-built dashboard shown below provides a quick overview of the usage of the NAT Gateway.\xa0\xa0\\n\\n![Nat Gateway dashboard](/assets/images/aws-data-firehose-onboarding/NAT-Gateway-Dashboard.png)\\n\\n\\nIn addition to pre-built dashboards, Discover can also be used to explore the ingested CloudWatch logs, as shown below.\xa0\\n\\n![Discover for logs](/assets/images/aws-data-firehose-onboarding/ECS-Logs.png)\\n\\nThe AWS usage overview can be explored using the pre-built dashboard shown below.\\n\\n![AWS usage](/assets/images/aws-data-firehose-onboarding/AWS-Usage-Dashboard.png)\\n\\n\\n## Customisation options\\n\\nThe region needs to be selected or changed in the AWS console, as shown below, before starting the CF stack creation.\xa0\\n\\n![AWS region selector](/assets/images/aws-data-firehose-onboarding/AWS-Console-Region-Selector.png)\\n\\n\\nThe `EnableCloudWatchLogs` and `EnableCloudWatchMetrics` parameters can be changed in the AWS console or the CF template to disable the collection of logs or metrics.\\n\\n\\nThe `MetricNameFilters` parameter in the CF template or console can be used to exclude specific namespace and metric-name pairs from collection.\\n\\n\\nThe CF template provided by Elastic can be used together with the Terraform resource [aws_cloudformation_stack](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/cloudformation_stack) as shown below to deploy in a production environment and facilitate as-code deployment.\\n\\n![Terraform template](/assets/images/aws-data-firehose-onboarding/Terraform-Template.png)\\n\\n\\n## Start your own exploration\xa0\\n\\nThe new guided onboarding workflow for AWS utilizes the Amazon Firehose delivery stream to collect all available CloudWatch logs and metrics from a single customer account and a single region. The workflow also installs AWS Integration packages in the Elastic stack, enabling users to start monitoring the usage and performance of their common AWS services using pre-built dashboards within minutes. Some of the AWS services that can be monitored using this workflow are listed below, right after a short sketch of changing the collection parameters from the CLI. 
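\\n\\nAs a concrete, hypothetical illustration of the customization options above, the sketch below flips the two collection toggles on an already-created stack; the stack name is a placeholder, and on a real stack any parameters you omit would need ParameterKey/UsePreviousValue entries.\\n\\n```bash\\n# Hypothetical stack name; reuses the deployed template and changes only the toggles discussed above.\\naws cloudformation update-stack --stack-name elastic-firehose-quickstart --use-previous-template --capabilities CAPABILITY_NAMED_IAM --parameters ParameterKey=EnableCloudWatchLogs,ParameterValue=true ParameterKey=EnableCloudWatchMetrics,ParameterValue=false\\n```\\n\\n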
A complete list of over twenty services that are supported by this workflow, along with additional details, is available [here](https://www.elastic.co/guide/en/observability/current/collect-data-with-aws-firehose.html).\\n\\n| AWS service | Data types |\\n| ---------------- | ------------- |\\n| VPC Flow Logs | Logs |\\n| API Gateway | Logs, Metrics |\\n| CloudTrail | Logs |\\n| Network Firewall | Logs, Metrics |\\n| WAF | Logs |\\n| EC2 | Metrics |\\n| RDS | Metrics |\\n","code":"var Component=(()=>{var g=Object.create;var s=Object.defineProperty;var m=Object.getOwnPropertyDescriptor;var p=Object.getOwnPropertyNames;var u=Object.getPrototypeOf,w=Object.prototype.hasOwnProperty;var b=(a,e)=>()=>(e||a((e={exports:{}}).exports,e),e.exports),f=(a,e)=>{for(var o in e)s(a,o,{get:e[o],enumerable:!0})},n=(a,e,o,r)=>{if(e&&typeof e==\\"object\\"||typeof e==\\"function\\")for(let i of p(e))!w.call(a,i)&&i!==o&&s(a,i,{get:()=>e[i],enumerable:!(r=m(e,i))||r.enumerable});return a};var v=(a,e,o)=>(o=a!=null?g(u(a)):{},n(e||!a||!a.__esModule?s(o,\\"default\\",{value:a,enumerable:!0}):o,a)),y=a=>n(s({},\\"__esModule\\",{value:!0}),a);var d=b((k,l)=>{l.exports=_jsx_runtime});var C={};f(C,{default:()=>c,frontmatter:()=>A});var t=v(d()),A={title:\\"One-Step Ingest for CloudWatch Logs and Metrics into Elastic Observability with Amazon Data Firehose\\",slug:\\"aws-data-firehose-onboarding\\",date:\\"2024-11-26\\",description:\\"AWS users can now leverage the new guided onboarding workflow to ingest CloudWatch logs and metrics in Elastic Cloud and explore the usage and performance of over twenty AWS services within minutes, using the provided CloudFormation template.\\",author:[{slug:\\"akhilesh-pokhariyal\\"},{slug:\\"mykola-harmash\\"},{slug:\\"kaiyan-white\\"}],image:\\"154567_Image 21.jpg\\",tags:[{slug:\\"aws\\"},{slug:\\"aws-kinesis-data-firehose\\"}]};function h(a){let e={a:\\"a\\",code:\\"code\\",div:\\"div\\",h2:\\"h2\\",img:\\"img\\",p:\\"p\\",table:\\"table\\",tbody:\\"tbody\\",td:\\"td\\",th:\\"th\\",thead:\\"thead\\",tr:\\"tr\\",...a.components};return(0,t.jsxs)(t.Fragment,{children:[(0,t.jsx)(e.h2,{id:\\"overview-of-the-new-quickstart-guided-workflow\\",children:\\"Overview of the new Quickstart guided workflow\\"}),`\\n`,(0,t.jsx)(e.p,{children:\\"Elastic Observability has been supporting AWS logs ingest with Amazon Data Firehose over the last few releases. To makes configuration easier, we introduced, in 8.16, a one step guided workflow to onboard all CloudWatch logs and metrics from a single region. The configuration uses a pre-populated CloudFormation template, to automatically create a Amazon Data Firehose and connect to Elastic Observability. Additionally, all the relevant Elastic AWS Integrations are auto-installed. The configuration ensures ingestion for metrics from all namespaces and a policy to ingest logs from all existing log groups. Any new metric namespaces and log groups post setup will also be ingested automatically. Additionally, the CloudFormation template can also be customized and deployed in a production environment using infra-as-code.\\"}),`\\n`,(0,t.jsx)(e.p,{children:\\"This allows SREs to to start monitoring the usage and health of their popular AWS services using pre-built dashboards within minutes. 
This blog reviews how to setup this quickstart workflow, and the out-of-the box dashboards that will be populated from it.\\"}),`\\n`,(0,t.jsx)(e.h2,{id:\\"onboarding-data-using-amazon-data-firehose\\",children:\\"Onboarding data using Amazon Data Firehose\\"}),`\\n`,(0,t.jsxs)(e.p,{children:[\\"In order to utilize this guided workflow, a user needs the superuser built-in Kibana role. A deployment of the hosted Elasticsearch service of version 8.16 on \\",(0,t.jsx)(e.a,{href:\\"https://cloud.elastic.co/login?redirectTo=%2Fhome\\",rel:\\"nofollow\\",children:\\"Elastic Cloud\\"}),\\" is required. Further, an active AWS account and the necessary permissions to create delivery streams, run CloudFormation, create CloudWatch log group/metric streams are needed.\\"]}),`\\n`,(0,t.jsx)(e.p,{children:\\"Let\\\\u2019s walk through the steps required to onboard data using this workflow. There should be some CloudWatch logs and metrics already available in the customer account. The screenshot below shows an example where a number of CloudWatch metrics namespaces already exist.\\"}),`\\n`,(0,t.jsx)(e.p,{children:(0,t.jsx)(e.img,{src:\\"/assets/images/aws-data-firehose-onboarding/AWS-CloudWatch-Metrics.png\\",alt:\\"CloudWatch metrics already present\\",width:\\"3093\\",height:\\"2421\\"})}),`\\n`,(0,t.jsx)(e.p,{children:\\"Similarly, a number of CloudWatch log groups are already present in this customer account as shown below.\\"}),`\\n`,(0,t.jsx)(e.p,{children:(0,t.jsx)(e.img,{src:\\"/assets/images/aws-data-firehose-onboarding/AWS-CloudWatch-Log-Groups.png\\",alt:\\"CloudWatch logs already present\\",width:\\"3097\\",height:\\"2420\\"})}),`\\n`,(0,t.jsx)(e.p,{children:\\"This guided workflow is accessible from the \\\\u2018Add data\\\\u2019 left navigation option in the Elastic Observability app. The user needs to select the \\\\u2018Cloud\\\\u2019 option and click on the \\\\u2018AWS\\\\u2019 tile. The Amazon Firehose quickstart onboarding workflow is available at the top left and is labeled as a Quickstart option, as shown below.\\\\xA0\\\\xA0\\"}),`\\n`,(0,t.jsx)(e.p,{children:(0,t.jsx)(e.img,{src:\\"/assets/images/aws-data-firehose-onboarding/Kibana-Onboarding-Firehose-Card.png\\",alt:\\"Firehose onboarding tile\\",width:\\"3092\\",height:\\"2418\\"})}),`\\n`,(0,t.jsx)(e.p,{children:\\"The Data Firehose delivery stream can be created either using the AWS CLI or the AWS console, as shown in step 2 of the guided workflow below.\\\\xA0\\"}),`\\n`,(0,t.jsx)(e.p,{children:(0,t.jsx)(e.img,{src:\\"/assets/images/aws-data-firehose-onboarding/Kibana-Firehose-Flow-Start.png\\",alt:\\"Firehose onboarding step 1\\",width:\\"3090\\",height:\\"2425\\"})}),`\\n`,(0,t.jsx)(e.p,{children:\\"By clicking on the \\\\u2018Create Firehose Stream in AWS\\\\u2019 button under the \\\\u2018Via AWS Console\\\\u2019 tab, the user will be taken to the AWS console and the menu for creating the CloudFormation stack, as shown below.\\\\xA0\\"}),`\\n`,(0,t.jsx)(e.p,{children:(0,t.jsx)(e.img,{src:\\"/assets/images/aws-data-firehose-onboarding/AWS-CloudFormation-Template-Form-1.png\\",alt:\\"Firehose onboarding aws console\\",width:\\"3094\\",height:\\"2417\\"})}),`\\n`,(0,t.jsx)(e.p,{children:\\"The CloudFormation (CF) template provided by Elastic has prepopulated default settings including the Elasticsearch endpoint and the API key, as shown in the screenshot above. The user can review these defaults in the AWS console and proceed by clicking on the \\\\u2018Create stack\\\\u2019 button, as shown below. 
Note that this stack creates IAM resources and so the checkbox acknowledging that must be checked to move forward.\\\\xA0\\"}),`\\n`,(0,t.jsx)(e.p,{children:(0,t.jsx)(e.img,{src:\\"/assets/images/aws-data-firehose-onboarding/AWS-CloudFormation-Template-Form-2.png\\",alt:\\"CF template 2\\",width:\\"3099\\",height:\\"2424\\"})}),`\\n`,(0,t.jsx)(e.p,{children:(0,t.jsx)(e.img,{src:\\"/assets/images/aws-data-firehose-onboarding/CloudFormation-Template-Complete.png\\",alt:\\"CF template complete\\",width:\\"3091\\",height:\\"2420\\"})}),`\\n`,(0,t.jsx)(e.p,{children:\\"Once the CloudFormation stack has been created in AWS, the user can switch back to Kibana. By default, the CF stack will consist of separate delivery streams for CloudWatch logs and metrics, as shown below.\\\\xA0\\"}),`\\n`,(0,t.jsx)(e.p,{children:(0,t.jsx)(e.img,{src:\\"/assets/images/aws-data-firehose-onboarding/Firehose-Streams.png\\",alt:\\"Firehose streams\\",width:\\"3093\\",height:\\"2416\\"})}),`\\n`,(0,t.jsx)(e.p,{children:\\"In Kibana, under step 3 \\\\u2018Visualize your data\\\\u2019 of the workflow, the incoming data starts to appear, categorized by AWS service type as shown below. The page refreshes automatically every 5 s and the new services appear at the bottom of the list.\\\\xA0\\\\xA0\\"}),`\\n`,(0,t.jsx)(e.p,{children:(0,t.jsx)(e.img,{src:\\"/assets/images/aws-data-firehose-onboarding/Kibana-AWS-Services-Detected-1.png\\",alt:\\"Services detected 01\\",width:\\"3093\\",height:\\"2420\\"})}),`\\n`,(0,t.jsx)(e.p,{children:(0,t.jsx)(e.img,{src:\\"/assets/images/aws-data-firehose-onboarding/Kibana-AWS-Services-Detected-2.png\\",alt:\\"Services detected 02\\",width:\\"3093\\",height:\\"2423\\"})}),`\\n`,(0,t.jsx)(e.p,{children:\\"For each detected AWS service, the user is recommended 1-2 pre-built dashboards to explore the health and usage of their services. 
For example, the pre-built dashboard shown below provides a quick overview on the usage of the NAT Gateway.\\\\xA0\\\\xA0\\"}),`\\n`,(0,t.jsx)(e.p,{children:(0,t.jsx)(e.img,{src:\\"/assets/images/aws-data-firehose-onboarding/NAT-Gateway-Dashboard.png\\",alt:\\"Nat Gateway dashboard\\",width:\\"3720\\",height:\\"2555\\"})}),`\\n`,(0,t.jsx)(e.p,{children:\\"In addition to pre-built dashboards, Discover can also be used to explore the ingested CloudWatch logs, as shown below.\\\\xA0\\"}),`\\n`,(0,t.jsx)(e.p,{children:(0,t.jsx)(e.img,{src:\\"/assets/images/aws-data-firehose-onboarding/ECS-Logs.png\\",alt:\\"Discover for logs\\",width:\\"3720\\",height:\\"2559\\"})}),`\\n`,(0,t.jsx)(e.p,{children:\\"AWS Usage overview can be explored using the pre-built dashboard shown below.\\"}),`\\n`,(0,t.jsx)(e.p,{children:(0,t.jsx)(e.img,{src:\\"/assets/images/aws-data-firehose-onboarding/AWS-Usage-Dashboard.png\\",alt:\\"AWS usage\\",width:\\"3726\\",height:\\"2550\\"})}),`\\n`,(0,t.jsx)(e.h2,{id:\\"customisation-options\\",children:\\"Customisation options\\"}),`\\n`,(0,t.jsx)(e.p,{children:\\"The region needs to be selected/modified in the AWS console as shown below, before starting with the CF stack creation.\\\\xA0\\"}),`\\n`,(0,t.jsx)(e.p,{children:(0,t.jsx)(e.img,{src:\\"/assets/images/aws-data-firehose-onboarding/AWS-Console-Region-Selector.png\\",alt:\\"AWS region selector\\",width:\\"2681\\",height:\\"2392\\"})}),`\\n`,(0,t.jsxs)(e.p,{children:[\\"The setting of \\",(0,t.jsx)(e.code,{children:\\"EnableCloudWatchLogs\\"}),\\" parameter and the setting of \\",(0,t.jsx)(e.code,{children:\\"EnableCloudWatchMetrics\\"}),\\" parameter in the AWS console or the CF template can be changed to disable the collection of logs or metrics.\\"]}),`\\n`,(0,t.jsxs)(e.p,{children:[\\"The \\",(0,t.jsx)(e.code,{children:\\"MetricNameFilters\\"}),\\" parameter in the CF template or console can be used to exclude specific namespace-metric names pairs from collection.\\"]}),`\\n`,(0,t.jsxs)(e.p,{children:[\\"The CF template provided by Elastic can be used together with the Terraform resource \\",(0,t.jsx)(e.a,{href:\\"https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/cloudformation_stack\\",rel:\\"nofollow\\",children:\\"aws_cloudformation_stack\\"}),\\" as shown below to deploy in the production environment, to facilitate as-code deployment.\\"]}),`\\n`,(0,t.jsx)(e.p,{children:(0,t.jsx)(e.img,{src:\\"/assets/images/aws-data-firehose-onboarding/Terraform-Template.png\\",alt:\\"Terraform template\\",width:\\"2092\\",height:\\"1680\\"})}),`\\n`,(0,t.jsx)(e.h2,{id:\\"start-your-own-exploration\\",children:\\"Start your own exploration\\\\xA0\\"}),`\\n`,(0,t.jsxs)(e.p,{children:[\\"The new guided onboarding workflow for AWS utilizes the Amazon Firehose delivery stream to collect all available CloudWatch logs & metrics, from a single customer account and a single region. The workflow also installs AWS Integration packages in the Elastic stack, enabling users to start monitoring the usage and performance of their common AWS services using pre-built dashboards, within minutes. Some of the AWS services that can be monitored using this workflow are listed below. 
A complete list of over twenty services that are supported by this workflow along with additional details are available \\",(0,t.jsx)(e.a,{href:\\"https://www.elastic.co/guide/en/observability/current/collect-data-with-aws-firehose.html\\",rel:\\"nofollow\\",children:\\"here\\"}),\\".\\"]}),`\\n`,(0,t.jsx)(e.div,{className:\\"table-container\\",children:(0,t.jsxs)(e.table,{children:[(0,t.jsx)(e.thead,{children:(0,t.jsxs)(e.tr,{children:[(0,t.jsx)(e.th,{}),(0,t.jsx)(e.th,{})]})}),(0,t.jsxs)(e.tbody,{children:[(0,t.jsxs)(e.tr,{children:[(0,t.jsx)(e.td,{children:\\"VPC Flow Logs\\"}),(0,t.jsx)(e.td,{children:\\"Logs\\"})]}),(0,t.jsxs)(e.tr,{children:[(0,t.jsx)(e.td,{children:\\"API Gateway\\"}),(0,t.jsx)(e.td,{children:\\"Logs, Metrics\\"})]}),(0,t.jsxs)(e.tr,{children:[(0,t.jsx)(e.td,{children:\\"CloudTrail\\"}),(0,t.jsx)(e.td,{children:\\"Logs\\"})]}),(0,t.jsxs)(e.tr,{children:[(0,t.jsx)(e.td,{children:\\"Network Firewall\\"}),(0,t.jsx)(e.td,{children:\\"Logs, Metrics\\"})]}),(0,t.jsxs)(e.tr,{children:[(0,t.jsx)(e.td,{children:\\"WAF\\"}),(0,t.jsx)(e.td,{children:\\"Logs\\"})]}),(0,t.jsxs)(e.tr,{children:[(0,t.jsx)(e.td,{children:\\"EC2\\"}),(0,t.jsx)(e.td,{children:\\"Metrics\\"})]}),(0,t.jsxs)(e.tr,{children:[(0,t.jsx)(e.td,{children:\\"RDS\\"}),(0,t.jsx)(e.td,{children:\\"Metrics\\"})]})]})]})})]})}function c(a={}){let{wrapper:e}=a.components||{};return e?(0,t.jsx)(e,{...a,children:(0,t.jsx)(h,{...a})}):h(a)}return y(C);})();\\n;return Component;"},"_id":"articles/aws-data-firehose-onboarding.mdx","_raw":{"sourceFilePath":"articles/aws-data-firehose-onboarding.mdx","sourceFileName":"aws-data-firehose-onboarding.mdx","sourceFileDir":"articles","contentType":"mdx","flattenedPath":"articles/aws-data-firehose-onboarding"},"type":"Article","imageUrl":"/assets/images/aws-data-firehose-onboarding/154567_Image 21.jpg","readingTime":"6 min read","url":"/aws-data-firehose-onboarding","headings":[{"level":2,"title":"Overview of the new Quickstart guided workflow","href":"#overview-of-the-new-quickstart-guided-workflow"},{"level":2,"title":"Onboarding data using Amazon Data Firehose","href":"#onboarding-data-using-amazon-data-firehose"},{"level":2,"title":"Customisation options","href":"#customisation-options"},{"level":2,"title":"Start your own exploration\xa0","href":"#start-your-own-exploration"}]},{"title":"Unleash the power of Elastic and Amazon Kinesis Data Firehose to enhance observability and data analytics","slug":"aws-kinesis-data-firehose-observability-analytics","date":"2023-05-18","description":"AWS users can now leverage the new Amazon Kinesis Firehose Delivery Stream to directly ingest logs into Elastic Cloud in real time for centralized alerting, troubleshooting, and analytics across your cloud and on-premises infrastructure.","image":"image2.png","author":[{"slug":"udayasimha-theepireddy-uday","type":"Author","_raw":{}},{"slug":"bahubali-shetti","type":"Author","_raw":{}}],"tags":[{"slug":"cloud-monitoring","type":"Tag","_raw":{}},{"slug":"aws","type":"Tag","_raw":{}},{"slug":"log-analytics","type":"Tag","_raw":{}},{"slug":"aws-kinesis-data-firehose","type":"Tag","_raw":{}}],"body":{"raw":"\\nAs more organizations leverage the Amazon Web Services (AWS) cloud platform and services to drive operational efficiency and bring products to market, managing logs becomes a critical component of maintaining visibility and safeguarding multi-account AWS environments. 
Traditionally, logs are stored in Amazon Simple Storage Service (Amazon S3) and then shipped to an external monitoring and analysis solution for further processing.\\n\\nTo simplify this process and reduce management overhead, AWS users can now leverage the new Amazon Kinesis Firehose Delivery Stream to ingest logs into Elastic Cloud in AWS in real time and view them in the Elastic Stack alongside other logs for centralized analytics. This eliminates the need for time-consuming and expensive procedures such as VM provisioning or data shipper operations.\\n\\nElastic Observability unifies logs, metrics, and application performance monitoring (APM) traces for a full contextual view across your hybrid [AWS environments alongside their on-premises data sets](https://www.elastic.co/blog/aws-service-metrics-monitor-observability-easy). Elastic Observability enables you to track and monitor performance [across a broad range of AWS services](https://www.elastic.co/observability/aws-monitoring), including AWS Lambda, Amazon Elastic Compute Cloud (EC2), Amazon Elastic Container Service (ECS), Amazon Elastic Kubernetes Service (EKS), Amazon Simple Storage Service (S3), AWS CloudTrail, AWS Network Firewall, and more.\\n\\nIn this blog, we will walk you through how to use the Amazon Kinesis Data Firehose integration — [Elastic is listed in the Amazon Kinesis Firehose](https://aws.amazon.com/blogs/big-data/accelerate-data-insights-with-elastic-and-amazon-kinesis-data-firehose/) drop-down list — to simplify your architecture and send logs to Elastic, so you can monitor and safeguard your multi-account AWS environments.\\n\\n## Announcing the Kinesis Firehose method\\n\\nElastic currently provides both agent-based and serverless mechanisms, and we are pleased to announce the addition of the Kinesis Firehose method. This new method enables customers to directly ingest logs from AWS into Elastic, supplementing our existing options.\\n\\n- [**Elastic Agent**](https://www.youtube.com/watch?v=pnGXjljuEnY) pulls metrics and logs from CloudWatch and S3, where logs are generally pushed from a service (for example, EC2, ELB, WAF, Route 53), and ingests them into Elastic Cloud.\\n- [**Elastic’s Serverless Forwarder**](https://www.elastic.co/blog/elastic-and-aws-serverless-application-repository-speed-time-to-actionable-insights-with-frictionless-log-ingestion-from-amazon-s3) (runs on Lambda and is available in the AWS SAR) sends logs from Kinesis Data Streams, Amazon S3, and AWS CloudWatch log groups into Elastic. To learn more about this topic, please see this [blog post](https://www.elastic.co/blog/elastic-and-aws-serverless-application-repository-speed-time-to-actionable-insights-with-frictionless-log-ingestion-from-amazon-s3).\\n- [**Amazon Kinesis Firehose**](https://docs.aws.amazon.com/firehose/latest/dev/what-is-this-service.html) directly ingests logs from AWS into Elastic (specifically, if you are running Elastic Cloud on AWS).\\n\\nIn this blog, we will cover the last option since we have recently released the Amazon Kinesis Data Firehose integration. 
Specifically, we\'ll review:\\n\\n- A general overview of the Amazon Kinesis Data Firehose integration and how it works with AWS\\n- Step-by-step instructions to set up the Amazon Kinesis Data Firehose integration on AWS and on [Elastic Cloud](https://cloud.elastic.co)\\n\\nBy the end of this blog, you\'ll be equipped with the knowledge and tools to simplify your AWS log management with Elastic Observability and Amazon Kinesis Data Firehose.\\n\\n## Prerequisites and configurations\\n\\nIf you intend to follow the steps outlined in this blog post, there are a few prerequisites and configurations that you should have in place beforehand.\\n\\n1. You will need an account on [Elastic Cloud](https://cloud.elastic.co) and a deployed stack on AWS. Instructions for deploying a stack on AWS can be found [here](https://www.elastic.co/guide/en/elastic-stack/current/installing-elastic-stack.html). This is necessary for AWS Firehose log ingestion.\\n2. You will also need an AWS account with the necessary permissions to pull data from AWS. Details on the required permissions can be found in our [documentation](https://docs.elastic.co/en/integrations/aws#aws-permissions).\\n3. Finally, be sure to turn on VPC Flow Logs for the VPC where your application is deployed and send them to AWS Firehose.\\n\\n## Elastic’s Amazon Kinesis Data Firehose integration\\n\\nElastic has collaborated with AWS to offer a seamless integration of Amazon Kinesis Data Firehose with Elastic, enabling direct ingestion of data from Amazon Kinesis Data Firehose into Elastic without the need for Agents or Beats. All you need to do is configure the Amazon Kinesis Data Firehose delivery stream to send its data to Elastic\'s endpoint. In this configuration, we will demonstrate how to ingest VPC Flow logs and Firewall logs into Elastic. You can follow a similar process to ingest other logs from your AWS environment into Elastic.\\n\\nThere are three distinct configurations available for ingesting VPC Flow and Network Firewall logs into Elastic. One configuration involves sending logs through CloudWatch, and another uses S3 and Kinesis Firehose; each has its own unique setup. With CloudWatch and S3, you can store and forward; with Kinesis Firehose, data is ingested immediately. 
However, in this blog post, we will focus on this new configuration that involves sending VPC Flow logs and Network Firewall logs directly to Elastic.\\n\\n![AWS elastic configuration](/assets/images/aws-kinesis-data-firehose-observability-analytics/image2.png)\\n\\nWe will guide you through the configuration of the easiest setup, which involves directly sending VPC Flow logs and Firewall logs to Amazon Kinesis Data Firehose and then into Elastic Cloud.\\n\\n**Note:** This setup is only compatible with Elastic Cloud on AWS and cannot be used with self-managed, on-premises, or other cloud providers\' Elastic deployments.\\n\\n## Setting it all up\\n\\nTo begin setting up the integration between Amazon Kinesis Data Firehose and Elastic, let\'s go through the necessary steps.\\n\\n### Step 0: Get an account on Elastic Cloud\\n\\nCreate an account on Elastic Cloud by following the instructions provided to [get started on Elastic Cloud](https://cloud.elastic.co/registration?fromURI=/home).\\n\\n![elastic free trial](/assets/images/aws-kinesis-data-firehose-observability-analytics/Screenshot_2023-05-18_at_6.00.28_PM.png)\\n\\n### Step 1: Deploy Elastic on AWS\\n\\nYou can deploy Elastic on AWS via two different approaches: through the UI or through Terraform. We’ll start with the UI option.\\n\\nAfter logging into Elastic Cloud, create a deployment on Elastic. It\'s crucial to make sure that the deployment is on Elastic Cloud on AWS, since Amazon Kinesis Data Firehose connects to a specific endpoint that must be on AWS.\\n\\n![create a deployment](/assets/images/aws-kinesis-data-firehose-observability-analytics/blog-elastic-create-a-deployment.png)\\n\\nAfter your deployment is created, it\'s essential to copy the Elasticsearch endpoint to ensure a seamless configuration process.\\n\\n![O11y log](/assets/images/aws-kinesis-data-firehose-observability-analytics/blog-elastic-O11y-log.png)\\n\\nThe Elasticsearch HTTP endpoint should be copied, as it will be required for the Amazon Firehose destination configuration. Here\'s an example of what the endpoint should look like:\\n\\n```bash\\nhttps://elastic-O11y-log.es.us-east-1.aws.found.io\\n```\\n\\n### _Alternative approach using Terraform_\\n\\nAn alternative approach to deploying Elastic Cloud on AWS is by using Terraform. It\'s also an effective way to automate and streamline the deployment process.\\n\\nTo begin, simply create a Terraform configuration file that outlines the necessary infrastructure. This file should include resources for your Elastic Cloud deployment and any required IAM roles and policies. By using this approach, you can simplify the deployment process and ensure consistency across environments.\\n\\nOne easy way to create your Elastic Cloud deployment with Terraform is to use this GitHub [repo](https://github.com/aws-ia/terraform-elastic-cloud). This resource lets you specify the region, version, and deployment template for your Elastic Cloud deployment, as well as any additional settings you require.\\n\\n### Step 2: To turn on Elastic\'s AWS integrations, navigate to the Elastic Integration section in your deployment\\n\\nTo install AWS assets in your deployment\'s Elastic Integration section, follow these steps:\\n\\n1. Log in to your Elastic Cloud deployment and open **Kibana**.\\n2. To get started, go to the **management** section of Kibana and click on \\" **Integrations.**\\"\\n3. 
Navigate to the **AWS** integration and click on the \\"Install AWS Assets\\" button in the **settings**. This step is important, as it installs the necessary assets such as **dashboards** and **ingest pipelines** to enable data ingestion from AWS services into Elastic.\\n\\n![aws settings](/assets/images/aws-kinesis-data-firehose-observability-analytics/blog-elastic-aws-settings.png)\\n\\n### Step 3: Set up the Amazon Kinesis Data Firehose delivery stream on the AWS Console\\n\\nYou can set up the Kinesis Data Firehose delivery stream via two different approaches: through the AWS Management Console or through Terraform. We’ll start with the console option.\\n\\nTo set up the Kinesis Data Firehose delivery stream on AWS, follow these [steps](https://docs.aws.amazon.com/firehose/latest/dev/create-destination.html#create-destination-elastic):\\n\\n1. Go to the AWS Management Console and select Amazon Kinesis Data Firehose.\\n\\n2. Click on Create delivery stream.\\n\\n3. Choose a delivery stream name and select Direct PUT or other sources as the source.\\n\\n![create delivery stream](/assets/images/aws-kinesis-data-firehose-observability-analytics/blog-elastic-create-delivery-stream.png)\\n\\n4. Choose Elastic as the destination.\\n\\n5. In the Elastic destination section, enter the Elastic endpoint URL that you copied from your Elastic Cloud deployment.\\n\\n![destination settings](/assets/images/aws-kinesis-data-firehose-observability-analytics/blog-elastic-destination-settings.png)\\n\\n6. Choose the content encoding and retry duration as shown above.\\n\\n7. Enter the appropriate parameter values for your AWS log type. For example, for VPC Flow logs, you would specify the **es_datastream_name** parameter with the value **logs-aws.vpcflow-default**.\\n\\n8. Configure an Amazon S3 bucket as the backup for the delivery stream\'s failed data (or all data), and configure any required tags for the delivery stream.\\n\\n9. Review the settings and click on Create delivery stream.\\n\\nIn the example above, we are using the **es_datastream_name** parameter to pull in VPC Flow logs through the **logs-aws.vpcflow-default** datastream. Depending on your use case, this parameter can be configured with one of the following types of logs:\\n\\n- logs-aws.cloudfront_logs-default (AWS CloudFront logs)\\n- logs-aws.ec2_logs-default (EC2 logs in AWS CloudWatch)\\n- logs-aws.elb_logs-default (Amazon Elastic Load Balancing logs)\\n- logs-aws.firewall_logs-default (AWS Network Firewall logs)\\n- logs-aws.route53_public_logs-default (Amazon Route 53 public DNS queries logs)\\n- logs-aws.route53_resolver_logs-default (Amazon Route 53 DNS queries & responses logs)\\n- logs-aws.s3access-default (Amazon S3 server access log)\\n- logs-aws.vpcflow-default (AWS VPC flow logs)\\n- logs-aws.waf-default (AWS WAF Logs)\\n\\n### _Alternative approach using Terraform_\\n\\nUsing the \\" **aws_kinesis_firehose_delivery_stream**\\" resource in **Terraform** is another way to create a Kinesis Firehose delivery stream, allowing you to specify the delivery stream name, data source, and destination - in this case, an Elasticsearch HTTP endpoint. To authenticate, you\'ll need to provide the endpoint URL and an API key. 
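\\n\\nAs a brief, hypothetical aside, one way to mint such an API key is Elasticsearch\'s create API key endpoint; the endpoint URL and credentials below are placeholders based on the earlier example, and the \\"encoded\\" field of the response is the value to hand to Firehose.\\n\\n```bash\\n# Hypothetical endpoint and credentials; substitute your own deployment values.\\ncurl -X POST \\"https://elastic-O11y-log.es.us-east-1.aws.found.io/_security/api_key\\" -H \\"Content-Type: application/json\\" -u \\"elastic:$ELASTIC_PASSWORD\\" -d \'{\\"name\\": \\"firehose-ingest\\"}\'\\n# The response\'s \\"encoded\\" value is what you supply as the Firehose access key.\\n```\\n\\n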
Returning to the Terraform route: leveraging this resource is a fantastic way to automate and streamline your deployment process, resulting in greater consistency and efficiency.\\n\\nHere\'s example code that shows how to create a Kinesis Firehose delivery stream with Terraform that sends data to an Elasticsearch HTTP endpoint:\\n\\n```hcl\\nresource \\"aws_kinesis_firehose_delivery_stream\\" \\"Elasticcloud_stream\\" {\\n  name        = \\"terraform-kinesis-firehose-ElasticCloud-stream\\"\\n  destination = \\"http_endpoint\\"\\n\\n  s3_configuration {\\n    role_arn           = aws_iam_role.firehose.arn\\n    bucket_arn         = aws_s3_bucket.bucket.arn\\n    buffer_size        = 5\\n    buffer_interval    = 300\\n    compression_format = \\"GZIP\\"\\n  }\\n\\n  http_endpoint_configuration {\\n    # Use your deployment\'s Elasticsearch HTTP endpoint (see the example above).\\n    url        = \\"https://elastic-O11y-log.es.us-east-1.aws.found.io\\"\\n    name       = \\"ElasticCloudEndpoint\\"\\n    access_key = \\"ElasticApi-key\\"\\n\\n    buffering_hints {\\n      size_in_mb          = 5\\n      interval_in_seconds = 300\\n    }\\n\\n    role_arn       = \\"arn:Elastic_role\\"\\n    s3_backup_mode = \\"FailedDataOnly\\"\\n  }\\n}\\n```\\n\\n### Step 4: Configure VPC Flow Logs to send to Amazon Kinesis Data Firehose\\n\\nTo complete the setup, you\'ll need to configure VPC Flow logs in the VPC where your application is deployed and send them to the Amazon Kinesis Data Firehose delivery stream you set up in Step 3.\\n\\nEnabling VPC flow logs in AWS is a straightforward process that involves several steps. Here are the step-by-step details to enable VPC flow logs in your AWS account:\\n\\n1. Select the VPC for which you want to enable flow logs.\\n\\n2. In the VPC dashboard, click on \\"Flow Logs\\" under the \\"Logs\\" section.\\n\\n3. Click on the \\"Create Flow Log\\" button to create a new flow log.\\n\\n4. In the \\"Create Flow Log\\" wizard, provide the following information:\\n\\n- Choose the target for your flow logs: in this case, Amazon Kinesis Data Firehose in the same AWS account.\\n- Provide a name for your flow log.\\n- Choose the VPC and the network interface(s) for which you want to enable flow logs.\\n- Choose the flow log format: either AWS default or Custom format.\\n\\n5. Configure the IAM role for the flow logs. If you have an existing IAM role, select it. Otherwise, create a new IAM role that grants the necessary permissions for the flow logs.\\n\\n6. Review the flow log configuration and click \\"Create.\\"\\n\\n![flow log settings](/assets/images/aws-kinesis-data-firehose-observability-analytics/blog-elastic-flow-log-settings.png)\\n\\nCreate the VPC Flow log.\\n\\n### Step 5: After a few minutes, check if flows are coming into Elastic\\n\\nTo confirm that the VPC Flow logs are ingesting into Elastic, you can check the logs in Kibana. You can do this by searching for the index in the Kibana Discover tab and filtering the results by the appropriate index and time range. If VPC Flow logs are flowing in, you should see a list of documents representing the VPC Flow logs.\\n\\n![expanded document](/assets/images/aws-kinesis-data-firehose-observability-analytics/blog-elastic-expanded-document.png)\\n\\n### Step 6: Navigate to Kibana to see your logs parsed and visualized in the [Logs AWS] VPC Flow Log Overview dashboard\\n\\nFinally, there is an Elastic out-of-the-box (OOTB) VPC Flow logs dashboard that displays the top IP addresses that are hitting your VPC, their geographic location, time series of the flows, and a summary of VPC flow log rejects within the selected time frame. 
This dashboard can provide valuable insights into your network traffic and potential security threats.\\n\\n![vpc flow log map](/assets/images/aws-kinesis-data-firehose-observability-analytics/blog-elastic-VPC-flow-log-map.png)\\n\\n_Note: For additional VPC flow log analysis capabilities, please refer to_ [_this blog_](https://www.elastic.co/blog/vpc-flow-logs-monitoring-analytics-observability)_._\\n\\n### Step 7: Configure AWS Network Firewall Logs to send to Kinesis Firehose\\n\\nTo create a Kinesis Data Firehose delivery stream for AWS Network Firewall logs, first log in to the AWS Management Console, navigate to the Kinesis service, select \\"Data Firehose\\", and follow the step-by-step instructions as shown in Step 3. Specify the Elasticsearch endpoint and API key, add a parameter (**es_datastream_name=logs-aws.firewall_logs-default**), and create the delivery stream.\\n\\nSecond, to set up a Network Firewall rule group to send logs to the Kinesis Firehose, go to the Network Firewall section of the console, create a rule group, add a rule to allow traffic to the Kinesis endpoint, and attach the rule group to your Network Firewall configuration. Finally, test the configuration by sending traffic through the Network Firewall to the Kinesis Firehose endpoint and verify that logs are being delivered to your S3 bucket.\\n\\nFollow the instructions below to set up a firewall rule and logging.\\n\\n1. Set up a Network Firewall rule group to send logs to Amazon Kinesis Data Firehose:\\n\\n- Go to the AWS Management Console and select Network Firewall.\\n- Click on \\"Rule groups\\" in the left menu and then click \\"Create rule group.\\"\\n- Choose \\"Stateless\\" or \\"Stateful\\" depending on your requirements, and give your rule group a name. Click \\"Create rule group.\\"\\n- Add a rule to the rule group to allow traffic to the Kinesis Firehose endpoint. For example, if you are using the us-east-1 region, you would add a rule like this:\\n\\n```json\\n{\\n  \\"RuleDefinition\\": {\\n    \\"Actions\\": [\\n      {\\n        \\"Type\\": \\"AWS::KinesisFirehose::DeliveryStream\\",\\n        \\"Options\\": {\\n          \\"DeliveryStreamArn\\": \\"arn:aws:firehose:us-east-1:12387389012:deliverystream/my-delivery-stream\\"\\n        }\\n      }\\n    ],\\n    \\"MatchAttributes\\": {\\n      \\"Destination\\": {\\n        \\"Addresses\\": [\\"api.firehose.us-east-1.amazonaws.com\\"]\\n      },\\n      \\"Protocol\\": {\\n        \\"Numeric\\": 6,\\n        \\"Type\\": \\"TCP\\"\\n      },\\n      \\"PortRanges\\": [\\n        {\\n          \\"From\\": 443,\\n          \\"To\\": 443\\n        }\\n      ]\\n    }\\n  },\\n  \\"RuleOptions\\": {\\n    \\"CustomTCPStarter\\": {\\n      \\"Enabled\\": true,\\n      \\"PortNumber\\": 443\\n    }\\n  }\\n}\\n```\\n\\n- Save the rule group.\\n\\n2. Attach the rule group to your Network Firewall configuration:\\n\\n- Go to the AWS Management Console and select Network Firewall.\\n- Click on \\"Firewall configurations\\" in the left menu and select the configuration you want to attach the rule group to.\\n- Scroll down to \\"Associations\\" and click \\"Edit.\\"\\n- Select the rule group you created in step 1 and click \\"Save.\\"\\n\\n3. 
Test the configuration:\\n\\n- Send traffic through the Network Firewall to the Kinesis Firehose endpoint and verify that logs are being delivered to your S3 bucket.\\n\\n### Step 8: Navigate to Kibana to see your logs parsed and visualized in the [Logs AWS] Firewall Log dashboard\\n\\n![firewall log dashboard](/assets/images/aws-kinesis-data-firehose-observability-analytics/blog-elastic-firewall-log-dashboard.png)\\n\\n## Wrapping up\\n\\nWe’re excited to bring this latest integration for AWS Cloud and Kinesis Data Firehose into production. The ability to consolidate logs and metrics to gain visibility across your cloud and on-premises environment is crucial for today’s distributed environments and applications.\\n\\nFrom EC2, CloudWatch, Lambda, ECS, and SAR, [Elastic Integrations](https://www.elastic.co/integrations/data-integrations?solution=all-solutions&category=aws) allow you to quickly and easily get started with ingesting your telemetry data for monitoring, analytics, and observability. Elastic is constantly delivering frictionless customer experiences, allowing anytime, anywhere access to all of your telemetry data — this streamlined, native integration with AWS is the latest example of our commitment.\\n\\n## Start a free trial today\\n\\nYou can begin with a [7-day free trial](https://aws.amazon.com/marketplace/pp/prodview-voru33wi6xs7k) of Elastic Cloud within the AWS Marketplace to start monitoring and improving your users\' experience today!\\n\\n_The release and timing of any features or functionality described in this post remain at Elastic\'s sole discretion. Any features or functionality not currently available may not be delivered on time or at all._\\n","code":"var Component=(()=>{var g=Object.create;var a=Object.defineProperty;var u=Object.getOwnPropertyDescriptor;var p=Object.getOwnPropertyNames;var m=Object.getPrototypeOf,f=Object.prototype.hasOwnProperty;var w=(i,e)=>()=>(e||i((e={exports:{}}).exports,e),e.exports),y=(i,e)=>{for(var t in e)a(i,t,{get:e[t],enumerable:!0})},r=(i,e,t,s)=>{if(e&&typeof e==\\"object\\"||typeof e==\\"function\\")for(let o of p(e))!f.call(i,o)&&o!==t&&a(i,o,{get:()=>e[o],enumerable:!(s=u(e,o))||s.enumerable});return i};var b=(i,e,t)=>(t=i!=null?g(m(i)):{},r(e||!i||!i.__esModule?a(t,\\"default\\",{value:i,enumerable:!0}):t,i)),v=i=>r(a({},\\"__esModule\\",{value:!0}),i);var c=w((F,l)=>{l.exports=_jsx_runtime});var S={};y(S,{default:()=>d,frontmatter:()=>A});var n=b(c()),A={title:\\"Unleash the power of Elastic and Amazon Kinesis Data Firehose to enhance observability and data analytics\\",slug:\\"aws-kinesis-data-firehose-observability-analytics\\",date:\\"2023-05-18\\",description:\\"AWS users can now leverage the new Amazon Kinesis Firehose Delivery Stream to directly ingest logs into Elastic Cloud in real time for centralized alerting, troubleshooting, and analytics across your cloud and on-premises infrastructure.\\",author:[{slug:\\"udayasimha-theepireddy-uday\\"},{slug:\\"bahubali-shetti\\"}],image:\\"image2.png\\",tags:[{slug:\\"cloud-monitoring\\"},{slug:\\"aws\\"},{slug:\\"log-analytics\\"},{slug:\\"aws-kinesis-data-firehose\\"}]};function h(i){let e={a:\\"a\\",code:\\"code\\",em:\\"em\\",h2:\\"h2\\",h3:\\"h3\\",img:\\"img\\",li:\\"li\\",ol:\\"ol\\",p:\\"p\\",pre:\\"pre\\",strong:\\"strong\\",ul:\\"ul\\",...i.components};return(0,n.jsxs)(n.Fragment,{children:[(0,n.jsx)(e.p,{children:\\"As more organizations leverage the Amazon Web Services (AWS) cloud platform and services to drive operational efficiency and bring products to 
market, managing logs becomes a critical component of maintaining visibility and safeguarding multi-account AWS environments. Traditionally, logs are stored in Amazon Simple Storage Service (Amazon S3) and then shipped to an external monitoring and analysis solution for further processing.\\"}),`\\n`,(0,n.jsx)(e.p,{children:\\"To simplify this process and reduce management overhead, AWS users can now leverage the new Amazon Kinesis Firehose Delivery Stream to ingest logs into Elastic Cloud in AWS in real time and view them in the Elastic Stack alongside other logs for centralized analytics. This eliminates the necessity for time-consuming and expensive procedures such as VM provisioning or data shipper operations.\\"}),`\\n`,(0,n.jsxs)(e.p,{children:[\\"Elastic Observability unifies logs, metrics, and application performance monitoring (APM) traces for a full contextual view across your hybrid \\",(0,n.jsx)(e.a,{href:\\"https://www.elastic.co/blog/aws-service-metrics-monitor-observability-easy\\",rel:\\"nofollow\\",children:\\"AWS environments alongside their on-premises data sets\\"}),\\". Elastic Observability enables you to track and monitor performance \\",(0,n.jsx)(e.a,{href:\\"https://www.elastic.co/observability/aws-monitoring\\",rel:\\"nofollow\\",children:\\"across a broad range of AWS services\\"}),\\", including AWS Lambda, Amazon Elastic Compute Cloud (EC2), Amazon Elastic Container Service (ECS), Amazon Elastic Kubernetes Service (EKS), Amazon Simple Storage Service (S3), Amazon Cloudtrail, Amazon Network Firewall, and more.\\"]}),`\\n`,(0,n.jsxs)(e.p,{children:[\\"In this blog, we will walk you through how to use the Amazon Kinesis Data Firehose integration \\\\u2014 \\",(0,n.jsx)(e.a,{href:\\"https://aws.amazon.com/blogs/big-data/accelerate-data-insights-with-elastic-and-amazon-kinesis-data-firehose/\\",rel:\\"nofollow\\",children:\\"Elastic is listed in the Amazon Kinesis Firehose\\"}),\\" drop-down list \\\\u2014 to simplify your architecture and send logs to Elastic, so you can monitor and safeguard your multi-account AWS environments.\\"]}),`\\n`,(0,n.jsx)(e.h2,{id:\\"announcing-the-kinesis-firehose-method\\",children:\\"Announcing the Kinesis Firehose method\\"}),`\\n`,(0,n.jsx)(e.p,{children:\\"Elastic currently provides both agent-based and serverless mechanisms, and we are pleased to announce the addition of the Kinesis Firehose method. This new method enables customers to directly ingest logs from AWS into Elastic, supplementing our existing options.\\"}),`\\n`,(0,n.jsxs)(e.ul,{children:[`\\n`,(0,n.jsxs)(e.li,{children:[(0,n.jsx)(e.a,{href:\\"https://www.youtube.com/watch?v=pnGXjljuEnY\\",rel:\\"nofollow\\",children:(0,n.jsx)(e.strong,{children:\\"Elastic Agent\\"})}),\\" pulls metrics and logs from CloudWatch and S3 where logs are generally pushed from a service (for example, EC2, ELB, WAF, Route53) and ingests them into Elastic Cloud.\\"]}),`\\n`,(0,n.jsxs)(e.li,{children:[(0,n.jsx)(e.a,{href:\\"https://www.elastic.co/blog/elastic-and-aws-serverless-application-repository-speed-time-to-actionable-insights-with-frictionless-log-ingestion-from-amazon-s3\\",rel:\\"nofollow\\",children:(0,n.jsx)(e.strong,{children:\\"Elastic\\\\u2019s Serverless Forwarder\\"})}),\\" (runs Lambda and available in AWS SAR) sends logs from Kinesis Data Stream, Amazon S3, and AWS Cloudwatch log groups into Elastic. 
To learn more about this topic, please see this \\",(0,n.jsx)(e.a,{href:\\"https://www.elastic.co/blog/elastic-and-aws-serverless-application-repository-speed-time-to-actionable-insights-with-frictionless-log-ingestion-from-amazon-s3\\",rel:\\"nofollow\\",children:\\"blog post\\"}),\\".\\"]}),`\\n`,(0,n.jsxs)(e.li,{children:[(0,n.jsx)(e.a,{href:\\"https://docs.aws.amazon.com/firehose/latest/dev/what-is-this-service.html\\",rel:\\"nofollow\\",children:(0,n.jsx)(e.strong,{children:\\"Amazon Kinesis Firehose\\"})}),\\" directly ingests logs from AWS into Elastic (specifically, if you are running the Elastic Cloud on AWS).\\"]}),`\\n`]}),`\\n`,(0,n.jsx)(e.p,{children:\\"In this blog, we will cover the last option since we have recently released the Amazon Kinesis Data Firehose integration. Specifically, we\'ll review:\\"}),`\\n`,(0,n.jsxs)(e.ul,{children:[`\\n`,(0,n.jsx)(e.li,{children:\\"A general overview of the Amazon Kinesis Data Firehose integration and how it works with AWS\\"}),`\\n`,(0,n.jsxs)(e.li,{children:[\\"Step-by-step instructions to set up the Amazon Kinesis Data Firehose integration on AWS and on \\",(0,n.jsx)(e.a,{href:\\"https://cloud.elastic.co\\",rel:\\"nofollow\\",children:\\"Elastic Cloud\\"})]}),`\\n`]}),`\\n`,(0,n.jsx)(e.p,{children:\\"By the end of this blog, you\'ll be equipped with the knowledge and tools to simplify your AWS log management with Elastic Observability and Amazon Kinesis Data Firehose.\\"}),`\\n`,(0,n.jsx)(e.h2,{id:\\"prerequisites-and-configurations\\",children:\\"Prerequisites and configurations\\"}),`\\n`,(0,n.jsx)(e.p,{children:\\"If you intend to follow the steps outlined in this blog post, there are a few prerequisites and configurations that you should have in place beforehand.\\"}),`\\n`,(0,n.jsxs)(e.ol,{children:[`\\n`,(0,n.jsxs)(e.li,{children:[\\"You will need an account on \\",(0,n.jsx)(e.a,{href:\\"https://cloud.elastic.co\\",rel:\\"nofollow\\",children:\\"Elastic Cloud\\"}),\\" and a deployed stack on AWS. Instructions for deploying a stack on AWS can be found \\",(0,n.jsx)(e.a,{href:\\"https://www.elastic.co/guide/en/elastic-stack/current/installing-elastic-stack.html\\",rel:\\"nofollow\\",children:\\"here\\"}),\\". This is necessary for AWS Firehose Log ingestion.\\"]}),`\\n`,(0,n.jsxs)(e.li,{children:[\\"You will also need an AWS account with the necessary permissions to pull data from AWS. Details on the required permissions can be found in our \\",(0,n.jsx)(e.a,{href:\\"https://docs.elastic.co/en/integrations/aws#aws-permissions\\",rel:\\"nofollow\\",children:\\"documentation\\"}),\\".\\"]}),`\\n`,(0,n.jsx)(e.li,{children:\\"Finally, be sure to turn on VPC Flow Logs for the VPC where your application is deployed and send them to AWS Firehose.\\"}),`\\n`]}),`\\n`,(0,n.jsx)(e.h2,{id:\\"elastics-amazon-kinesis-data-firehose-integration\\",children:\\"Elastic\\\\u2019s Amazon Kinesis Data Firehose integration\\"}),`\\n`,(0,n.jsx)(e.p,{children:\\"Elastic has collaborated with AWS to offer a seamless integration of Amazon Kinesis Data Firehose with Elastic, enabling direct ingestion of data from Amazon Kinesis Data Firehose into Elastic without the need for Agents or Beats. All you need to do is configure the Amazon Kinesis Data Firehose delivery stream to send its data to Elastic\'s endpoint. In this configuration, we will demonstrate how to ingest VPC Flow logs and Firewall logs into Elastic. 
You can follow a similar process to ingest other logs from your AWS environment into Elastic.\\"}),`\\n`,(0,n.jsx)(e.p,{children:\\"There are three distinct configurations available for ingesting VPC Flow and Network firewall logs into Elastic. One configuration involves sending logs through CloudWatch, and another uses S3 and Kinesis Firehose; each has its own unique setup. With Cloudwatch and S3 you can store and forward but with Kinesis Firehose you will have to ingest immediately. However, in this blog post, we will focus on this new configuration that involves sending VPC Flow logs and Network Firewall logs directly to Elastic.\\"}),`\\n`,(0,n.jsx)(e.p,{children:(0,n.jsx)(e.img,{src:\\"/assets/images/aws-kinesis-data-firehose-observability-analytics/image2.png\\",alt:\\"AWS elastic configuration\\",width:\\"1820\\",height:\\"844\\"})}),`\\n`,(0,n.jsx)(e.p,{children:\\"We will guide you through the configuration of the easiest setup, which involves directly sending VPC Flow logs and Firewalls logs to Amazon Kinesis Data Firehose and then into Elastic Cloud.\\"}),`\\n`,(0,n.jsxs)(e.p,{children:[(0,n.jsx)(e.strong,{children:\\"Note:\\"}),\\" It\'s important to note that this setup is only compatible with Elastic Cloud on AWS and cannot be used with self-managed or on-premise or other cloud provider Elastic deployments.\\"]}),`\\n`,(0,n.jsx)(e.h2,{id:\\"setting-it-all-up\\",children:\\"Setting it all up\\"}),`\\n`,(0,n.jsx)(e.p,{children:\\"To begin setting up the integration between Amazon Kinesis Data Firehose and Elastic, let\'s go through the necessary steps.\\"}),`\\n`,(0,n.jsx)(e.h3,{id:\\"step-0-get-an-account-on-elastic-cloud\\",children:\\"Step 0: Get an account on Elastic Cloud\\"}),`\\n`,(0,n.jsxs)(e.p,{children:[\\"Create an account on Elastic Cloud by following the instructions provided to \\",(0,n.jsx)(e.a,{href:\\"https://cloud.elastic.co/registration?fromURI=/home\\",rel:\\"nofollow\\",children:\\"get started on Elastic Cloud\\"}),\\".\\"]}),`\\n`,(0,n.jsx)(e.p,{children:(0,n.jsx)(e.img,{src:\\"/assets/images/aws-kinesis-data-firehose-observability-analytics/Screenshot_2023-05-18_at_6.00.28_PM.png\\",alt:\\"elastic free trial\\",width:\\"327\\",height:\\"283\\"})}),`\\n`,(0,n.jsx)(e.h3,{id:\\"step-1-deploy-elastic-on-aws\\",children:\\"Step 1: Deploy Elastic on AWS\\"}),`\\n`,(0,n.jsx)(e.p,{children:\\"You can deploy Elastic on AWS via two different approaches: through the UI or through Terraform. We\\\\u2019ll start first with the UI option.\\"}),`\\n`,(0,n.jsx)(e.p,{children:\\"After logging into Elastic Cloud, create a deployment on Elastic. It\'s crucial to make sure that the deployment is on Elastic Cloud on AWS since the Amazon Kinesis Data Firehose connects to a specific endpoint that must be on AWS.\\"}),`\\n`,(0,n.jsx)(e.p,{children:(0,n.jsx)(e.img,{src:\\"/assets/images/aws-kinesis-data-firehose-observability-analytics/blog-elastic-create-a-deployment.png\\",alt:\\"create a deployment\\",width:\\"1144\\",height:\\"1006\\"})}),`\\n`,(0,n.jsx)(e.p,{children:\\"After your deployment is created, it\'s essential to copy the Elasticsearch endpoint to ensure a seamless configuration process.\\"}),`\\n`,(0,n.jsx)(e.p,{children:(0,n.jsx)(e.img,{src:\\"/assets/images/aws-kinesis-data-firehose-observability-analytics/blog-elastic-O11y-log.png\\",alt:\\"O11y log\\",width:\\"712\\",height:\\"662\\"})}),`\\n`,(0,n.jsx)(e.p,{children:\\"The Elasticsearch HTTP endpoint should be copied and used for Amazon Firehose destination configuration purposes, as it will be required. 
Here\'s an example of what the endpoint should look like:\\"}),`\\n`,(0,n.jsx)(e.pre,{children:(0,n.jsx)(e.code,{className:\\"language-bash\\",children:`https://elastic-O11y-log.es.us-east-1.aws.found.io\\n`})}),`\\n`,(0,n.jsx)(e.h3,{id:\\"alternative-approach-using-terraform\\",children:(0,n.jsx)(e.em,{children:\\"Alternative approach using Terraform\\"})}),`\\n`,(0,n.jsx)(e.p,{children:\\"An alternative approach to deploying Elastic Cloud on AWS is by using Terraform. It\'s also an effective way to automate and streamline the deployment process.\\"}),`\\n`,(0,n.jsx)(e.p,{children:\\"To begin, simply create a Terraform configuration file that outlines the necessary infrastructure. This file should include resources for your Elastic Cloud deployment and any required IAM roles and policies. By using this approach, you can simplify the deployment process and ensure consistency across environments.\\"}),`\\n`,(0,n.jsxs)(e.p,{children:[\\"One easy way to create your Elastic Cloud deployment with Terraform is to use this Github \\",(0,n.jsx)(e.a,{href:\\"https://github.com/aws-ia/terraform-elastic-cloud\\",rel:\\"nofollow\\",children:\\"repo\\"}),\\". This resource lets you specify the region, version, and deployment template for your Elastic Cloud deployment, as well as any additional settings you require.\\"]}),`\\n`,(0,n.jsx)(e.h3,{id:\\"step-2-to-turn-on-elastics-aws-integrations-navigate-to-the-elastic-integration-section-in-your-deployment\\",children:\\"Step 2: To turn on Elastic\'s AWS integrations, navigate to the Elastic Integration section in your deployment\\"}),`\\n`,(0,n.jsx)(e.p,{children:\\"To install AWS assets in your deployment\'s Elastic Integration section, follow these steps:\\"}),`\\n`,(0,n.jsxs)(e.ol,{children:[`\\n`,(0,n.jsxs)(e.li,{children:[\\"Log in to your Elastic Cloud deployment and open \\",(0,n.jsx)(e.strong,{children:\\"Kibana\\"}),\\".\\"]}),`\\n`,(0,n.jsxs)(e.li,{children:[\\"To get started, go to the \\",(0,n.jsx)(e.strong,{children:\\"management\\"}),\' section of Kibana and click on \\" \',(0,n.jsx)(e.strong,{children:\\"Integrations.\\"}),\'\\"\']}),`\\n`,(0,n.jsxs)(e.li,{children:[\\"Navigate to the \\",(0,n.jsx)(e.strong,{children:\\"AWS\\"}),\' integration and click on the \\"Install AWS Assets\\" button in the \',(0,n.jsx)(e.strong,{children:\\"settings\\"}),\\".This step is important as it installs the necessary assets such as \\",(0,n.jsx)(e.strong,{children:\\"dashboards\\"}),\\" and \\",(0,n.jsx)(e.strong,{children:\\"ingest pipelines\\"}),\\" to enable data ingestion from AWS services into Elastic.\\"]}),`\\n`]}),`\\n`,(0,n.jsx)(e.p,{children:(0,n.jsx)(e.img,{src:\\"/assets/images/aws-kinesis-data-firehose-observability-analytics/blog-elastic-aws-settings.png\\",alt:\\"aws settings\\",width:\\"1999\\",height:\\"684\\"})}),`\\n`,(0,n.jsx)(e.h3,{id:\\"step-3-set-up-the-amazon-kinesis-data-firehose-delivery-stream-on-the-aws-console\\",children:\\"Step 3: Set up the Amazon Kinesis Data Firehose delivery stream on the AWS Console\\"}),`\\n`,(0,n.jsx)(e.p,{children:\\"You can set up the Kinesis Data Firehose delivery stream via two different approaches: through the AWS Management Console or through Terraform. 
We\\\\u2019ll start first with the console option.\\"}),`\\n`,(0,n.jsxs)(e.p,{children:[\\"To set up the Kinesis Data Firehose delivery stream on AWS, follow these \\",(0,n.jsx)(e.a,{href:\\"https://docs.aws.amazon.com/firehose/latest/dev/create-destination.html#create-destination-elastic\\",rel:\\"nofollow\\",children:\\"steps\\"}),\\":\\"]}),`\\n`,(0,n.jsxs)(e.ol,{children:[`\\n`,(0,n.jsxs)(e.li,{children:[`\\n`,(0,n.jsx)(e.p,{children:\\"Go to the AWS Management Console and select Amazon Kinesis Data Firehose.\\"}),`\\n`]}),`\\n`,(0,n.jsxs)(e.li,{children:[`\\n`,(0,n.jsx)(e.p,{children:\\"Click on Create delivery stream.\\"}),`\\n`]}),`\\n`,(0,n.jsxs)(e.li,{children:[`\\n`,(0,n.jsx)(e.p,{children:\\"Choose a delivery stream name and select Direct PUT or other sources as the source.\\"}),`\\n`]}),`\\n`]}),`\\n`,(0,n.jsx)(e.p,{children:(0,n.jsx)(e.img,{src:\\"/assets/images/aws-kinesis-data-firehose-observability-analytics/blog-elastic-create-delivery-stream.png\\",alt:\\"create delivery stream\\",width:\\"1240\\",height:\\"706\\"})}),`\\n`,(0,n.jsxs)(e.ol,{start:\\"4\\",children:[`\\n`,(0,n.jsxs)(e.li,{children:[`\\n`,(0,n.jsx)(e.p,{children:\\"Choose Elastic as the destination.\\"}),`\\n`]}),`\\n`,(0,n.jsxs)(e.li,{children:[`\\n`,(0,n.jsx)(e.p,{children:\\"In the Elastic destination section, enter the Elastic endpoint URL that you copied from your Elastic Cloud deployment.\\"}),`\\n`]}),`\\n`]}),`\\n`,(0,n.jsx)(e.p,{children:(0,n.jsx)(e.img,{src:\\"/assets/images/aws-kinesis-data-firehose-observability-analytics/blog-elastic-destination-settings.png\\",alt:\\"destination settings\\",width:\\"1194\\",height:\\"1844\\"})}),`\\n`,(0,n.jsxs)(e.ol,{start:\\"6\\",children:[`\\n`,(0,n.jsxs)(e.li,{children:[`\\n`,(0,n.jsx)(e.p,{children:\\"Choose the content encoding and retry duration as shown above.\\"}),`\\n`]}),`\\n`,(0,n.jsxs)(e.li,{children:[`\\n`,(0,n.jsxs)(e.p,{children:[\\"Enter the appropriate parameter values for your AWS log type. For example, for VPC Flow logs, you would need to specify the _ \\",(0,n.jsx)(e.strong,{children:\\"es_datastream_name\\"}),\\" _ and _ \\",(0,n.jsx)(e.strong,{children:\\"logs-aws.vpc flow-default\\"}),\\" _.\\"]}),`\\n`]}),`\\n`,(0,n.jsxs)(e.li,{children:[`\\n`,(0,n.jsx)(e.p,{children:\\"Configure the Amazon S3 bucket as the source backup for the Amazon Kinesis Data Firehose delivery stream failed data or all data, and configure any required tags for the delivery stream.\\"}),`\\n`]}),`\\n`,(0,n.jsxs)(e.li,{children:[`\\n`,(0,n.jsx)(e.p,{children:\\"Review the settings and click on Create delivery stream.\\"}),`\\n`]}),`\\n`]}),`\\n`,(0,n.jsxs)(e.p,{children:[\\"In the example above, we are using the \\",(0,n.jsx)(e.strong,{children:\\"es_datastream_name\\"}),\\" parameter to pull in VPC Flow logs through the \\",(0,n.jsx)(e.strong,{children:\\"logs-aws.vpcflow-default\\"}),\\" datastream. 
Depending on your use case, this parameter can be configured with one of the following types of logs:\\"]}),`\\n`,(0,n.jsxs)(e.ul,{children:[`\\n`,(0,n.jsx)(e.li,{children:\\"logs-aws.cloudfront_logs-default (AWS CloudFront logs)\\"}),`\\n`,(0,n.jsx)(e.li,{children:\\"logs-aws.ec2_logs-default (EC2 logs in AWS CloudWatch)\\"}),`\\n`,(0,n.jsx)(e.li,{children:\\"logs-aws.elb_logs-default (Amazon Elastic Load Balancing logs)\\"}),`\\n`,(0,n.jsx)(e.li,{children:\\"logs-aws.firewall_logs-default (AWS Network Firewall logs)\\"}),`\\n`,(0,n.jsx)(e.li,{children:\\"logs-aws.route53_public_logs-default (Amazon Route 53 public DNS queries logs)\\"}),`\\n`,(0,n.jsx)(e.li,{children:\\"logs-aws.route53_resolver_logs-default (Amazon Route 53 DNS queries & responses logs)\\"}),`\\n`,(0,n.jsx)(e.li,{children:\\"logs-aws.s3access-default (Amazon S3 server access log)\\"}),`\\n`,(0,n.jsx)(e.li,{children:\\"logs-aws.vpcflow-default (AWS VPC flow logs)\\"}),`\\n`,(0,n.jsx)(e.li,{children:\\"logs-aws.waf-default (AWS WAF Logs)\\"}),`\\n`]}),`\\n`,(0,n.jsx)(e.h3,{id:\\"alternative-approach-using-terraform-1\\",children:(0,n.jsx)(e.em,{children:\\"Alternative approach using Terraform\\"})}),`\\n`,(0,n.jsxs)(e.p,{children:[\'Using the \\" \',(0,n.jsx)(e.strong,{children:\\"aws_kinesis_firehose_delivery_stream\\"}),\'\\" resource in \',(0,n.jsx)(e.strong,{children:\\"Terraform\\"}),\\" is another way to create a Kinesis Firehose delivery stream, allowing you to specify the delivery stream name, data source, and destination - in this case, an Elasticsearch HTTP endpoint. To authenticate, you\'ll need to provide the endpoint URL and an API key. Leveraging this Terraform resource is a fantastic way to automate and streamline your deployment process, resulting in greater consistency and efficiency.\\"]}),`\\n`,(0,n.jsx)(e.p,{children:\\"Here\'s an example code that shows you how to create a Kinesis Firehose delivery stream with Terraform that sends data to an Elasticsearch HTTP endpoint:\\"}),`\\n`,(0,n.jsx)(e.pre,{children:(0,n.jsx)(e.code,{className:\\"language-hcl\\",children:`resource \\"aws_kinesis_firehose_delivery_stream\\" \\\\u201CElasticcloud_stream\\" {\\n name = \\"terraform-kinesis-firehose-ElasticCloud-stream\\"\\n destination = \\"http_endpoint\\\\u201D\\n s3_configuration {\\n role_arn = aws_iam_role.firehose.arn\\n bucket_arn = aws_s3_bucket.bucket.arn\\n buffer_size = 5\\n buffer_interval = 300\\n compression_format = \\"GZIP\\"\\n }\\n http_endpoint_configuration {\\n url = \\"https://cloud.elastic.co/\\"\\n name = \\\\u201CElasticCloudEndpoint\\"\\n access_key = \\\\u201CElasticApi-key\\"\\n buffering_hints {\\n size_in_mb = 5\\n interval_in_seconds = 300\\n }\\n\\n role_arn = \\"arn:Elastic_role\\"\\n s3_backup_mode = \\"FailedDataOnly\\"\\n }\\n}\\n`})}),`\\n`,(0,n.jsx)(e.h3,{id:\\"step-4-configure-vpc-flow-logs-to-send-to-amazon-kinesis-data-firehose\\",children:\\"Step 4: Configure VPC Flow Logs to send to Amazon Kinesis Data Firehose\\"}),`\\n`,(0,n.jsx)(e.p,{children:\\"To complete the setup, you\'ll need to configure VPC Flow logs in the VPC where your application is deployed and send them to the Amazon Kinesis Data Firehose delivery stream you set up in Step 3.\\"}),`\\n`,(0,n.jsx)(e.p,{children:\\"Enabling VPC flow logs in AWS is a straightforward process that involves several steps. 
Here\'s a step-by-step guide to enabling VPC flow logs in your AWS account:\\"}),`\\n`,(0,n.jsxs)(e.ol,{children:[`\\n`,(0,n.jsxs)(e.li,{children:[`\\n`,(0,n.jsx)(e.p,{children:\\"Select the VPC for which you want to enable flow logs.\\"}),`\\n`]}),`\\n`,(0,n.jsxs)(e.li,{children:[`\\n`,(0,n.jsx)(e.p,{children:\'In the VPC dashboard, click on \\"Flow Logs\\" under the \\"Logs\\" section.\'}),`\\n`]}),`\\n`,(0,n.jsxs)(e.li,{children:[`\\n`,(0,n.jsx)(e.p,{children:\'Click on the \\"Create Flow Log\\" button to create a new flow log.\'}),`\\n`]}),`\\n`,(0,n.jsxs)(e.li,{children:[`\\n`,(0,n.jsx)(e.p,{children:\'In the \\"Create Flow Log\\" wizard, provide the following information:\'}),`\\n`]}),`\\n`]}),`\\n`,(0,n.jsx)(e.p,{children:\\"Choose the target for your flow logs: in this case, Amazon Kinesis Data Firehose in the same AWS account.\\"}),`\\n`,(0,n.jsxs)(e.ul,{children:[`\\n`,(0,n.jsx)(e.li,{children:\\"Provide a name for your flow log.\\"}),`\\n`,(0,n.jsx)(e.li,{children:\\"Choose the VPC and the network interface(s) for which you want to enable flow logs.\\"}),`\\n`,(0,n.jsx)(e.li,{children:\\"Choose the flow log format: either AWS default or Custom format.\\"}),`\\n`]}),`\\n`,(0,n.jsxs)(e.ol,{start:\\"5\\",children:[`\\n`,(0,n.jsxs)(e.li,{children:[`\\n`,(0,n.jsx)(e.p,{children:\\"Configure the IAM role for the flow logs. If you have an existing IAM role, select it. Otherwise, create a new IAM role that grants the necessary permissions for the flow logs.\\"}),`\\n`]}),`\\n`,(0,n.jsxs)(e.li,{children:[`\\n`,(0,n.jsx)(e.p,{children:\'Review the flow log configuration and click \\"Create.\\"\'}),`\\n`]}),`\\n`]}),`\\n`,(0,n.jsx)(e.p,{children:(0,n.jsx)(e.img,{src:\\"/assets/images/aws-kinesis-data-firehose-observability-analytics/blog-elastic-flow-log-settings.png\\",alt:\\"flow log settings\\",width:\\"1178\\",height:\\"1284\\"})}),`\\n`,(0,n.jsx)(e.p,{children:\\"Finally, create the VPC Flow log.\\"}),`\\n`,(0,n.jsx)(e.h3,{id:\\"step-5-after-a-few-minutes-check-if-flows-are-coming-into-elastic\\",children:\\"Step 5: After a few minutes, check if flows are coming into Elastic\\"}),`\\n`,(0,n.jsx)(e.p,{children:\\"To confirm that the VPC Flow logs are being ingested into Elastic, you can check the logs in Kibana. You can do this by searching for the index in the Kibana Discover tab and filtering the results by the appropriate index and time range. If VPC Flow logs are flowing in, you should see a list of documents representing the VPC Flow logs.\\"}),`\\n`,(0,n.jsx)(e.p,{children:(0,n.jsx)(e.img,{src:\\"/assets/images/aws-kinesis-data-firehose-observability-analytics/blog-elastic-expanded-document.png\\",alt:\\"expanded document\\",width:\\"1999\\",height:\\"945\\"})}),`\\n`,(0,n.jsx)(e.h3,{id:\\"step-6-navigate-to-kibana-to-see-your-logs-parsed-and-visualized-in-the-logs-aws-vpc-flow-log-overview-dashboard\\",children:\\"Step 6: Navigate to Kibana to see your logs parsed and visualized in the [Logs AWS] VPC Flow Log Overview dashboard\\"}),`\\n`,(0,n.jsx)(e.p,{children:\\"Finally, there is an Elastic out-of-the-box (OOTB) VPC Flow logs dashboard that displays the top IP addresses that are hitting your VPC, their geographic location, time series of the flows, and a summary of VPC flow log rejects within the selected time frame. 
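If you want to sanity-check the underlying data without opening the dashboard, you can also query the data stream directly. This is a hedged sketch: the deployment endpoint and API key are placeholders, and the field name follows the AWS integration's vpcflow mapping, so adjust it if your mapping differs.

```bash
# Hedged sketch: count REJECT flow records ingested in the last
# 15 minutes (placeholder endpoint and API key; field name assumes
# the AWS integration's vpcflow mapping, e.g. aws.vpcflow.action).
curl -s -X POST "https://<your-deployment>.es.io/logs-aws.vpcflow-default/_count" \
  -H "Authorization: ApiKey <your-api-key>" \
  -H "Content-Type: application/json" \
  -d '{
    "query": {
      "bool": {
        "filter": [
          {"term": {"aws.vpcflow.action": "REJECT"}},
          {"range": {"@timestamp": {"gte": "now-15m"}}}
        ]
      }
    }
  }'
```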
The dashboard can provide valuable insights into your network traffic and potential security threats.\\"}),`\\n`,(0,n.jsx)(e.p,{children:(0,n.jsx)(e.img,{src:\\"/assets/images/aws-kinesis-data-firehose-observability-analytics/blog-elastic-VPC-flow-log-map.png\\",alt:\\"vpc flow log map\\",width:\\"1999\\",height:\\"1056\\"})}),`\\n`,(0,n.jsxs)(e.p,{children:[(0,n.jsx)(e.em,{children:\\"Note: For additional VPC flow log analysis capabilities, please refer to\\"}),\\" \\",(0,n.jsx)(e.a,{href:\\"https://www.elastic.co/blog/vpc-flow-logs-monitoring-analytics-observability\\",rel:\\"nofollow\\",children:(0,n.jsx)(e.em,{children:\\"this blog\\"})}),(0,n.jsx)(e.em,{children:\\".\\"})]}),`\\n`,(0,n.jsx)(e.h3,{id:\\"step-7-configure-aws-network-firewall-logs-to-send-to-kinesis-firehose\\",children:\\"Step 7: Configure AWS Network Firewall Logs to send to Kinesis Firehose\\"}),`\\n`,(0,n.jsxs)(e.p,{children:[\'To create a Kinesis Data Firehose delivery stream for AWS Network Firewall logs, first log in to the AWS Management Console, navigate to the Kinesis service, select \\"Data Firehose\\", and follow the step-by-step instructions as shown in Step 3. Specify the Elasticsearch endpoint, API key, add a parameter (\',(0,n.jsx)(e.strong,{children:\\"es_datastream_name=logs-aws.firewall_logs-default\\"}),\\"), and create the delivery stream.\\"]}),`\\n`,(0,n.jsx)(e.p,{children:\\"Second, to set up a Network Firewall rule group to send logs to the Kinesis Firehose, go to the Network Firewall section of the console, create a rule group, add a rule to allow traffic to the Kinesis endpoint, and attach the rule group to your Network Firewall configuration. Finally, test the configuration by sending traffic through the Network Firewall to the Kinesis Firehose endpoint and verify that logs are being delivered to your S3 bucket.\\"}),`\\n`,(0,n.jsx)(e.p,{children:\\"Follow the instructions below to set up a firewall rule and logging.\\"}),`\\n`,(0,n.jsxs)(e.ol,{children:[`\\n`,(0,n.jsx)(e.li,{children:\\"Set up a Network Firewall rule group to send logs to Amazon Kinesis Data Firehose:\\"}),`\\n`]}),`\\n`,(0,n.jsxs)(e.ul,{children:[`\\n`,(0,n.jsx)(e.li,{children:\\"Go to the AWS Management Console and select Network Firewall.\\"}),`\\n`,(0,n.jsx)(e.li,{children:\'Click on \\"Rule groups\\" in the left menu and then click \\"Create rule group.\\"\'}),`\\n`,(0,n.jsx)(e.li,{children:\'Choose \\"Stateless\\" or \\"Stateful\\" depending on your requirements, and give your rule group a name. Click \\"Create rule group.\\"\'}),`\\n`,(0,n.jsx)(e.li,{children:\\"Add a rule to the rule group to allow traffic to the Kinesis Firehose endpoint. 
For example, if you are using the us-east-1 region, you would add a rule like this:\\"}),`\\n`]}),`\\n`,(0,n.jsx)(e.pre,{children:(0,n.jsx)(e.code,{className:\\"language-json\\",children:`{\\n  \\"RuleDefinition\\": {\\n    \\"Actions\\": [\\n      {\\n        \\"Type\\": \\"AWS::KinesisFirehose::DeliveryStream\\",\\n        \\"Options\\": {\\n          \\"DeliveryStreamArn\\": \\"arn:aws:firehose:us-east-1:12387389012:deliverystream/my-delivery-stream\\"\\n        }\\n      }\\n    ],\\n    \\"MatchAttributes\\": {\\n      \\"Destination\\": {\\n        \\"Addresses\\": [\\"api.firehose.us-east-1.amazonaws.com\\"]\\n      },\\n      \\"Protocol\\": {\\n        \\"Numeric\\": 6,\\n        \\"Type\\": \\"TCP\\"\\n      },\\n      \\"PortRanges\\": [\\n        {\\n          \\"From\\": 443,\\n          \\"To\\": 443\\n        }\\n      ]\\n    }\\n  },\\n  \\"RuleOptions\\": {\\n    \\"CustomTCPStarter\\": {\\n      \\"Enabled\\": true,\\n      \\"PortNumber\\": 443\\n    }\\n  }\\n}\\n`})}),`\\n`,(0,n.jsxs)(e.ul,{children:[`\\n`,(0,n.jsx)(e.li,{children:\\"Save the rule group.\\"}),`\\n`]}),`\\n`,(0,n.jsxs)(e.ol,{start:\\"2\\",children:[`\\n`,(0,n.jsx)(e.li,{children:\\"Attach the rule group to your Network Firewall configuration:\\"}),`\\n`]}),`\\n`,(0,n.jsxs)(e.ul,{children:[`\\n`,(0,n.jsx)(e.li,{children:\\"Go to the AWS Management Console and select Network Firewall.\\"}),`\\n`,(0,n.jsx)(e.li,{children:\'Click on \\"Firewall configurations\\" in the left menu and select the configuration you want to attach the rule group to.\'}),`\\n`,(0,n.jsx)(e.li,{children:\'Scroll down to \\"Associations\\" and click \\"Edit.\\"\'}),`\\n`,(0,n.jsx)(e.li,{children:\'Select the rule group you created in step 1 and click \\"Save.\\"\'}),`\\n`]}),`\\n`,(0,n.jsxs)(e.ol,{start:\\"3\\",children:[`\\n`,(0,n.jsx)(e.li,{children:\\"Test the configuration:\\"}),`\\n`]}),`\\n`,(0,n.jsxs)(e.ul,{children:[`\\n`,(0,n.jsx)(e.li,{children:\\"Send traffic through the Network Firewall to the Kinesis Firehose endpoint and verify that logs are being delivered to your S3 bucket.\\"}),`\\n`]}),`\\n`,(0,n.jsx)(e.h3,{id:\\"step-8-navigate-to-kibana-to-see-your-logs-parsed-and-visualized-in-the-logs-aws-firewall-log-dashboard\\",children:\\"Step 8: Navigate to Kibana to see your logs parsed and visualized in the [Logs AWS] Firewall Log dashboard\\"}),`\\n`,(0,n.jsx)(e.p,{children:(0,n.jsx)(e.img,{src:\\"/assets/images/aws-kinesis-data-firehose-observability-analytics/blog-elastic-firewall-log-dashboard.png\\",alt:\\"firewall log dashboard\\",width:\\"1999\\",height:\\"914\\"})}),`\\n`,(0,n.jsx)(e.h2,{id:\\"wrapping-up\\",children:\\"Wrapping up\\"}),`\\n`,(0,n.jsx)(e.p,{children:\\"We\\\\u2019re excited to bring this latest integration for AWS Cloud and Kinesis Data Firehose into production. The ability to consolidate logs and metrics to gain visibility across your cloud and on-premises environment is crucial for today\\\\u2019s distributed environments and applications.\\"}),`\\n`,(0,n.jsxs)(e.p,{children:[\\"From EC2, CloudWatch, Lambda, ECS, and SAR, \\",(0,n.jsx)(e.a,{href:\\"https://www.elastic.co/integrations/data-integrations?solution=all-solutions&category=aws\\",rel:\\"nofollow\\",children:\\"Elastic Integrations\\"}),\\" allow you to quickly and easily get started with ingesting your telemetry data for monitoring, analytics, and observability. 
Elastic is constantly delivering frictionless customer experiences, allowing anytime, anywhere access to all of your telemetry data \\\\u2014 this streamlined, native integration with AWS is the latest example of our commitment.\\"]}),`\\n`,(0,n.jsx)(e.h2,{id:\\"start-a-free-trial-today\\",children:\\"Start a free trial today\\"}),`\\n`,(0,n.jsxs)(e.p,{children:[\\"You can begin with a \\",(0,n.jsx)(e.a,{href:\\"https://aws.amazon.com/marketplace/pp/prodview-voru33wi6xs7k\\",rel:\\"nofollow\\",children:\\"7-day free trial\\"}),\\" of Elastic Cloud within the AWS Marketplace to start monitoring and improving your users\' experience today!\\"]}),`\\n`,(0,n.jsx)(e.p,{children:(0,n.jsx)(e.em,{children:\\"The release and timing of any features or functionality described in this post remain at Elastic\'s sole discretion. Any features or functionality not currently available may not be delivered on time or at all.\\"})})]})}function d(i={}){let{wrapper:e}=i.components||{};return e?(0,n.jsx)(e,{...i,children:(0,n.jsx)(h,{...i})}):h(i)}return v(S);})();\\n;return Component;"},"_id":"articles/aws-kinesis-data-firehose-elastic-observability-analytics.mdx","_raw":{"sourceFilePath":"articles/aws-kinesis-data-firehose-elastic-observability-analytics.mdx","sourceFileName":"aws-kinesis-data-firehose-elastic-observability-analytics.mdx","sourceFileDir":"articles","contentType":"mdx","flattenedPath":"articles/aws-kinesis-data-firehose-elastic-observability-analytics"},"type":"Article","imageUrl":"/assets/images/aws-kinesis-data-firehose-observability-analytics/image2.png","readingTime":"16 min read","url":"/aws-kinesis-data-firehose-observability-analytics","headings":[{"level":2,"title":"Announcing the Kinesis Firehose method","href":"#announcing-the-kinesis-firehose-method"},{"level":2,"title":"Prerequisites and configurations","href":"#prerequisites-and-configurations"},{"level":2,"title":"Elastic’s Amazon Kinesis Data Firehose integration","href":"#elastics-amazon-kinesis-data-firehose-integration"},{"level":2,"title":"Setting it all up","href":"#setting-it-all-up"},{"level":3,"title":"Step 0: Get an account on Elastic Cloud","href":"#step-0-get-an-account-on-elastic-cloud"},{"level":3,"title":"Step 1: Deploy Elastic on AWS","href":"#step-1-deploy-elastic-on-aws"},{"level":3,"title":"_Alternative approach using Terraform_","href":"#_alternative-approach-using-terraform_"},{"level":3,"title":"Step 2: To turn on Elastic\'s AWS integrations, navigate to the Elastic Integration section in your deployment","href":"#step-2-to-turn-on-elastics-aws-integrations-navigate-to-the-elastic-integration-section-in-your-deployment"},{"level":3,"title":"Step 3: Set up the Amazon Kinesis Data Firehose delivery stream on the AWS Console","href":"#step-3-set-up-the-amazon-kinesis-data-firehose-delivery-stream-on-the-aws-console"},{"level":3,"title":"_Alternative approach using Terraform_","href":"#_alternative-approach-using-terraform_-1"},{"level":3,"title":"Step 4: Configure VPC Flow Logs to send to Amazon Kinesis Data Firehose","href":"#step-4-configure-vpc-flow-logs-to-send-to-amazon-kinesis-data-firehose"},{"level":3,"title":"Step 5: After a few minutes, check if flows are coming into Elastic","href":"#step-5-after-a-few-minutes-check-if-flows-are-coming-into-elastic"},{"level":3,"title":"Step 6: Navigate to Kibana to see your logs parsed and visualized in the [Logs AWS] VPC Flow Log Overview 
dashboard","href":"#step-6-navigate-to-kibana-to-see-your-logs-parsed-and-visualized-in-the-logs-aws-vpc-flow-log-overview-dashboard"},{"level":3,"title":"Step 7: Configure AWS Network Firewall Logs to send to Kinesis Firehose","href":"#step-7-configure-aws-network-firewall-logs-to-send-to-kinesis-firehose"},{"level":3,"title":"Step 8: Navigate to Kibana to see your logs parsed and visualized in the [Logs AWS] Firewall Log dashboard","href":"#step-8-navigate-to-kibana-to-see-your-logs-parsed-and-visualized-in-the-logs-aws-firewall-log-dashboard"},{"level":2,"title":"Wrapping up","href":"#wrapping-up"},{"level":2,"title":"Start a free trial today","href":"#start-a-free-trial-today"}]},{"title":"Wait… Elastic Observability monitors metrics for AWS services in just minutes?","slug":"aws-service-metrics-monitor-observability-easy","date":"2022-11-21","description":"Get metrics and logs from your AWS deployment and Elastic Observability in just minutes! We’ll show you how to use Elastic integrations to quickly monitor and manage the performance of your applications and AWS services to streamline troubleshooting.","image":"blog-charts-packages.png","author":[{"slug":"bahubali-shetti","type":"Author","_raw":{}}],"tags":[{"slug":"aws","type":"Tag","_raw":{}},{"slug":"metrics","type":"Tag","_raw":{}},{"slug":"cloud-monitoring","type":"Tag","_raw":{}}],"body":{"raw":"\\nThe transition to distributed applications is in full swing, driven mainly by our need to be “always-on” as consumers and fast-paced businesses. That need is driving deployments to have more complex requirements along with the ability to be globally diverse and rapidly innovate.\\n\\nCloud is becoming the de facto deployment option for today’s applications. Many cloud deployments choose to host their applications on AWS for the globally diverse set of regions it covers and the myriad of services (for faster development and innovation) available, as well as to drive operational and capital costs down. 
On AWS, development teams are finding additional value in migrating to Kubernetes on Amazon EKS, testing out the latest serverless options, and improving traditional, tiered applications with better services.\\n\\nElastic Observability offers 30 out-of-the-box integrations for AWS services with more to come.\\n\\nA quick review highlighting some of the integrations and capabilities can be found in a previous post:\\n\\n- [Elastic and AWS: Seamlessly ingest logs and metrics into a unified platform with ready-to-use integrations](https://www.elastic.co/blog/elastic-and-aws-seamlessly-ingest-logs-and-metrics-into-a-unified-platform-with-ready-to-use-integrations).\\n\\nSome additional posts on key AWS service integrations on Elastic are:\\n\\n- [APM (metrics, traces and logs) for serverless functions on AWS Lambda with Elastic](https://www.elastic.co/blog/observability-apm-aws-lambda-serverless-functions)\\n- [Log ingestion from AWS Services into Elastic via serverless forwarder on Lambda](https://www.elastic.co/blog/elastic-and-aws-serverless-application-repository-speed-time-to-actionable-insights-with-frictionless-log-ingestion-from-amazon-s3)\\n- [Elastic’s Amazon S3 Storage Lens Integration: Simplify management, control costs, and reduce risk](https://www.elastic.co/blog/new-elastic-and-amazon-s3-storage-lens-integration-simplify-management-control-costs-and-reduce-risk)\\n- [Ingest your container logs into Elastic Cloud with AWS FireLens](https://www.elastic.co/blog/elastic-cloud-with-aws-firelens-accelerate-time-to-insight-with-agentless-data-ingestion)\\n\\nA full list of AWS integrations can be found in Elastic’s online documentation:\\n\\n- [Full list of AWS integrations](https://docs.elastic.co/en/integrations/aws)\\n\\nIn addition to our native AWS integrations, Elastic Observability aggregates not only logs but also metrics for AWS services and the applications running on AWS compute services (EC2, Lambda, EKS/ECS/Fargate). All this data can be analyzed visually and more intuitively using Elastic’s advanced machine learning capabilities, which help detect performance issues and surface root causes before end users are affected.\\n\\nFor more details on how Elastic Observability provides application performance monitoring (APM) capabilities such as service maps, tracing, dependencies, and ML based metrics correlations:\\n\\n- [APM correlations in Elastic Observability: Automatically identifying probable causes of slow or failed transactions](https://www.elastic.co/blog/apm-correlations-elastic-observability-root-cause-transactions)\\n- [Elastic and AWS: Get the most value from your data sets](https://www.elastic.co/blog/elastic-and-aws-get-the-most-value-from-your-data-sets)\\n\\nThat’s right, Elastic offers metrics ingest, aggregation, and analysis for AWS services and applications on AWS compute services (EC2, Lambda, EKS/ECS/Fargate). 
Elastic is more than logs — it offers a unified observability solution for AWS environments.\\n\\nIn this blog, I’ll review how Elastic Observability can monitor metrics for a simple AWS application running on AWS services which include:\\n\\n- AWS EC2\\n- AWS ELB\\n- AWS RDS (AuroraDB)\\n- AWS NAT Gateways\\n\\nAs you will see, once the integration is installed, metrics will arrive instantly and you can immediately start reviewing metrics.\\n\\n## Prerequisites and config\\n\\nIf you plan on following this blog, here are some of the components and details we used to set up this demonstration:\\n\\n- Ensure you have an account on [Elastic Cloud](https://cloud.elastic.co) and a deployed stack ([see instructions here](https://www.elastic.co/guide/en/elastic-stack/current/installing-elastic-stack.html)).\\n- Ensure you have an AWS account with permissions to pull the necessary data from AWS. [See details in our documentation](https://docs.elastic.co/en/integrations/aws#aws-permissions).\\n- We used [AWS’s three tier app](https://github.com/aws-samples/aws-three-tier-web-architecture-workshop) and installed it as instructed in git.\\n- We’ll walk through installing the general [Elastic AWS Integration](https://docs.elastic.co/en/integrations/aws), which covers the four services we want to collect metrics for. \\n ([Full list of services supported by the Elastic AWS Integration](https://docs.elastic.co/en/integrations/aws#reference))\\n- We will _not_ cover application monitoring given other blogs cover application [AWS monitoring](https://www.elastic.co/observability/aws-monitoring) (metrics, logs, and tracing). Instead we will focus on how AWS services can be easily monitored.\\n- In order to see metrics, you will need to load the application. We’ve also created a playwright script to drive traffic to the application.\\n\\n## Three tier application overview\\n\\nBefore we dive into the Elastic configuration, let\'s review what we are monitoring. If you follow the instructions for [aws-three-tier-web-architecture-workshop](https://github.com/aws-samples/aws-three-tier-web-architecture-workshop), you will have the following deployed.\\n\\n![](/assets/images/aws-service-metrics-monitor-observability-easy/blog-elastic-three-tier.png)\\n\\nWhat’s deployed:\\n\\n- 1 VPC with 6 subnets\\n- 2 AZs\\n- 2 web servers per AZ\\n- 2 application servers per AZ\\n- 1 External facing application load balancer\\n- 1 Internal facing application load balancer\\n- 2 NAT gateways to manage traffic to the application layer\\n- 1 Internet gateway\\n- 1 RDS Aurora DB with a read replica\\n\\nAt the end of the blog, we will also provide a Playwright script to implement to load this app. This will help drive metrics to “light up” the dashboards.\\n\\n## Setting it all up\\n\\nLet’s walk through the details of how to get the application, AWS integration on Elastic, and what gets ingested.\\n\\n### Step 0: Load up the AWS Three Tier application and get your credentials\\n\\nFollow the instructions listed out in [AWS’s Three Tier app](https://github.com/aws-samples/aws-three-tier-web-architecture-workshop) and instructions in the workshop link on git. The workshop is listed [here](https://catalog.us-east-1.prod.workshops.aws/workshops/85cd2bb2-7f79-4e96-bdee-8078e469752a/en-US).\\n\\nOnce you’ve installed the app, get credentials from AWS. 
This will be needed for Elastic’s AWS integration.\\n\\nThere are several options for credentials:\\n\\n- Use access keys directly\\n- Use temporary security credentials\\n- Use a shared credentials file\\n- Use an IAM role Amazon Resource Name (ARN)\\n\\nFor more details, see our documentation on the necessary [credentials](https://docs.elastic.co/en/integrations/aws#aws-credentials) and [permissions](https://docs.elastic.co/en/integrations/aws#aws-permissions).\\n\\n### Step 1: Get an account on Elastic Cloud\\n\\nFollow the instructions to [get started on Elastic Cloud](https://cloud.elastic.co/registration?fromURI=/home).\\n\\n![](/assets/images/aws-service-metrics-monitor-observability-easy/blog-elastic-get-an-account.png)\\n\\n### Step 2: Install the Elastic AWS integration\\n\\nNavigate to the AWS integration on Elastic.\\n\\n![](/assets/images/aws-service-metrics-monitor-observability-easy/blog-elastic-install-aws-integration.png)\\n\\nSelect Add AWS integration.\\n\\n![](/assets/images/aws-service-metrics-monitor-observability-easy/blog-elastic-add-aws-integration.png)\\n\\nThis is where you will add your credentials, which will be stored as a policy in Elastic. This policy will be used as part of the install for the agent in the next step.\\n\\nAs you can see, the general Elastic AWS Integration will collect a significant amount of data from 30 AWS services. If you don’t want to install this general Elastic AWS Integration, you can select individual integrations to install.\\n\\n### Step 3: Install the Elastic Agent with AWS integration\\n\\nNow that you have created an integration policy, navigate to the Fleet section under Management in Elastic.\\n\\n![](/assets/images/aws-service-metrics-monitor-observability-easy/blog-elastic-install-elastic-agent.png)\\n\\nSelect the name of the policy you created in the last step.\\n\\n![](/assets/images/aws-service-metrics-monitor-observability-easy/blog-elastic-name-policy.png)\\n\\nFollow step 3 in the instructions in the **Add** agent window. 
This will require you to:\\n\\n1: Bring up an EC2 instance\\n\\n- t2.medium is minimum\\n- Linux - your choice of which\\n- Ensure you allow for Open reservation on the EC2 instance when you Launch it\\n\\n2: Log in to the instance and run the commands under Linux Tar tab (below is an example)\\n\\n```bash\\ncurl -L -O https://artifacts.elastic.co/downloads/beats/elastic-agent/elastic-agent-8.5.0-linux-x86_64.tar.gz\\ntar xzvf elastic-agent-8.5.0-linux-x86_64.tar.gz\\ncd elastic-agent-8.5.0-linux-x86_64\\nsudo ./elastic-agent install --url=https://37845638732625692c8ee914d88951dd96.fleet.us-central1.gcp.cloud.es.io:443 --enrollment-token=jkhfglkuwyvrquevuytqoeiyri\\n```\\n\\n### Step 4: Run traffic against the application\\n\\nWhile getting the application running is fairly easy, there is nothing to monitor or observe with Elastic unless you add a load on the application.\\n\\nHere is a simple script you can also run using [Playwright](https://playwright.dev/) to add traffic to the website for the AWS three tier application:\\n\\n```javascript\\nimport { test, expect } from \\"@playwright/test\\";\\n\\ntest(\\"homepage for AWS Threetierapp\\", async ({ page }) => {\\n await page.goto(\\n \\"https://web-tier-external-lb-1897463036.us-west-1.elb.amazonaws.com/#/db\\"\\n );\\n\\n await page.fill(\\n \\"#transactions > tbody > tr > td:nth-child(2) > input\\",\\n (Math.random() * 100).toString()\\n );\\n await page.fill(\\n \\"#transactions > tbody > tr > td:nth-child(3) > input\\",\\n (Math.random() * 100).toString()\\n );\\n await page.waitForTimeout(1000);\\n await page.click(\\n \\"#transactions > tbody > tr:nth-child(2) > td:nth-child(1) > input[type=button]\\"\\n );\\n await page.waitForTimeout(4000);\\n});\\n```\\n\\nThis script will launch three browsers, but you can limit this load to one browser in playwright.config.ts file.\\n\\nFor this exercise, we ran this traffic for approximately five hours with an interval of five minutes while testing the website.\\n\\n### Step 5: Go to AWS dashboards\\n\\nNow that your Elastic Agent is running, you can go to the related AWS dashboards to view what’s being ingested.\\n\\nTo search for the AWS Integration dashboards, simply search for them in the Elastic search bar. The relevant ones for this blog are:\\n\\n- [Metrics AWS] EC2 Overview\\n- [Metrics AWS] ELB Overview\\n- [Metrics AWS] RDS Overview\\n- [Metrics AWS] NAT Gateway\\n\\n![](/assets/images/aws-service-metrics-monitor-observability-easy/blog-elastic-search-aws-integration-dashboards.png)\\n\\nLet\'s see what comes up!\\n\\nAll of these dashboards are out-of-the-box and for all the following images, we’ve narrowed the views to only the relevant items from our app.\\n\\nAcross all dashboards, we’ve limited the timeframe to when we ran the traffic generator.\\n\\n![Elastic Observability EC2 Overview Dashboard](/assets/images/aws-service-metrics-monitor-observability-easy/blog-elastic-dashboard-traffic-generator.png)\\n\\nOnce we filtered for our 4 EC2 instances (2 web servers and 2 application servers), we can see the following:\\n\\n1: All 4 instances are up and running with no failures in status checks.\\n\\n2: We see the average CPU utilization across the timeframe and nothing looks abnormal.\\n\\n3: We see the network bytes flow in and out, aggregating over time as the database is loaded with rows.\\n\\nWhile this exercise shows a small portion of the metrics that can be viewed, more are available from AWS EC2. 
The metrics listed in the [AWS documentation](https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/viewing_metrics_with_cloudwatch.html) are all available, including the dimensions to help narrow the search for specific instances, etc.\\n\\n![Elastic Observability ELB Overview Dashboard](/assets/images/aws-service-metrics-monitor-observability-easy/blog-elastic-overview-dashboard.png)\\n\\nFor the ELB dashboard, we filter for our 2 load balancers (external web load balancer and internal application load balancer).\\n\\nWith the out-of-the-box dashboard, you can see application ELB-specific metrics. A good portion of the application ELB-specific metrics listed in [AWS Docs](https://docs.aws.amazon.com/elasticloadbalancing/latest/application/load-balancer-cloudwatch-metrics.html) are available to add graphs for.\\n\\nFor our two load balancers, we can see:\\n\\n1: Both the hosts (EC2 instances connected to the ELBs) are healthy.\\n\\n2: Load Balancer Capacity Units (how much you are using) and request counts both went up as expected during the traffic generation time frame.\\n\\n3: We chose to show 4XX and 2XX counts. 4XX will help identify issues with the application or connectivity with the application servers.\\n\\n![Elastic Observability RDS Overview Dashboard](/assets/images/aws-service-metrics-monitor-observability-easy/blog-elastic-transaction-blocked.png)\\n\\nFor AuroraDB, which is deployed in RDS, we’ve filtered for just the primary and secondary instances of Aurora on the dashboard.\\n\\nJust as with EC2 and ELB, most RDS metrics from CloudWatch are also available to create new charts and graphs. In this dashboard, we’ve narrowed it down to showing:\\n\\n1: Insert throughput & Select throughput\\n\\n2: Write latency\\n\\n3: CPU usage\\n\\n4: General number of connections during the timeframe\\n\\n![Elastic Observability AWS NAT Dashboard](/assets/images/aws-service-metrics-monitor-observability-easy/blog-elastic-aws-nat-dashboard.png)\\n\\nWe filtered to look only at our 2 NAT gateways, which front the application servers. As with the other dashboards, other metrics are available to build graphs and charts as needed.\\n\\nFor the NAT dashboard we can see the following:\\n\\n1: The NAT Gateways are doing well, with no packet drops\\n\\n2: An expected number of active connections from the web server\\n\\n3: Fairly normal set of metrics for bytes in and out\\n\\n**Congratulations, you have now started monitoring metrics from key AWS services for your application!**\\n\\n## What to monitor on AWS next?\\n\\n### Add logs from AWS Services\\n\\nNow that metrics are being monitored, you can now add logging. There are several options for ingesting logs.\\n\\n1. The AWS Integration in the Elastic Agent has a logs setting. Just ensure you turn on what you wish to receive. Let’s ingest the Aurora Logs from RDS. In the Elastic agent policy, we simply turn on Collect logs from CloudWatch (see below). Next, update the agent through the Fleet management UI.\\n\\n![](/assets/images/aws-service-metrics-monitor-observability-easy/blog-elastic-collect-logs.png)\\n\\n2. You can install the [Lambda logs forwarder](https://github.com/elastic/elastic-serverless-forwarder/blob/main/docs/README-AWS.md#deploying-elastic-serverless-forwarder). This option will pull logs from multiple locations. 
See the architecture diagram below.\\n\\n![](/assets/images/aws-service-metrics-monitor-observability-easy/blog-elastic-lambda-logs-forwarder.png)\\n\\nA review of this option is also found in the following [blog](https://www.elastic.co/blog/elastic-and-aws-serverless-application-repository-speed-time-to-actionable-insights-with-frictionless-log-ingestion-from-amazon-s3).\\n\\n### Analyze your data with Elastic Machine Learning\\n\\nOnce metrics and logs (or either one) are in Elastic, start analyzing your data through Elastic’s ML capabilities. A great review of these features can be found here:\\n\\n- [Correlating APM Telemetry to determine root causes in transactions](https://www.elastic.co/blog/apm-correlations-elastic-observability-root-cause-transactions)\\n- [Introduction to Elastic Machine Learning](https://www.elastic.co/elasticon/archive/2020/global/machine-learning-and-the-elastic-stack-everywhere-you-need-it)\\n\\nAnd there are many more videos and blogs on [Elastic’s Blog](https://www.elastic.co/blog/).\\n\\n## Conclusion: Monitoring AWS service metrics with Elastic Observability is easy!\\n\\nI hope you’ve gotten an appreciation for how Elastic Observability can help you monitor AWS service metrics. Here’s a quick recap of what you learned:\\n\\n- Elastic Observability supports ingest and analysis of AWS service metrics\\n- It’s easy to set up ingest from AWS Services via the Elastic Agent\\n- Elastic Observability has multiple out-of-the-box (OOTB) AWS service dashboards you can use to preliminarily review information, then modify for your needs\\n- 30+ AWS services are supported as part of AWS Integration on Elastic Observability, with more services being added regularly\\n- As noted in related blogs, you can analyze your AWS service metrics with Elastic’s machine learning capabilities\\n\\nStart your own [7-day free trial](https://aws.amazon.com/marketplace/pp/prodview-voru33wi6xs7k?trk=5fbc596b-6d2a-433a-8333-0bd1f28e84da%E2%89%BBchannel=el) by signing up via [AWS Marketplace](https://aws.amazon.com/marketplace/pp/prodview-voru33wi6xs7k?trk=d54b31eb-671c-49ba-88bb-7a1106421dfa%E2%89%BBchannel=el) and quickly spin up a deployment in minutes on any of the [Elastic Cloud regions on AWS](https://www.elastic.co/guide/en/cloud/current/ec-reference-regions.html#ec_amazon_web_services_aws_regions) around the world. Your AWS Marketplace purchase of Elastic will be included in your monthly consolidated billing statement and will draw against your committed spend with AWS.\\n\\n_The release and timing of any features or functionality described in this post remain at Elastic\'s sole discretion. 
Any features or functionality not currently available may not be delivered on time or at all._\\n","code":"var Component=(()=>{var p=Object.create;var r=Object.defineProperty;var g=Object.getOwnPropertyDescriptor;var w=Object.getOwnPropertyNames;var u=Object.getPrototypeOf,m=Object.prototype.hasOwnProperty;var f=(t,e)=>()=>(e||t((e={exports:{}}).exports,e),e.exports),b=(t,e)=>{for(var n in e)r(t,n,{get:e[n],enumerable:!0})},o=(t,e,n,s)=>{if(e&&typeof e==\\"object\\"||typeof e==\\"function\\")for(let a of w(e))!m.call(t,a)&&a!==n&&r(t,a,{get:()=>e[a],enumerable:!(s=g(e,a))||s.enumerable});return t};var y=(t,e,n)=>(n=t!=null?p(u(t)):{},o(e||!t||!t.__esModule?r(n,\\"default\\",{value:t,enumerable:!0}):n,t)),v=t=>o(r({},\\"__esModule\\",{value:!0}),t);var c=f((E,l)=>{l.exports=_jsx_runtime});var S={};b(S,{default:()=>d,frontmatter:()=>A});var i=y(c()),A={title:\\"Wait\\\\u2026 Elastic Observability monitors metrics for AWS services in just minutes?\\",slug:\\"aws-service-metrics-monitor-observability-easy\\",date:\\"2022-11-21\\",description:\\"Get metrics and logs from your AWS deployment and Elastic Observability in just minutes! We\\\\u2019ll show you how to use Elastic integrations to quickly monitor and manage the performance of your applications and AWS services to streamline troubleshooting.\\",author:[{slug:\\"bahubali-shetti\\"}],image:\\"blog-charts-packages.png\\",tags:[{slug:\\"aws\\"},{slug:\\"metrics\\"},{slug:\\"cloud-monitoring\\"}]};function h(t){let e={a:\\"a\\",br:\\"br\\",code:\\"code\\",em:\\"em\\",h2:\\"h2\\",h3:\\"h3\\",img:\\"img\\",li:\\"li\\",ol:\\"ol\\",p:\\"p\\",pre:\\"pre\\",strong:\\"strong\\",ul:\\"ul\\",...t.components};return(0,i.jsxs)(i.Fragment,{children:[(0,i.jsx)(e.p,{children:\\"The transition to distributed applications is in full swing, driven mainly by our need to be \\\\u201Calways-on\\\\u201D as consumers and fast-paced businesses. That need is driving deployments to have more complex requirements along with the ability to be globally diverse and rapidly innovate.\\"}),`\\n`,(0,i.jsx)(e.p,{children:\\"Cloud is becoming the de facto deployment option for today\\\\u2019s applications. Many cloud deployments choose to host their applications on AWS for the globally diverse set of regions it covers and the myriad of services (for faster development and innovation) available, as well as to drive operational and capital costs down. 
On AWS, development teams are finding additional value in migrating to Kubernetes on Amazon EKS, testing out the latest serverless options, and improving traditional, tiered applications with better services.\\"}),`\\n`,(0,i.jsx)(e.p,{children:\\"Elastic Observability offers 30 out-of-the-box integrations for AWS services with more to come.\\"}),`\\n`,(0,i.jsx)(e.p,{children:\\"A quick review highlighting some of the integrations and capabilities can be found in a previous post:\\"}),`\\n`,(0,i.jsxs)(e.ul,{children:[`\\n`,(0,i.jsxs)(e.li,{children:[(0,i.jsx)(e.a,{href:\\"https://www.elastic.co/blog/elastic-and-aws-seamlessly-ingest-logs-and-metrics-into-a-unified-platform-with-ready-to-use-integrations\\",rel:\\"nofollow\\",children:\\"Elastic and AWS: Seamlessly ingest logs and metrics into a unified platform with ready-to-use integrations\\"}),\\".\\"]}),`\\n`]}),`\\n`,(0,i.jsx)(e.p,{children:\\"Some additional posts on key AWS service integrations on Elastic are:\\"}),`\\n`,(0,i.jsxs)(e.ul,{children:[`\\n`,(0,i.jsx)(e.li,{children:(0,i.jsx)(e.a,{href:\\"https://www.elastic.co/blog/observability-apm-aws-lambda-serverless-functions\\",rel:\\"nofollow\\",children:\\"APM (metrics, traces and logs) for serverless functions on AWS Lambda with Elastic\\"})}),`\\n`,(0,i.jsx)(e.li,{children:(0,i.jsx)(e.a,{href:\\"https://www.elastic.co/blog/elastic-and-aws-serverless-application-repository-speed-time-to-actionable-insights-with-frictionless-log-ingestion-from-amazon-s3\\",rel:\\"nofollow\\",children:\\"Log ingestion from AWS Services into Elastic via serverless forwarder on Lambda\\"})}),`\\n`,(0,i.jsx)(e.li,{children:(0,i.jsx)(e.a,{href:\\"https://www.elastic.co/blog/new-elastic-and-amazon-s3-storage-lens-integration-simplify-management-control-costs-and-reduce-risk\\",rel:\\"nofollow\\",children:\\"Elastic\\\\u2019s Amazon S3 Storage Lens Integration: Simplify management, control costs, and reduce risk\\"})}),`\\n`,(0,i.jsx)(e.li,{children:(0,i.jsx)(e.a,{href:\\"https://www.elastic.co/blog/elastic-cloud-with-aws-firelens-accelerate-time-to-insight-with-agentless-data-ingestion\\",rel:\\"nofollow\\",children:\\"Ingest your container logs into Elastic Cloud with AWS FireLens\\"})}),`\\n`]}),`\\n`,(0,i.jsx)(e.p,{children:\\"A full list of AWS integrations can be found in Elastic\\\\u2019s online documentation:\\"}),`\\n`,(0,i.jsxs)(e.ul,{children:[`\\n`,(0,i.jsx)(e.li,{children:(0,i.jsx)(e.a,{href:\\"https://docs.elastic.co/en/integrations/aws\\",rel:\\"nofollow\\",children:\\"Full list of AWS integrations\\"})}),`\\n`]}),`\\n`,(0,i.jsx)(e.p,{children:\\"In addition to our native AWS integrations, Elastic Observability aggregates not only logs but also metrics for AWS services and the applications running on AWS compute services (EC2, Lambda, EKS/ECS/Fargate). 
All this data can be analyzed visually and more intuitively using Elastic\\\\u2019s advanced machine learning capabilities, which help detect performance issues and surface root causes before end users are affected.\\"}),`\\n`,(0,i.jsx)(e.p,{children:\\"For more details on how Elastic Observability provides application performance monitoring (APM) capabilities such as service maps, tracing, dependencies, and ML based metrics correlations:\\"}),`\\n`,(0,i.jsxs)(e.ul,{children:[`\\n`,(0,i.jsx)(e.li,{children:(0,i.jsx)(e.a,{href:\\"https://www.elastic.co/blog/apm-correlations-elastic-observability-root-cause-transactions\\",rel:\\"nofollow\\",children:\\"APM correlations in Elastic Observability: Automatically identifying probable causes of slow or failed transactions\\"})}),`\\n`,(0,i.jsx)(e.li,{children:(0,i.jsx)(e.a,{href:\\"https://www.elastic.co/blog/elastic-and-aws-get-the-most-value-from-your-data-sets\\",rel:\\"nofollow\\",children:\\"Elastic and AWS: Get the most value from your data sets\\"})}),`\\n`]}),`\\n`,(0,i.jsx)(e.p,{children:\\"That\\\\u2019s right, Elastic offers metrics ingest, aggregation, and analysis for AWS services and applications on AWS compute services (EC2, Lambda, EKS/ECS/Fargate). Elastic is more than logs \\\\u2014 it offers a unified observability solution for AWS environments.\\"}),`\\n`,(0,i.jsx)(e.p,{children:\\"In this blog, I\\\\u2019ll review how Elastic Observability can monitor metrics for a simple AWS application running on AWS services which include:\\"}),`\\n`,(0,i.jsxs)(e.ul,{children:[`\\n`,(0,i.jsx)(e.li,{children:\\"AWS EC2\\"}),`\\n`,(0,i.jsx)(e.li,{children:\\"AWS ELB\\"}),`\\n`,(0,i.jsx)(e.li,{children:\\"AWS RDS (AuroraDB)\\"}),`\\n`,(0,i.jsx)(e.li,{children:\\"AWS NAT Gateways\\"}),`\\n`]}),`\\n`,(0,i.jsx)(e.p,{children:\\"As you will see, once the integration is installed, metrics will arrive instantly and you can immediately start reviewing metrics.\\"}),`\\n`,(0,i.jsx)(e.h2,{id:\\"prerequisites-and-config\\",children:\\"Prerequisites and config\\"}),`\\n`,(0,i.jsx)(e.p,{children:\\"If you plan on following this blog, here are some of the components and details we used to set up this demonstration:\\"}),`\\n`,(0,i.jsxs)(e.ul,{children:[`\\n`,(0,i.jsxs)(e.li,{children:[\\"Ensure you have an account on \\",(0,i.jsx)(e.a,{href:\\"https://cloud.elastic.co\\",rel:\\"nofollow\\",children:\\"Elastic Cloud\\"}),\\" and a deployed stack (\\",(0,i.jsx)(e.a,{href:\\"https://www.elastic.co/guide/en/elastic-stack/current/installing-elastic-stack.html\\",rel:\\"nofollow\\",children:\\"see instructions here\\"}),\\").\\"]}),`\\n`,(0,i.jsxs)(e.li,{children:[\\"Ensure you have an AWS account with permissions to pull the necessary data from AWS. 
\\",(0,i.jsx)(e.a,{href:\\"https://docs.elastic.co/en/integrations/aws#aws-permissions\\",rel:\\"nofollow\\",children:\\"See details in our documentation\\"}),\\".\\"]}),`\\n`,(0,i.jsxs)(e.li,{children:[\\"We used \\",(0,i.jsx)(e.a,{href:\\"https://github.com/aws-samples/aws-three-tier-web-architecture-workshop\\",rel:\\"nofollow\\",children:\\"AWS\\\\u2019s three tier app\\"}),\\" and installed it as instructed in git.\\"]}),`\\n`,(0,i.jsxs)(e.li,{children:[\\"We\\\\u2019ll walk through installing the general \\",(0,i.jsx)(e.a,{href:\\"https://docs.elastic.co/en/integrations/aws\\",rel:\\"nofollow\\",children:\\"Elastic AWS Integration\\"}),\\", which covers the four services we want to collect metrics for.\\",(0,i.jsx)(e.br,{}),`\\n`,\\"(\\",(0,i.jsx)(e.a,{href:\\"https://docs.elastic.co/en/integrations/aws#reference\\",rel:\\"nofollow\\",children:\\"Full list of services supported by the Elastic AWS Integration\\"}),\\")\\"]}),`\\n`,(0,i.jsxs)(e.li,{children:[\\"We will \\",(0,i.jsx)(e.em,{children:\\"not\\"}),\\" cover application monitoring given other blogs cover application \\",(0,i.jsx)(e.a,{href:\\"https://www.elastic.co/observability/aws-monitoring\\",rel:\\"nofollow\\",children:\\"AWS monitoring\\"}),\\" (metrics, logs, and tracing). Instead we will focus on how AWS services can be easily monitored.\\"]}),`\\n`,(0,i.jsx)(e.li,{children:\\"In order to see metrics, you will need to load the application. We\\\\u2019ve also created a playwright script to drive traffic to the application.\\"}),`\\n`]}),`\\n`,(0,i.jsx)(e.h2,{id:\\"three-tier-application-overview\\",children:\\"Three tier application overview\\"}),`\\n`,(0,i.jsxs)(e.p,{children:[\\"Before we dive into the Elastic configuration, let\'s review what we are monitoring. If you follow the instructions for \\",(0,i.jsx)(e.a,{href:\\"https://github.com/aws-samples/aws-three-tier-web-architecture-workshop\\",rel:\\"nofollow\\",children:\\"aws-three-tier-web-architecture-workshop\\"}),\\", you will have the following deployed.\\"]}),`\\n`,(0,i.jsx)(e.p,{children:(0,i.jsx)(e.img,{src:\\"/assets/images/aws-service-metrics-monitor-observability-easy/blog-elastic-three-tier.png\\",alt:\\"\\",width:\\"1214\\",height:\\"548\\"})}),`\\n`,(0,i.jsx)(e.p,{children:\\"What\\\\u2019s deployed:\\"}),`\\n`,(0,i.jsxs)(e.ul,{children:[`\\n`,(0,i.jsx)(e.li,{children:\\"1 VPC with 6 subnets\\"}),`\\n`,(0,i.jsx)(e.li,{children:\\"2 AZs\\"}),`\\n`,(0,i.jsx)(e.li,{children:\\"2 web servers per AZ\\"}),`\\n`,(0,i.jsx)(e.li,{children:\\"2 application servers per AZ\\"}),`\\n`,(0,i.jsx)(e.li,{children:\\"1 External facing application load balancer\\"}),`\\n`,(0,i.jsx)(e.li,{children:\\"1 Internal facing application load balancer\\"}),`\\n`,(0,i.jsx)(e.li,{children:\\"2 NAT gateways to manage traffic to the application layer\\"}),`\\n`,(0,i.jsx)(e.li,{children:\\"1 Internet gateway\\"}),`\\n`,(0,i.jsx)(e.li,{children:\\"1 RDS Aurora DB with a read replica\\"}),`\\n`]}),`\\n`,(0,i.jsx)(e.p,{children:\\"At the end of the blog, we will also provide a Playwright script to implement to load this app. 
This will help drive metrics to \\\\u201Clight up\\\\u201D the dashboards.\\"}),`\\n`,(0,i.jsx)(e.h2,{id:\\"setting-it-all-up\\",children:\\"Setting it all up\\"}),`\\n`,(0,i.jsx)(e.p,{children:\\"Let\\\\u2019s walk through the details of how to get the application, AWS integration on Elastic, and what gets ingested.\\"}),`\\n`,(0,i.jsx)(e.h3,{id:\\"step-0-load-up-the-aws-three-tier-application-and-get-your-credentials\\",children:\\"Step 0: Load up the AWS Three Tier application and get your credentials\\"}),`\\n`,(0,i.jsxs)(e.p,{children:[\\"Follow the instructions listed out in \\",(0,i.jsx)(e.a,{href:\\"https://github.com/aws-samples/aws-three-tier-web-architecture-workshop\\",rel:\\"nofollow\\",children:\\"AWS\\\\u2019s Three Tier app\\"}),\\" and instructions in the workshop link on git. The workshop is listed \\",(0,i.jsx)(e.a,{href:\\"https://catalog.us-east-1.prod.workshops.aws/workshops/85cd2bb2-7f79-4e96-bdee-8078e469752a/en-US\\",rel:\\"nofollow\\",children:\\"here\\"}),\\".\\"]}),`\\n`,(0,i.jsx)(e.p,{children:\\"Once you\\\\u2019ve installed the app, get credentials from AWS. This will be needed for Elastic\\\\u2019s AWS integration.\\"}),`\\n`,(0,i.jsx)(e.p,{children:\\"There are several options for credentials:\\"}),`\\n`,(0,i.jsxs)(e.ul,{children:[`\\n`,(0,i.jsx)(e.li,{children:\\"Use access keys directly\\"}),`\\n`,(0,i.jsx)(e.li,{children:\\"Use temporary security credentials\\"}),`\\n`,(0,i.jsx)(e.li,{children:\\"Use a shared credentials file\\"}),`\\n`,(0,i.jsx)(e.li,{children:\\"Use an IAM role Amazon Resource Name (ARN)\\"}),`\\n`]}),`\\n`,(0,i.jsxs)(e.p,{children:[\\"For more details on specifics around necessary \\",(0,i.jsx)(e.a,{href:\\"https://docs.elastic.co/en/integrations/aws#aws-credentials\\",rel:\\"nofollow\\",children:\\"credentials\\"}),\\" and \\",(0,i.jsx)(e.a,{href:\\"https://docs.elastic.co/en/integrations/aws#aws-permissions\\",rel:\\"nofollow\\",children:\\"permissions\\"}),\\".\\"]}),`\\n`,(0,i.jsx)(e.h3,{id:\\"step-1-get-an-account-on-elastic-cloud\\",children:\\"Step 1: Get an account on Elastic Cloud\\"}),`\\n`,(0,i.jsxs)(e.p,{children:[\\"Follow the instructions to \\",(0,i.jsx)(e.a,{href:\\"https://cloud.elastic.co/registration?fromURI=/home\\",rel:\\"nofollow\\",children:\\"get started on Elastic Cloud\\"}),\\".\\"]}),`\\n`,(0,i.jsx)(e.p,{children:(0,i.jsx)(e.img,{src:\\"/assets/images/aws-service-metrics-monitor-observability-easy/blog-elastic-get-an-account.png\\",alt:\\"\\",width:\\"884\\",height:\\"654\\"})}),`\\n`,(0,i.jsx)(e.h3,{id:\\"step-2-install-the-elastic-aws-integration\\",children:\\"Step 2: Install the Elastic AWS integration\\"}),`\\n`,(0,i.jsx)(e.p,{children:\\"Navigate to the AWS integration on Elastic.\\"}),`\\n`,(0,i.jsx)(e.p,{children:(0,i.jsx)(e.img,{src:\\"/assets/images/aws-service-metrics-monitor-observability-easy/blog-elastic-install-aws-integration.png\\",alt:\\"\\",width:\\"1044\\",height:\\"1054\\"})}),`\\n`,(0,i.jsx)(e.p,{children:\\"Select Add AWS integration.\\"}),`\\n`,(0,i.jsx)(e.p,{children:(0,i.jsx)(e.img,{src:\\"/assets/images/aws-service-metrics-monitor-observability-easy/blog-elastic-add-aws-integration.png\\",alt:\\"\\",width:\\"401\\",height:\\"1021\\"})}),`\\n`,(0,i.jsx)(e.p,{children:\\"This is where you will add your credentials and it will be stored as a policy in Elastic. 
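For instance, if you go the shared-credentials-file route, the file the integration reads is the standard AWS one. A minimal, hypothetical sketch, where the profile name and key values are placeholders you would replace with your own:

```bash
# Hedged sketch: a standard AWS shared credentials file on the host
# running the Elastic Agent (placeholder profile name and keys).
mkdir -p ~/.aws
cat > ~/.aws/credentials <<'EOF'
[elastic-agent]
aws_access_key_id = <your-access-key-id>
aws_secret_access_key = <your-secret-access-key>
EOF
```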
This policy will be used as part of the install for the agent in the next step.\\"}),`\\n`,(0,i.jsx)(e.p,{children:\\"As you can see, the general Elastic AWS Integration will collect a significant amount of data from 30 AWS services. If you don\\\\u2019t want to install this general Elastic AWS Integration, you can select individual integrations to install.\\"}),`\\n`,(0,i.jsx)(e.h3,{id:\\"step-3-install-the-elastic-agent-with-aws-integration\\",children:\\"Step 3: Install the Elastic Agent with AWS integration\\"}),`\\n`,(0,i.jsx)(e.p,{children:\\"Now that you have created an integration policy, navigate to the Fleet section under Management in Elastic.\\"}),`\\n`,(0,i.jsx)(e.p,{children:(0,i.jsx)(e.img,{src:\\"/assets/images/aws-service-metrics-monitor-observability-easy/blog-elastic-install-elastic-agent.png\\",alt:\\"\\",width:\\"199\\",height:\\"210\\"})}),`\\n`,(0,i.jsx)(e.p,{children:\\"Select the name of the policy you created in the last step.\\"}),`\\n`,(0,i.jsx)(e.p,{children:(0,i.jsx)(e.img,{src:\\"/assets/images/aws-service-metrics-monitor-observability-easy/blog-elastic-name-policy.png\\",alt:\\"\\",width:\\"1208\\",height:\\"1018\\"})}),`\\n`,(0,i.jsxs)(e.p,{children:[\\"Follow step 3 in the instructions in the \\",(0,i.jsx)(e.strong,{children:\\"Add\\"}),\\" agent window. This will require you to:\\"]}),`\\n`,(0,i.jsx)(e.p,{children:\\"1: Bring up an EC2 instance\\"}),`\\n`,(0,i.jsxs)(e.ul,{children:[`\\n`,(0,i.jsx)(e.li,{children:\\"t2.medium is minimum\\"}),`\\n`,(0,i.jsx)(e.li,{children:\\"Linux - your choice of which\\"}),`\\n`,(0,i.jsx)(e.li,{children:\\"Ensure you allow for Open reservation on the EC2 instance when you Launch it\\"}),`\\n`]}),`\\n`,(0,i.jsx)(e.p,{children:\\"2: Log in to the instance and run the commands under Linux Tar tab (below is an example)\\"}),`\\n`,(0,i.jsx)(e.pre,{children:(0,i.jsx)(e.code,{className:\\"language-bash\\",children:`curl -L -O https://artifacts.elastic.co/downloads/beats/elastic-agent/elastic-agent-8.5.0-linux-x86_64.tar.gz\\ntar xzvf elastic-agent-8.5.0-linux-x86_64.tar.gz\\ncd elastic-agent-8.5.0-linux-x86_64\\nsudo ./elastic-agent install --url=https://37845638732625692c8ee914d88951dd96.fleet.us-central1.gcp.cloud.es.io:443 --enrollment-token=jkhfglkuwyvrquevuytqoeiyri\\n`})}),`\\n`,(0,i.jsx)(e.h3,{id:\\"step-4-run-traffic-against-the-application\\",children:\\"Step 4: Run traffic against the application\\"}),`\\n`,(0,i.jsx)(e.p,{children:\\"While getting the application running is fairly easy, there is nothing to monitor or observe with Elastic unless you add a load on the application.\\"}),`\\n`,(0,i.jsxs)(e.p,{children:[\\"Here is a simple script you can also run using \\",(0,i.jsx)(e.a,{href:\\"https://playwright.dev/\\",rel:\\"nofollow\\",children:\\"Playwright\\"}),\\" to add traffic to the website for the AWS three tier application:\\"]}),`\\n`,(0,i.jsx)(e.pre,{children:(0,i.jsx)(e.code,{className:\\"language-javascript\\",children:`import { test, expect } from \\"@playwright/test\\";\\n\\ntest(\\"homepage for AWS Threetierapp\\", async ({ page }) => {\\n await page.goto(\\n \\"https://web-tier-external-lb-1897463036.us-west-1.elb.amazonaws.com/#/db\\"\\n );\\n\\n await page.fill(\\n \\"#transactions > tbody > tr > td:nth-child(2) > input\\",\\n (Math.random() * 100).toString()\\n );\\n await page.fill(\\n \\"#transactions > tbody > tr > td:nth-child(3) > input\\",\\n (Math.random() * 100).toString()\\n );\\n await page.waitForTimeout(1000);\\n await page.click(\\n \\"#transactions > tbody > tr:nth-child(2) > 
td:nth-child(1) > input[type=button]\\"\\n );\\n await page.waitForTimeout(4000);\\n});\\n`})}),`\\n`,(0,i.jsx)(e.p,{children:\\"This script will launch three browsers, but you can limit this load to one browser in playwright.config.ts file.\\"}),`\\n`,(0,i.jsx)(e.p,{children:\\"For this exercise, we ran this traffic for approximately five hours with an interval of five minutes while testing the website.\\"}),`\\n`,(0,i.jsx)(e.h3,{id:\\"step-5-go-to-aws-dashboards\\",children:\\"Step 5: Go to AWS dashboards\\"}),`\\n`,(0,i.jsx)(e.p,{children:\\"Now that your Elastic Agent is running, you can go to the related AWS dashboards to view what\\\\u2019s being ingested.\\"}),`\\n`,(0,i.jsx)(e.p,{children:\\"To search for the AWS Integration dashboards, simply search for them in the Elastic search bar. The relevant ones for this blog are:\\"}),`\\n`,(0,i.jsxs)(e.ul,{children:[`\\n`,(0,i.jsx)(e.li,{children:\\"[Metrics AWS] EC2 Overview\\"}),`\\n`,(0,i.jsx)(e.li,{children:\\"[Metrics AWS] ELB Overview\\"}),`\\n`,(0,i.jsx)(e.li,{children:\\"[Metrics AWS] RDS Overview\\"}),`\\n`,(0,i.jsx)(e.li,{children:\\"[Metrics AWS] NAT Gateway\\"}),`\\n`]}),`\\n`,(0,i.jsx)(e.p,{children:(0,i.jsx)(e.img,{src:\\"/assets/images/aws-service-metrics-monitor-observability-easy/blog-elastic-search-aws-integration-dashboards.png\\",alt:\\"\\",width:\\"957\\",height:\\"77\\"})}),`\\n`,(0,i.jsx)(e.p,{children:\\"Let\'s see what comes up!\\"}),`\\n`,(0,i.jsx)(e.p,{children:\\"All of these dashboards are out-of-the-box and for all the following images, we\\\\u2019ve narrowed the views to only the relevant items from our app.\\"}),`\\n`,(0,i.jsx)(e.p,{children:\\"Across all dashboards, we\\\\u2019ve limited the timeframe to when we ran the traffic generator.\\"}),`\\n`,(0,i.jsx)(e.p,{children:(0,i.jsx)(e.img,{src:\\"/assets/images/aws-service-metrics-monitor-observability-easy/blog-elastic-dashboard-traffic-generator.png\\",alt:\\"Elastic Observability EC2 Overview Dashboard\\",width:\\"1999\\",height:\\"1000\\"})}),`\\n`,(0,i.jsx)(e.p,{children:\\"Once we filtered for our 4 EC2 instances (2 web servers and 2 application servers), we can see the following:\\"}),`\\n`,(0,i.jsx)(e.p,{children:\\"1: All 4 instances are up and running with no failures in status checks.\\"}),`\\n`,(0,i.jsx)(e.p,{children:\\"2: We see the average CPU utilization across the timeframe and nothing looks abnormal.\\"}),`\\n`,(0,i.jsx)(e.p,{children:\\"3: We see the network bytes flow in and out, aggregating over time as the database is loaded with rows.\\"}),`\\n`,(0,i.jsxs)(e.p,{children:[\\"While this exercise shows a small portion of the metrics that can be viewed, more are available from AWS EC2. The metrics listed on \\",(0,i.jsx)(e.a,{href:\\"https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/viewing_metrics_with_cloudwatch.html\\",rel:\\"nofollow\\",children:\\"AWS documentation\\"}),\\" are all available, including the dimensions to help narrow the search for specific instances, etc.\\"]}),`\\n`,(0,i.jsx)(e.p,{children:(0,i.jsx)(e.img,{src:\\"/assets/images/aws-service-metrics-monitor-observability-easy/blog-elastic-overview-dashboard.png\\",alt:\\"Elastic Observability ELB Overview Dashboard\\",width:\\"1681\\",height:\\"985\\"})}),`\\n`,(0,i.jsx)(e.p,{children:\\"For the ELB dashboard, we filter for our 2 load balancers (external web load balancer and internal application load balancer).\\"}),`\\n`,(0,i.jsxs)(e.p,{children:[\\"With the out-of-the-box dashboard, you can see application ELB-specific metrics. 
A good portion of the application ELB specific metrics listed in \\",(0,i.jsx)(e.a,{href:\\"https://docs.aws.amazon.com/elasticloadbalancing/latest/application/load-balancer-cloudwatch-metrics.html\\",rel:\\"nofollow\\",children:\\"AWS Docs\\"}),\\" are available to add graphs for.\\"]}),`\\n`,(0,i.jsx)(e.p,{children:\\"For our two load balancers, we can see:\\"}),`\\n`,(0,i.jsx)(e.p,{children:\\"1: Both the hosts (EC2 instances connected to the ELBs) are healthy.\\"}),`\\n`,(0,i.jsx)(e.p,{children:\\"2: Load Balancer Capacity Units (how much you are using) and request counts both went up as expected during the traffic generation time frame.\\"}),`\\n`,(0,i.jsx)(e.p,{children:\\"3: We picked to show 4XX and 2XX counts. 4XX will help identify issues with the application or connectivity with the application servers.\\"}),`\\n`,(0,i.jsx)(e.p,{children:(0,i.jsx)(e.img,{src:\\"/assets/images/aws-service-metrics-monitor-observability-easy/blog-elastic-transaction-blocked.png\\",alt:\\"Elastic Observability RDS Overview Dashboard\\",width:\\"1682\\",height:\\"1201\\"})}),`\\n`,(0,i.jsx)(e.p,{children:\\"For AuroraDB, which is deployed in RDS, we\\\\u2019ve filtered for just the primary and secondary instances of Aurora on the dashboard.\\"}),`\\n`,(0,i.jsx)(e.p,{children:\\"Just as with EC2, ELB, most RDS metrics from Cloudwatch are also available to create new charts and graphs. In this dashboard, we\\\\u2019ve narrowed it down to showing:\\"}),`\\n`,(0,i.jsx)(e.p,{children:\\"1: Insert throughput & Select throughput\\"}),`\\n`,(0,i.jsx)(e.p,{children:\\"2: Write latency\\"}),`\\n`,(0,i.jsx)(e.p,{children:\\"3: CPU usage\\"}),`\\n`,(0,i.jsx)(e.p,{children:\\"4: General number of connections during the timeframe\\"}),`\\n`,(0,i.jsx)(e.p,{children:(0,i.jsx)(e.img,{src:\\"/assets/images/aws-service-metrics-monitor-observability-easy/blog-elastic-aws-nat-dashboard.png\\",alt:\\" Elastic Observability AWS NAT Dashboard\\",width:\\"1999\\",height:\\"1081\\"})}),`\\n`,(0,i.jsx)(e.p,{children:\\"We filtered to look only at our 2 NAT instances which are fronting the application servers. As with the other dashboards, other metrics are available to build graphs and /charts as needed.\\"}),`\\n`,(0,i.jsx)(e.p,{children:\\"For the NAT dashboard we can see the following:\\"}),`\\n`,(0,i.jsx)(e.p,{children:\\"1: The NAT Gateways are doing well due to no packet drops\\"}),`\\n`,(0,i.jsx)(e.p,{children:\\"2: An expected number of active connections from the web server\\"}),`\\n`,(0,i.jsx)(e.p,{children:\\"3: Fairly normal set of metrics for bytes in and out\\"}),`\\n`,(0,i.jsx)(e.p,{children:(0,i.jsx)(e.strong,{children:\\"Congratulations, you have now started monitoring metrics from key AWS services for your application!\\"})}),`\\n`,(0,i.jsx)(e.h2,{id:\\"what-to-monitor-on-aws-next\\",children:\\"What to monitor on AWS next?\\"}),`\\n`,(0,i.jsx)(e.h3,{id:\\"add-logs-from-aws-services\\",children:\\"Add logs from AWS Services\\"}),`\\n`,(0,i.jsx)(e.p,{children:\\"Now that metrics are being monitored, you can also now add logging. There are several options for ingesting logs.\\"}),`\\n`,(0,i.jsxs)(e.ol,{children:[`\\n`,(0,i.jsx)(e.li,{children:\\"The AWS Integration in the Elastic Agent has logs setting. Just ensure you turn on what you wish to receive. Let\\\\u2019s ingest the Aurora Logs from RDS. In the Elastic agent policy, we simply turn on Collect logs from CloudWatch (see below). 
Next, update the agent through the Fleet management UI.\\"}),`\\n`]}),`\\n`,(0,i.jsx)(e.p,{children:(0,i.jsx)(e.img,{src:\\"/assets/images/aws-service-metrics-monitor-observability-easy/blog-elastic-collect-logs.png\\",alt:\\"\\",width:\\"718\\",height:\\"345\\"})}),`\\n`,(0,i.jsxs)(e.ol,{start:\\"2\\",children:[`\\n`,(0,i.jsxs)(e.li,{children:[\\"You can install the \\",(0,i.jsx)(e.a,{href:\\"https://github.com/elastic/elastic-serverless-forwarder/blob/main/docs/README-AWS.md#deploying-elastic-serverless-forwarder\\",rel:\\"nofollow\\",children:\\"Lambda logs forwarder\\"}),\\". This option will pull logs from multiple locations. See the architecture diagram below.\\"]}),`\\n`]}),`\\n`,(0,i.jsx)(e.p,{children:(0,i.jsx)(e.img,{src:\\"/assets/images/aws-service-metrics-monitor-observability-easy/blog-elastic-lambda-logs-forwarder.png\\",alt:\\"\\",width:\\"1600\\",height:\\"676\\"})}),`\\n`,(0,i.jsxs)(e.p,{children:[\\"A review of this option is also found in the following \\",(0,i.jsx)(e.a,{href:\\"https://www.elastic.co/blog/elastic-and-aws-serverless-application-repository-speed-time-to-actionable-insights-with-frictionless-log-ingestion-from-amazon-s3\\",rel:\\"nofollow\\",children:\\"blog\\"}),\\".\\"]}),`\\n`,(0,i.jsx)(e.h3,{id:\\"analyze-your-data-with-elastic-machine-learning\\",children:\\"Analyze your data with Elastic Machine Learning\\"}),`\\n`,(0,i.jsx)(e.p,{children:\\"Once metrics and logs (or either one) are in Elastic, start analyzing your data through Elastic\\\\u2019s ML capabilities. A great review of these features can be found here:\\"}),`\\n`,(0,i.jsxs)(e.ul,{children:[`\\n`,(0,i.jsx)(e.li,{children:(0,i.jsx)(e.a,{href:\\"https://www.elastic.co/blog/apm-correlations-elastic-observability-root-cause-transactions\\",rel:\\"nofollow\\",children:\\"Correlating APM Telemetry to determine root causes in transactions\\"})}),`\\n`,(0,i.jsx)(e.li,{children:(0,i.jsx)(e.a,{href:\\"https://www.elastic.co/elasticon/archive/2020/global/machine-learning-and-the-elastic-stack-everywhere-you-need-it\\",rel:\\"nofollow\\",children:\\"Introduction to Elastic Machine Learning\\"})}),`\\n`]}),`\\n`,(0,i.jsxs)(e.p,{children:[\\"And there are many more videos and blogs on \\",(0,i.jsx)(e.a,{href:\\"https://www.elastic.co/blog/\\",rel:\\"nofollow\\",children:\\"Elastic\\\\u2019s Blog\\"}),\\".\\"]}),`\\n`,(0,i.jsx)(e.h2,{id:\\"conclusion-monitoring-aws-service-metrics-with-elastic-observability-is-easy\\",children:\\"Conclusion: Monitoring AWS service metrics with Elastic Observability is easy!\\"}),`\\n`,(0,i.jsx)(e.p,{children:\\"I hope you\\\\u2019ve gotten an appreciation for how Elastic Observability can help you monitor AWS service metrics, here\\\\u2019s a quick recap of lessons and what you learned:\\"}),`\\n`,(0,i.jsxs)(e.ul,{children:[`\\n`,(0,i.jsx)(e.li,{children:\\"Elastic Observability supports ingest and analysis of AWS service metrics\\"}),`\\n`,(0,i.jsx)(e.li,{children:\\"It\\\\u2019s easy to set up ingest from AWS Services via the Elastic Agent\\"}),`\\n`,(0,i.jsx)(e.li,{children:\\"Elastic Observability has multiple out-of-the-box (OOTB) AWS service dashboards you can use to preliminarily review information, then modify for your needs\\"}),`\\n`,(0,i.jsx)(e.li,{children:\\"30+ AWS services are supported as part of AWS Integration on Elastic Observability, with more services being added regularly\\"}),`\\n`,(0,i.jsx)(e.li,{children:\\"As noted in related blogs, you can analyze your AWS service metrics with Elastic\\\\u2019s machine learning 
capabilities\\"}),`\\n`]}),`\\n`,(0,i.jsxs)(e.p,{children:[\\"Start your own \\",(0,i.jsx)(e.a,{href:\\"https://aws.amazon.com/marketplace/pp/prodview-voru33wi6xs7k?trk=5fbc596b-6d2a-433a-8333-0bd1f28e84da%E2%89%BBchannel=el\\",rel:\\"nofollow\\",children:\\"7-day free trial\\"}),\\" by signing up via \\",(0,i.jsx)(e.a,{href:\\"https://aws.amazon.com/marketplace/pp/prodview-voru33wi6xs7k?trk=d54b31eb-671c-49ba-88bb-7a1106421dfa%E2%89%BBchannel=el\\",rel:\\"nofollow\\",children:\\"AWS Marketplace\\"}),\\" and quickly spin up a deployment in minutes on any of the \\",(0,i.jsx)(e.a,{href:\\"https://www.elastic.co/guide/en/cloud/current/ec-reference-regions.html#ec_amazon_web_services_aws_regions\\",rel:\\"nofollow\\",children:\\"Elastic Cloud regions on AWS\\"}),\\" around the world. Your AWS Marketplace purchase of Elastic will be included in your monthly consolidated billing statement and will draw against your committed spend with AWS.\\"]}),`\\n`,(0,i.jsx)(e.p,{children:(0,i.jsx)(e.em,{children:\\"The release and timing of any features or functionality described in this post remain at Elastic\'s sole discretion. Any features or functionality not currently available may not be delivered on time or at all.\\"})})]})}function d(t={}){let{wrapper:e}=t.components||{};return e?(0,i.jsx)(e,{...t,children:(0,i.jsx)(h,{...t})}):h(t)}return v(S);})();\\n;return Component;"},"_id":"articles/aws-service-metrics-monitor-observability-easy.mdx","_raw":{"sourceFilePath":"articles/aws-service-metrics-monitor-observability-easy.mdx","sourceFileName":"aws-service-metrics-monitor-observability-easy.mdx","sourceFileDir":"articles","contentType":"mdx","flattenedPath":"articles/aws-service-metrics-monitor-observability-easy"},"type":"Article","imageUrl":"/assets/images/aws-service-metrics-monitor-observability-easy/blog-charts-packages.png","readingTime":"12 min read","url":"/aws-service-metrics-monitor-observability-easy","headings":[{"level":2,"title":"Prerequisites and config","href":"#prerequisites-and-config"},{"level":2,"title":"Three tier application overview","href":"#three-tier-application-overview"},{"level":2,"title":"Setting it all up","href":"#setting-it-all-up"},{"level":3,"title":"Step 0: Load up the AWS Three Tier application and get your credentials","href":"#step-0-load-up-the-aws-three-tier-application-and-get-your-credentials"},{"level":3,"title":"Step 1: Get an account on Elastic Cloud","href":"#step-1-get-an-account-on-elastic-cloud"},{"level":3,"title":"Step 2: Install the Elastic AWS integration","href":"#step-2-install-the-elastic-aws-integration"},{"level":3,"title":"Step 3: Install the Elastic Agent with AWS integration","href":"#step-3-install-the-elastic-agent-with-aws-integration"},{"level":3,"title":"Step 4: Run traffic against the application","href":"#step-4-run-traffic-against-the-application"},{"level":3,"title":"Step 5: Go to AWS dashboards","href":"#step-5-go-to-aws-dashboards"},{"level":2,"title":"What to monitor on AWS next?","href":"#what-to-monitor-on-aws-next"},{"level":3,"title":"Add logs from AWS Services","href":"#add-logs-from-aws-services"},{"level":3,"title":"Analyze your data with Elastic Machine Learning","href":"#analyze-your-data-with-elastic-machine-learning"},{"level":2,"title":"Conclusion: Monitoring AWS service metrics with Elastic Observability is easy!","href":"#conclusion-monitoring-aws-service-metrics-with-elastic-observability-is-easy"}]},{"title":"AWS VPC Flow log analysis with GenAI in 
Elastic","slug":"aws-vpc-flow-log-analysis-with-genai-elastic","date":"2024-06-07","description":"Elastic has a set of embedded capabilities such as a GenAI RAG-based AI Assistant and a machine learning platform as part of the product baseline. These make analyzing the vast number of logs you get from AWS VPC Flows easier.","image":"21-cubes.jpeg","author":[{"slug":"bahubali-shetti","type":"Author","_raw":{}}],"tags":[{"slug":"aws","type":"Tag","_raw":{}},{"slug":"genai","type":"Tag","_raw":{}},{"slug":"aws-vpc-flow-logs","type":"Tag","_raw":{}},{"slug":"log-analytics","type":"Tag","_raw":{}},{"slug":"ai-assistant","type":"Tag","_raw":{}}],"body":{"raw":"\\nElastic Observability provides a full observability solution, by supporting metrics, traces and logs for applications and infrastructure. In managing AWS deployments, VPC flow logs are critical in managing performance, network visibility, security, compliance, and overall management of your AWS environment. Several examples of :\\n\\n1. Where traffic is coming in from and going out to from the deployment, and within the deployment. This helps identify unusual or unauthorized communications\\n\\n2. Traffic volumes detecting spikes or drops which could indicate service issues in production or an increase in customer traffic\\n\\n3. Latency and Performance bottlenecks - with VPC Flow logs, you can look at latency for a flow (in and outflows), and understand patterns\\n\\n4. Accepted and rejected traffic helps determine where potential security threats and misconfigurations lie.\xa0\\n\\nAWS VPC Logs is a great example of how logs are great. Logging is an important part of Observability, for which we generally think of metrics and tracing. However, the amount of logs an application and the underlying infrastructure output can be significantly daunting with VPC Logs. However, it also provides a significant amount of insight.\\n\\nBefore we proceed, it is important to understand what Elastic provides in managing AWS and VPC Flow logs:\\n\\n1. A full set of integrations to manage VPC Flows and the [entire end-to-end deployment on AWS](https://www.elastic.co/observability-labs/blog/aws-service-metrics-monitor-observability-easy).\xa0\\n\\n2. Elastic has a simple-to-use [AWS Firehose integration](https://www.elastic.co/observability-labs/blog/aws-kinesis-data-firehose-observability-analytics).\xa0\\n\\n3. Elastic’s tools such as [Discover, spike analysis,\xa0 and anomaly detection help provide you with better insights and analysis](https://www.elastic.co/observability-labs/blog/vpc-flow-logs-monitoring-analytics-observability).\\n\\n4. And a set of simple [Out-of-the-box dashboards](https://www.elastic.co/guide/en/observability/current/monitor-amazon-vpc-flow-logs.html#aws-firehose-dashboard)\\n\\nIn today’s blog, we’ll cover how Elastics’ other features can support analyzing and RCA for potential VPC flow logs even more easily. Specifically, we will focus on managing the number of rejects, as this helps ensure there weren’t any unauthorized or unusual activities:\\n\\n1. Set up an easy-to-use SLO (newly released) to detect when things are potentially degrading\\n\\n2. Create an ML job to analyze different fields of the VPC Flow log\\n\\n3. Using our newly released RAG-based AI Assistant to help analyze the logs without needing to know Elastic’s query language nor how to even graph on Elastic\\n\\n4. 
4. Use ES|QL to help understand and analyze latency patterns.\\n\\nIn subsequent blogs, we will use the AI Assistant and ES|QL to show how to get other insights beyond just REJECT/ACCEPT from VPC Flow logs.\\n\\n## Prerequisites and config\\n\\nIf you plan on following this blog, here are some of the components and details we used to set up this demonstration:\\n\\n- Ensure you have an account on [Elastic Cloud](https://cloud.elastic.co) and a deployed stack ([see instructions here](https://www.elastic.co/guide/en/elastic-stack/current/installing-elastic-stack.html)).\\n\\n- Follow the steps in the following blog to get [AWS’s three-tier app](https://github.com/aws-samples/aws-three-tier-web-architecture-workshop) installed as instructed in the GitHub repo, and bring in the [AWS VPC Flow logs](https://www.elastic.co/observability-labs/blog/aws-kinesis-data-firehose-observability-analytics).\\n\\n- Ensure you have an [ML node configured](https://www.elastic.co/guide/en/elasticsearch/reference/current/ml-settings.html) in your Elastic stack.\\n\\n- To use the AI Assistant, you will need a trial or an upgrade to Platinum.\\n\\n\\n## SLO with VPC Flow Logs\\n\\n\\nElastic’s SLO capability is based directly on the Google SRE Handbook, and all the definitions and semantics are used as described there. Users can perform the following with SLOs in Elastic:\\n\\n\\n* Define an SLO on logs, not just metrics - Users can use KQL (a log-based query), service availability, service latency, a custom metric, a histogram metric, or a timeslice metric.\\n* Define the SLO, SLI, error budget, and burn rates. Users can also use occurrence versus time slice-based budgeting.\xa0\\n* Manage, with dashboards, all the SLOs in a singular location.\\n* Trigger alerts from the defined SLO, whether the SLI is off, the burn rate is used up, or the error rate is X.\\n\\nSetting up an SLO for VPC flow logs is easy. You simply create a query you want to trigger off. In our case, we look for all the good events where _aws.vpcflow.action=ACCEPT_ and we define the target at 85%.\xa0\\n\\n![Setting up SLO for VPC Flow log](/assets/images/aws-vpc-flow-log-analysis-with-genai-elastic/VPCFlowSLOsetup.png)\\n\\nAs the following example shows, over the last 7 days, we have exceeded our budget by 43%. Additionally, we have not been compliant for the last 7 days.\\n\\n![VPC Flow Reject SLO](/assets/images/aws-vpc-flow-log-analysis-with-genai-elastic/VPCFlowSLOMiss.png)\\n\\n## Analyzing the SLO with AI Assistant\\n\\nNow that we see there is an issue with the VPC Flows, we immediately work with the AI Assistant to start analyzing the SLO. Because it\'s a chat interface, we simply open the AI Assistant and work through some simple analysis (see the animated GIF below for a demo):\\n\\n### AI Assistant analysis:\\n\\n* **what were the top 3 source.address that had _aws.vpcflow.action=REJECT_ over the last 7 days, which is causing this SLO issue?** - We wanted to see what could be causing the loss in error budget. Were there any particular source.addresses causing a heavy reject rate?\\n\\n  - The answer: A table with the highest count = 42670 and _source.address = 79.110.62.185_\\n\\n  - There is one singular _source.address_ that is causing the loss in SLO.\xa0\\n\\n
* **What is the largest number of\xa0 _aws.vpcflow.action=REJECT_ in a 30 min time frame for the last 3 days where the _source.address=79.110.62.185_?** - After understanding that a specific source.address is causing the loss in SLO, we want to understand the averages.\xa0\\n\\n  - **The answer:** \\"The largest number of _aws.vpcflow.action=REJECT_ in a 30-minute time frame for the last 3 days where the _source.address_ is 79.110.62.185 is 229. This occurred on 2024-06-01T04:00:00.000Z.\\"\\n\\n  - This suggests the REJECT rate was low but fairly consistent, rather than spiky, over the last 7 days.\\n\\n* **for the logs with _source.address_=\\"79.110.62.185\\" was there any country code of *source.geo.country_iso_code* field present. If yes what is the value** - Given the consistent REJECT rate, we wanted to understand where this traffic originated.\\n\\n  - **The answer:** Yes, there is a country code present in the *source.geo.country_iso_code* field for logs with _source.address_=\\"79.110.62.185\\". The value is BG (Bulgaria).\\n\\n* **Is there a specific destination.address where _source.address=79.110.62.185_ is getting a _aws.vpcflow.action=REJECT_. Give me both the destination.address and the number of REJECTs for that destination.address?**\\n\\n  - **The answer:** destination.address 10.0.0.27 shows a reject count of 53433 in this time frame.\\n\\n* **Graph the number of REJECT vs ACCEPT for _source.address_=\\"79.110.62.185\\" over the last 7 days. The graph is on a daily basis in a singular graph** - We asked this question to see what the comparison is between ACCEPT and REJECT.\xa0\\n\\n  - **The answer:** See the animated GIF: the generated graph is fairly stable.\\n\\n* **Were there any source.address that had a spike, high reject rate in a 30min period over the 30 days?** - We wanted to see if there were any other spikes.\xa0\\n\\n  - **The answer:** Yes, there was a source.address that had a spike in high reject rates in a 30-minute period over the last 30 days. _source.address_: 185.244.212.67, Reject Count: 8975, Time Period: 2024-05-22T03:00:00.000Z\\n\\n****\\n\\n### Watch the flow\\n\\n\\n\\n\\n\\n### Potential issue:\\n\\nThe server handling requests from source **_79.110.62.185_** is potentially having an issue.\\n\\nAgain using logs, we essentially asked the AI Assistant to give the _eni_ IDs where the internal IP address was 10.0.0.27.\\n\\n\\n![Finding the issue - webserver](/assets/images/aws-vpc-flow-log-analysis-with-genai-elastic/VPCFlow-findingwebserver.png)\\n\\nFrom our AWS console, we know that this is the web server. After further analysis in Elastic and with the developers, we realized a recently installed new version was causing a problem with connections.\\n\\n## Locating anomalies with ML\\n\\nWhile using the AI Assistant is great for analyzing information, another important aspect of VPC flow management is ensuring you can manage log spikes and anomalies. Elastic has a machine learning platform that allows you to develop jobs that analyze specific metrics or multiple metrics to look for anomalies.\\n\\nVPC Flow logs come with a large amount of information. The full set of fields is listed in [AWS docs](https://docs.aws.amazon.com/vpc/latest/userguide/flow-logs.html#flow-logs-basics). We will use a specific subset to help detect anomalies.\\n\\n
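To make that subset concrete, here is an abbreviated sketch of what a single flow log event can look like once ingested into Elastic. This is illustrative only: the field names are the ones used throughout this analysis, while the port and region values are invented.\\n\\n```python\\n# Hypothetical, abbreviated flow log document; values are for illustration.\\nflow_event = {\\n    \\"source.address\\": \\"79.110.62.185\\",\\n    \\"source.geo.country_iso_code\\": \\"BG\\",\\n    \\"destination.address\\": \\"10.0.0.27\\",\\n    \\"destination.port\\": 443,\\n    \\"destination.geo.region_iso_code\\": \\"US-VA\\",\\n    \\"aws.vpcflow.action\\": \\"REJECT\\",\\n}\\n```\\n\\n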
We set up anomaly detection for aws.vpcflow\\.action=REJECT, which requires us to use multimetric anomaly detection in Elastic.\\n\\nThe config we used includes:\\n\\nDetectors:\\n\\n- destination.address\\n\\n- destination.port\\n\\nInfluencers:\\n\\n- source.address\\n\\n- aws.vpcflow.action\\n\\n- destination.geo.region_iso_code\\n\\nThe way we set this up will help us understand if there is a large spike in REJECT/ACCEPT against _destination.address_ values from a specific _source.address_ and/or *destination.geo.region_iso_code* location.\\n\\n![Anomaly detection job config](/assets/images/aws-vpc-flow-log-analysis-with-genai-elastic/VPCFlowanomalysetup.png)\\n\\nOnce run, the job reveals something interesting:\\n\\n![Anomaly detected](/assets/images/aws-vpc-flow-log-analysis-with-genai-elastic/VPCFlowAnomalyDetection.png)\\n\\nNotice that _source.address_ 185.244.212.67 has had a high REJECT rate in the last 30 days.\xa0\\n\\nNotice where we found this before? In the AI Assistant!\\n\\nWhile we can run the AI Assistant and find this sort of anomaly, the ML job can be set up to run continuously and alert us on such spikes. This will help us understand if there are any issues with the web server, like the one we found above, or even potential security attacks.\\n\\n\\n## Conclusion:\\n\\nYou’ve now seen how easily Elastic’s RAG-based AI Assistant can help analyze VPC Flows without needing to know the query syntax, where the data is, or even the fields. Additionally, you’ve seen how we can alert you when there is a potential issue or degradation in service (via SLOs). Check out our other blogs on AWS VPC Flow analysis in Elastic:\\n\\n1. A full set of integrations to manage VPC Flows and the [entire end-to-end deployment on AWS](https://www.elastic.co/observability-labs/blog/aws-service-metrics-monitor-observability-easy).\xa0\\n\\n2. Elastic has a simple-to-use [AWS Firehose integration](https://www.elastic.co/observability-labs/blog/aws-kinesis-data-firehose-observability-analytics).\xa0\\n\\n3. Elastic’s tools such as [Discover, spike analysis,\xa0 and anomaly detection help provide you with better insights and analysis](https://www.elastic.co/observability-labs/blog/vpc-flow-logs-monitoring-analytics-observability).\\n\\n4. And a set of simple [Out-of-the-box dashboards](https://www.elastic.co/guide/en/observability/current/monitor-amazon-vpc-flow-logs.html#aws-firehose-dashboard)\\n\\n## Try it out\\n\\nExisting Elastic Cloud customers can access many of these features directly from the [Elastic Cloud console](https://cloud.elastic.co/). Not taking advantage of Elastic on the cloud? [Start a free trial](https://www.elastic.co/cloud/cloud-trial-overview).\\n\\nAll of this is also possible in your environment. [Learn how to get started today](https://www.elastic.co/observability/universal-profiling).\\n\\n_The release and timing of any features or functionality described in this post remain at Elastic\'s sole discretion. Any features or functionality not currently available may not be delivered on time or at all._\\n\\n_In this blog post, we may have used or referred to third party generative AI tools, which are owned and operated by their respective owners. 
Elastic does not have any control over the third party tools and we have no responsibility or liability for their content, operation or use, nor for any loss or damage that may arise from your use of such tools. Please exercise caution when using AI tools with personal, sensitive or confidential information. Any data you submit may be used for AI training or other purposes. There is no guarantee that information you provide will be kept secure or confidential. You should familiarize yourself with the privacy practices and terms of use of any generative AI tools prior to use._\\n\\n_Elastic, Elasticsearch, ESRE, Elasticsearch Relevance Engine and associated marks are trademarks, logos or registered trademarks of Elasticsearch N.V. in the United States and other countries. All other company and product names are trademarks, logos or registered trademarks of their respective owners._\\n","code":"var Component=(()=>{var u=Object.create;var a=Object.defineProperty;var w=Object.getOwnPropertyDescriptor;var g=Object.getOwnPropertyNames;var p=Object.getPrototypeOf,f=Object.prototype.hasOwnProperty;var m=(i,e)=>()=>(e||i((e={exports:{}}).exports,e),e.exports),y=(i,e)=>{for(var t in e)a(i,t,{get:e[t],enumerable:!0})},r=(i,e,t,o)=>{if(e&&typeof e==\\"object\\"||typeof e==\\"function\\")for(let s of g(e))!f.call(i,s)&&s!==t&&a(i,s,{get:()=>e[s],enumerable:!(o=w(e,s))||o.enumerable});return i};var b=(i,e,t)=>(t=i!=null?u(p(i)):{},r(e||!i||!i.__esModule?a(t,\\"default\\",{value:i,enumerable:!0}):t,i)),v=i=>r(a({},\\"__esModule\\",{value:!0}),i);var h=m((k,l)=>{l.exports=_jsx_runtime});var C={};y(C,{default:()=>d,frontmatter:()=>A});var n=b(h()),A={title:\\"AWS VPC Flow log analysis with GenAI in Elastic\\",slug:\\"aws-vpc-flow-log-analysis-with-genai-elastic\\",date:\\"2024-06-07\\",description:\\"Elastic has a set of embedded capabilities such as a GenAI RAG-based AI Assistant and a machine learning platform as part of the product baseline. These make analyzing the vast number of logs you get from AWS VPC Flows easier.\\",author:[{slug:\\"bahubali-shetti\\"}],image:\\"21-cubes.jpeg\\",tags:[{slug:\\"aws\\"},{slug:\\"genai\\"},{slug:\\"aws-vpc-flow-logs\\"},{slug:\\"log-analytics\\"},{slug:\\"ai-assistant\\"}]};function c(i){let e={a:\\"a\\",em:\\"em\\",h2:\\"h2\\",h3:\\"h3\\",hr:\\"hr\\",img:\\"img\\",li:\\"li\\",ol:\\"ol\\",p:\\"p\\",strong:\\"strong\\",ul:\\"ul\\",...i.components},{Video:t}=e;return t||E(\\"Video\\",!0),(0,n.jsxs)(n.Fragment,{children:[(0,n.jsx)(e.p,{children:\\"Elastic Observability provides a full observability solution, by supporting metrics, traces and logs for applications and infrastructure. In managing AWS deployments, VPC flow logs are critical in managing performance, network visibility, security, compliance, and overall management of your AWS environment. Several examples of :\\"}),`\\n`,(0,n.jsxs)(e.ol,{children:[`\\n`,(0,n.jsxs)(e.li,{children:[`\\n`,(0,n.jsx)(e.p,{children:\\"Where traffic is coming in from and going out to from the deployment, and within the deployment. 
This helps identify unusual or unauthorized communications\\"}),`\\n`]}),`\\n`,(0,n.jsxs)(e.li,{children:[`\\n`,(0,n.jsx)(e.p,{children:\\"Traffic volumes detecting spikes or drops which could indicate service issues in production or an increase in customer traffic\\"}),`\\n`]}),`\\n`,(0,n.jsxs)(e.li,{children:[`\\n`,(0,n.jsx)(e.p,{children:\\"Latency and Performance bottlenecks - with VPC Flow logs, you can look at latency for a flow (in and outflows), and understand patterns\\"}),`\\n`]}),`\\n`,(0,n.jsxs)(e.li,{children:[`\\n`,(0,n.jsx)(e.p,{children:\\"Accepted and rejected traffic helps determine where potential security threats and misconfigurations lie.\\\\xA0\\"}),`\\n`]}),`\\n`]}),`\\n`,(0,n.jsx)(e.p,{children:\\"AWS VPC Logs is a great example of how logs are great. Logging is an important part of Observability, for which we generally think of metrics and tracing. However, the amount of logs an application and the underlying infrastructure output can be significantly daunting with VPC Logs. However, it also provides a significant amount of insight.\\"}),`\\n`,(0,n.jsx)(e.p,{children:\\"Before we proceed, it is important to understand what Elastic provides in managing AWS and VPC Flow logs:\\"}),`\\n`,(0,n.jsxs)(e.ol,{children:[`\\n`,(0,n.jsxs)(e.li,{children:[`\\n`,(0,n.jsxs)(e.p,{children:[\\"A full set of integrations to manage VPC Flows and the \\",(0,n.jsx)(e.a,{href:\\"https://www.elastic.co/observability-labs/blog/aws-service-metrics-monitor-observability-easy\\",rel:\\"nofollow\\",children:\\"entire end-to-end deployment on AWS\\"}),\\".\\\\xA0\\"]}),`\\n`]}),`\\n`,(0,n.jsxs)(e.li,{children:[`\\n`,(0,n.jsxs)(e.p,{children:[\\"Elastic has a simple-to-use \\",(0,n.jsx)(e.a,{href:\\"https://www.elastic.co/observability-labs/blog/aws-kinesis-data-firehose-observability-analytics\\",rel:\\"nofollow\\",children:\\"AWS Firehose integration\\"}),\\".\\\\xA0\\"]}),`\\n`]}),`\\n`,(0,n.jsxs)(e.li,{children:[`\\n`,(0,n.jsxs)(e.p,{children:[\\"Elastic\\\\u2019s tools such as \\",(0,n.jsx)(e.a,{href:\\"https://www.elastic.co/observability-labs/blog/vpc-flow-logs-monitoring-analytics-observability\\",rel:\\"nofollow\\",children:\\"Discover, spike analysis,\\\\xA0 and anomaly detection help provide you with better insights and analysis\\"}),\\".\\"]}),`\\n`]}),`\\n`,(0,n.jsxs)(e.li,{children:[`\\n`,(0,n.jsxs)(e.p,{children:[\\"And a set of simple \\",(0,n.jsx)(e.a,{href:\\"https://www.elastic.co/guide/en/observability/current/monitor-amazon-vpc-flow-logs.html#aws-firehose-dashboard\\",rel:\\"nofollow\\",children:\\"Out-of-the-box dashboards\\"})]}),`\\n`]}),`\\n`]}),`\\n`,(0,n.jsx)(e.p,{children:\\"In today\\\\u2019s blog, we\\\\u2019ll cover how Elastics\\\\u2019 other features can support analyzing and RCA for potential VPC flow logs even more easily. 
Specifically, we will focus on managing the number of rejects, as this helps ensure there weren\\\\u2019t any unauthorized or unusual activities:\\"}),`\\n`,(0,n.jsxs)(e.ol,{children:[`\\n`,(0,n.jsxs)(e.li,{children:[`\\n`,(0,n.jsx)(e.p,{children:\\"Set up an easy-to-use SLO (newly released) to detect when things are potentially degrading\\"}),`\\n`]}),`\\n`,(0,n.jsxs)(e.li,{children:[`\\n`,(0,n.jsx)(e.p,{children:\\"Create an ML job to analyze different fields of the VPC Flow log\\"}),`\\n`]}),`\\n`,(0,n.jsxs)(e.li,{children:[`\\n`,(0,n.jsx)(e.p,{children:\\"Using our newly released RAG-based AI Assistant to help analyze the logs without needing to know Elastic\\\\u2019s query language nor how to even graph on Elastic\\"}),`\\n`]}),`\\n`,(0,n.jsxs)(e.li,{children:[`\\n`,(0,n.jsx)(e.p,{children:\\"ES|QL will help understand and analyze add latency for patterns.\\"}),`\\n`]}),`\\n`]}),`\\n`,(0,n.jsx)(e.p,{children:\\"In subsequent blogs, we will use AI Assistant and ESQL to show how to get other insights beyond just REJECT/ACCEPT from VPC Flow log.\\"}),`\\n`,(0,n.jsx)(e.h2,{id:\\"prerequisites-and-config\\",children:\\"Prerequisites and config\\"}),`\\n`,(0,n.jsx)(e.p,{children:\\"If you plan on following this blog, here are some of the components and details we used to set up this demonstration:\\"}),`\\n`,(0,n.jsxs)(e.ul,{children:[`\\n`,(0,n.jsxs)(e.li,{children:[`\\n`,(0,n.jsxs)(e.p,{children:[\\"Ensure you have an account on \\",(0,n.jsx)(e.a,{href:\\"https://cloud.elastic.co\\",rel:\\"nofollow\\",children:\\"Elastic Cloud\\"}),\\" and a deployed stack (\\",(0,n.jsx)(e.a,{href:\\"https://www.elastic.co/guide/en/elastic-stack/current/installing-elastic-stack.html\\",rel:\\"nofollow\\",children:\\"see instructions here\\"}),\\").\\"]}),`\\n`]}),`\\n`,(0,n.jsxs)(e.li,{children:[`\\n`,(0,n.jsxs)(e.p,{children:[\\"Follow the steps in the following blog to get \\",(0,n.jsx)(e.a,{href:\\"https://github.com/aws-samples/aws-three-tier-web-architecture-workshop\\",rel:\\"nofollow\\",children:\\"AWS\\\\u2019s three-tier app\\"}),\\" installed instructed in git, and bring in the \\",(0,n.jsx)(e.a,{href:\\"https://www.elastic.co/observability-labs/blog/aws-kinesis-data-firehose-observability-analytics\\",rel:\\"nofollow\\",children:\\"AWS VPC Flow logs\\"}),\\".\\"]}),`\\n`]}),`\\n`,(0,n.jsxs)(e.li,{children:[`\\n`,(0,n.jsxs)(e.p,{children:[\\"Ensure you have an \\",(0,n.jsx)(e.a,{href:\\"https://www.elastic.co/guide/en/elasticsearch/reference/current/ml-settings.html\\",rel:\\"nofollow\\",children:\\"ML node configured\\"}),\\" in your Elastic stack\\"]}),`\\n`]}),`\\n`,(0,n.jsxs)(e.li,{children:[`\\n`,(0,n.jsx)(e.p,{children:\\"To use the AI Assistant you will need a trial or upgrade to Platinum.\\"}),`\\n`]}),`\\n`]}),`\\n`,(0,n.jsx)(e.h2,{id:\\"slo-with-vpc-flow-logs\\",children:\\"SLO with VPC Flow Logs\\"}),`\\n`,(0,n.jsx)(e.p,{children:\\"Elastic\\\\u2019s SLO capability is based directly on the Google SRE Handbook. All the definitions and semantics are utilized as described in Google\\\\u2019s SRE handbook. Hence users can perform the following on SLOs in Elastic:\\"}),`\\n`,(0,n.jsxs)(e.ul,{children:[`\\n`,(0,n.jsx)(e.li,{children:\\"Define an SLO on Logs not just metrics - Users can use KQL (log-based query), service availability, service latency, custom metric, histogram metric, or a timeslice metric.\\"}),`\\n`,(0,n.jsx)(e.li,{children:\\"Define SLO, SLI, Error budget and burn rates. 
Users can also use occurrence versus time slice-based budgeting.\\\\xA0\\"}),`\\n`,(0,n.jsx)(e.li,{children:\\"Manage, with dashboards, all the SLOs in a singular location.\\"}),`\\n`,(0,n.jsx)(e.li,{children:\\"Trigger alerts from the defined SLO, whether the SLI is off, the burn rate is used up, or the error rate is X.\\"}),`\\n`]}),`\\n`,(0,n.jsxs)(e.p,{children:[\\"Setting up an SLO for VPC is easy. You simply create a query you want to trigger off. In our case, we look for all the good events where \\",(0,n.jsx)(e.em,{children:\\"aws.vpcflow.action=ACCEPT\\"}),\\" and we define the target at 85%.\\\\xA0\\"]}),`\\n`,(0,n.jsx)(e.p,{children:(0,n.jsx)(e.img,{src:\\"/assets/images/aws-vpc-flow-log-analysis-with-genai-elastic/VPCFlowSLOsetup.png\\",alt:\\"Setting up SLO for VPC FLow log\\",width:\\"921\\",height:\\"912\\"})}),`\\n`,(0,n.jsx)(e.p,{children:\\"As the following example shows, over the last 7 days, we have exceeded our budget by 43%. Additionally, we have not complied for the last 7 days.\\"}),`\\n`,(0,n.jsx)(e.p,{children:(0,n.jsx)(e.img,{src:\\"/assets/images/aws-vpc-flow-log-analysis-with-genai-elastic/VPCFlowSLOMiss.png\\",alt:\\"VPC Flow Reject SLO\\",width:\\"1706\\",height:\\"987\\"})}),`\\n`,(0,n.jsx)(e.h2,{id:\\"analyzing-the-slo-with-ai-assistant\\",children:\\"Analyzing the SLO with AI Assistant\\"}),`\\n`,(0,n.jsx)(e.p,{children:\\"Now that we see that there is an issue with the VPC Flows, we immediately work with the AI Assistant to start analyzing the SLO. Because it\'s a chat interface we simply open the AI Assistant and work through some simple analysis: (See Animated GIF for a demo below)\\"}),`\\n`,(0,n.jsx)(e.h3,{id:\\"ai-assistant-analysis\\",children:\\"AI Assistant analysis:\\"}),`\\n`,(0,n.jsxs)(e.ul,{children:[`\\n`,(0,n.jsxs)(e.li,{children:[`\\n`,(0,n.jsxs)(e.p,{children:[(0,n.jsxs)(e.strong,{children:[\\"what were the top 3 source.address that had \\",(0,n.jsx)(e.em,{children:\\"aws.vpcflow.action=REJECT\\"}),\\" over the last 7 days, which is causing this SLO issue?\\"]}),\\" - We wanted to simply see what could be causing the loss in error budget. Were there any particular source.addresses causing a heavy reject rate.\\"]}),`\\n`,(0,n.jsxs)(e.ul,{children:[`\\n`,(0,n.jsxs)(e.li,{children:[`\\n`,(0,n.jsxs)(e.p,{children:[\\"The answer: A table with the highest count = 42670 and \\",(0,n.jsx)(e.em,{children:\\"source.address = 79.110.62.185\\"})]}),`\\n`]}),`\\n`,(0,n.jsxs)(e.li,{children:[`\\n`,(0,n.jsxs)(e.p,{children:[\\"There is one singular \\",(0,n.jsx)(e.em,{children:\\"source.address\\"}),\\" that is causing the loss in SLO.\\\\xA0\\"]}),`\\n`]}),`\\n`]}),`\\n`]}),`\\n`,(0,n.jsxs)(e.li,{children:[`\\n`,(0,n.jsxs)(e.p,{children:[(0,n.jsxs)(e.strong,{children:[\\"What is the largest number of\\\\xA0 \\",(0,n.jsx)(e.em,{children:\\"aws.vpcflow.action=REJECT\\"}),\\" in a 30 min time frame for the last 3 days where the \\",(0,n.jsx)(e.em,{children:\\"source.address=79.110.62.185\\"}),\\"?\\"]}),\\" - After understanding that a specific source.address is causing the loss in SLO, we want to understand the averages.\\\\xA0\\"]}),`\\n`,(0,n.jsxs)(e.ul,{children:[`\\n`,(0,n.jsxs)(e.li,{children:[`\\n`,(0,n.jsxs)(e.p,{children:[\'**The answer: ** \\"The largest number of \',(0,n.jsx)(e.em,{children:\\"aws.vpcflow.action=REJECT\\"}),\\" in a 30-minute time frame for the last 3 days where the \\",(0,n.jsx)(e.em,{children:\\"source.address\\"}),\\" is 79.110.62.185 is 229. 
This occurred on 2024-06-01T04:00:00.000Z.\\\\u201D\\"]}),`\\n`]}),`\\n`,(0,n.jsxs)(e.li,{children:[`\\n`,(0,n.jsx)(e.p,{children:\\"It means there must be a low REJECT rate but fairly consistent vs spiky over the last 7 days.\\"}),`\\n`]}),`\\n`]}),`\\n`]}),`\\n`,(0,n.jsxs)(e.li,{children:[`\\n`,(0,n.jsxs)(e.p,{children:[(0,n.jsxs)(e.strong,{children:[\\"for the logs with \\",(0,n.jsx)(e.em,{children:\\"source.address\\"}),\'=\\"79.110.62.185\\" was there any country code of \',(0,n.jsx)(e.em,{children:\\"source.geo.country_iso_code\\"}),\\" field present. If yes what is the value\\"]}),\\" - Given the last question showed a low REJECT rate, it only means that this was fairly consistent vs spiky over the last 7 days.\\"]}),`\\n`,(0,n.jsxs)(e.ul,{children:[`\\n`,(0,n.jsxs)(e.li,{children:[(0,n.jsx)(e.strong,{children:\\"The answer:\\"}),\\" Yes, there is a country code present in the \\",(0,n.jsx)(e.em,{children:\\"source.geo.country_iso_code\\"}),\\" field for logs with \\",(0,n.jsx)(e.em,{children:\\"source.address\\"}),\'=\\"79.110.62.185\\". The value is BG (Bulgaria).\']}),`\\n`]}),`\\n`]}),`\\n`,(0,n.jsxs)(e.li,{children:[`\\n`,(0,n.jsx)(e.p,{children:(0,n.jsxs)(e.strong,{children:[\\"Is there a specific destination.address where \\",(0,n.jsx)(e.em,{children:\\"source.address=79.110.62.185\\"}),\\" is getting a \\",(0,n.jsx)(e.em,{children:\\"aws.vpcflow.action=REJECT\\"}),\\". Give me both the destination.address and the number of REJECTs for that destination.address?\\"]})}),`\\n`,(0,n.jsxs)(e.ul,{children:[`\\n`,(0,n.jsxs)(e.li,{children:[(0,n.jsx)(e.strong,{children:\\"The answer:\\"}),\\" destination.address of 10.0.0.27 is giving a reject number of 53433 in this time frame.\\"]}),`\\n`]}),`\\n`]}),`\\n`,(0,n.jsxs)(e.li,{children:[`\\n`,(0,n.jsxs)(e.p,{children:[(0,n.jsxs)(e.strong,{children:[\\"Graph the number of REJECT vs ACCEPT for \\",(0,n.jsx)(e.em,{children:\\"source.address\\"}),\'=\\"79.110.62.185\\" over the last 7 days. The graph is on a daily basis in a singular graph\']}),\\" - We asked this question to see what the comparison is between ACCEPT and REJECT.\\\\xA0\\"]}),`\\n`,(0,n.jsxs)(e.ul,{children:[`\\n`,(0,n.jsxs)(e.li,{children:[(0,n.jsx)(e.strong,{children:\\"The answer:\\"}),\\" See the animated GIF to see that the generated graph is fairly stable\\"]}),`\\n`]}),`\\n`]}),`\\n`,(0,n.jsxs)(e.li,{children:[`\\n`,(0,n.jsxs)(e.p,{children:[(0,n.jsx)(e.strong,{children:\\"Were there any source.address that had a spike, high reject rate in. a 30min period over the 30 days?\\"}),\\" - We wanted to see if there was any other spike\\\\xA0\\"]}),`\\n`,(0,n.jsxs)(e.ul,{children:[`\\n`,(0,n.jsxs)(e.li,{children:[(0,n.jsx)(e.strong,{children:\\"The answer\\"}),\\" - Yes, there was a source.address that had a spike in high reject rates in a 30-minute period over the last 30 days. 
\\",(0,n.jsx)(e.em,{children:\\"source.address\\"}),\\": 185.244.212.67, Reject Count: 8975, Time Period: 2024-05-22T03:00:00.000Z\\"]}),`\\n`]}),`\\n`]}),`\\n`]}),`\\n`,(0,n.jsx)(e.hr,{}),`\\n`,(0,n.jsx)(e.h3,{id:\\"watch-the-flow\\",children:\\"Watch the flow\\"}),`\\n`,(0,n.jsx)(t,{vidyardUuid:\\"1jvEpzfkci9j6AoL42XWA3\\"}),`\\n`,(0,n.jsx)(e.h3,{id:\\"potential-issue\\",children:\\"Potential issue:\\"}),`\\n`,(0,n.jsxs)(e.p,{children:[\\"he server handling requests from source \\",(0,n.jsx)(e.strong,{children:(0,n.jsx)(e.em,{children:\\"79.110.62.185\\"})}),\\" is potentially having an issue.\\"]}),`\\n`,(0,n.jsxs)(e.p,{children:[\\"Again using logs, we essentially asked the AI Assistant to give the \\",(0,n.jsx)(e.em,{children:\\"eni\\"}),\\" ids where the internal ip address was 10.0.0.27\\"]}),`\\n`,(0,n.jsx)(e.p,{children:(0,n.jsx)(e.img,{src:\\"/assets/images/aws-vpc-flow-log-analysis-with-genai-elastic/VPCFlow-findingwebserver.png\\",alt:\\"Finding the issue - webserver\\",width:\\"1529\\",height:\\"856\\"})}),`\\n`,(0,n.jsx)(e.p,{children:\\"From our AWS console, we know that this is the webserver. Further analysis in Elastic, and with the developers we realized there is a new version that was installed recently causing a problem with connections.\\"}),`\\n`,(0,n.jsx)(e.h2,{id:\\"locating-anomalies-with-ml\\",children:\\"Locating anomalies with ML\\"}),`\\n`,(0,n.jsx)(e.p,{children:\\"While using the AI Assistant is great for analyzing information, another important aspect of VPC flow management is to ensure you can manage log spikes and anomalies. Elastic has a machine learning platform that allows you to develop jobs to analyze specific metrics or multiple metrics to look for anomalies.\\"}),`\\n`,(0,n.jsxs)(e.p,{children:[\\"VPC Flow logs come with a large amount of information. The full set of fields is listed in \\",(0,n.jsx)(e.a,{href:\\"https://docs.aws.amazon.com/vpc/latest/userguide/flow-logs.html#flow-logs-basics\\",rel:\\"nofollow\\",children:\\"AWS docs\\"}),\\". 
We will use a specific subset to help detect anomalies.\\"]}),`\\n`,(0,n.jsx)(e.p,{children:\\"We were setting up anomalies for aws.vpcflow.action=REJECT, which requires us to use multimetric anomaly detection in Elastic.\\"}),`\\n`,(0,n.jsx)(e.p,{children:\\"The config we used utilizes:\\"}),`\\n`,(0,n.jsx)(e.p,{children:\\"Detectors:\\"}),`\\n`,(0,n.jsxs)(e.ul,{children:[`\\n`,(0,n.jsxs)(e.li,{children:[`\\n`,(0,n.jsx)(e.p,{children:\\"destination.address\\"}),`\\n`]}),`\\n`,(0,n.jsxs)(e.li,{children:[`\\n`,(0,n.jsx)(e.p,{children:\\"destination.port\\"}),`\\n`]}),`\\n`]}),`\\n`,(0,n.jsx)(e.p,{children:\\"Influencers:\\"}),`\\n`,(0,n.jsxs)(e.ul,{children:[`\\n`,(0,n.jsxs)(e.li,{children:[`\\n`,(0,n.jsx)(e.p,{children:\\"source.address\\"}),`\\n`]}),`\\n`,(0,n.jsxs)(e.li,{children:[`\\n`,(0,n.jsx)(e.p,{children:\\"aws.vpcflow.action\\"}),`\\n`]}),`\\n`,(0,n.jsxs)(e.li,{children:[`\\n`,(0,n.jsx)(e.p,{children:\\"destination.geo.region_iso_code\\"}),`\\n`]}),`\\n`]}),`\\n`,(0,n.jsxs)(e.p,{children:[\\"The way we set this up will help us understand if there is a large spike in REJECT/ACCEPT against \\",(0,n.jsx)(e.em,{children:\\"destination.address\\"}),\\" values from a specific \\",(0,n.jsx)(e.em,{children:\\"source.address\\"}),\\" and/or \\",(0,n.jsx)(e.em,{children:\\"destination.geo.region_iso_code\\"}),\\" location.\\"]}),`\\n`,(0,n.jsx)(e.p,{children:(0,n.jsx)(e.img,{src:\\"/assets/images/aws-vpc-flow-log-analysis-with-genai-elastic/VPCFlowanomalysetup.png\\",alt:\\"Anomaly detection job config\\",width:\\"1594\\",height:\\"803\\"})}),`\\n`,(0,n.jsx)(e.p,{children:\\"The job once run reveals something interesting:\\"}),`\\n`,(0,n.jsx)(e.p,{children:(0,n.jsx)(e.img,{src:\\"/assets/images/aws-vpc-flow-log-analysis-with-genai-elastic/VPCFlowAnomalyDetection.png\\",alt:\\"Anomaly detected\\",width:\\"2894\\",height:\\"1906\\"})}),`\\n`,(0,n.jsxs)(e.p,{children:[\\"Notice that \\",(0,n.jsx)(e.em,{children:\\"source.address\\"}),\\" 185.244.212.67 has had a high REJECT rate in the last 30 days.\\\\xA0\\"]}),`\\n`,(0,n.jsx)(e.p,{children:\\"Notice where we found this before? In the AI Assistant!!!!!\\"}),`\\n`,(0,n.jsx)(e.p,{children:\\"While we can run the AI Assistant and find this sort of anomaly, the ML job can be setup to run continuously and alert us on such spikes. This will help us understand if there are any issues with the webserver like we found above or even potential security attacks.\\"}),`\\n`,(0,n.jsx)(e.h2,{id:\\"conclusion\\",children:\\"Conclusion:\\"}),`\\n`,(0,n.jsx)(e.p,{children:\\"You\\\\u2019ve now seen how easily Elastic\\\\u2019s RAG-based AI Assistant can help analyze VPC Flows without even the need to know query syntax, understand where the data is, and understand even the fields. Additionally, you\\\\u2019ve also seen how we can alert you when a potential issue or degradation in service (SLO). 
Check out our other blogs on AWS VPC Flow analysis in Elastic:\\"}),`\\n`,(0,n.jsxs)(e.ol,{children:[`\\n`,(0,n.jsxs)(e.li,{children:[`\\n`,(0,n.jsxs)(e.p,{children:[\\"A full set of integrations to manage VPC Flows and the \\",(0,n.jsx)(e.a,{href:\\"https://www.elastic.co/observability-labs/blog/aws-service-metrics-monitor-observability-easy\\",rel:\\"nofollow\\",children:\\"entire end-to-end deployment on AWS\\"}),\\".\\\\xA0\\"]}),`\\n`]}),`\\n`,(0,n.jsxs)(e.li,{children:[`\\n`,(0,n.jsxs)(e.p,{children:[\\"Elastic has a simple-to-use \\",(0,n.jsx)(e.a,{href:\\"https://www.elastic.co/observability-labs/blog/aws-kinesis-data-firehose-observability-analytics\\",rel:\\"nofollow\\",children:\\"AWS Firehose integration\\"}),\\".\\\\xA0\\"]}),`\\n`]}),`\\n`,(0,n.jsxs)(e.li,{children:[`\\n`,(0,n.jsxs)(e.p,{children:[\\"Elastic\\\\u2019s tools such as \\",(0,n.jsx)(e.a,{href:\\"https://www.elastic.co/observability-labs/blog/vpc-flow-logs-monitoring-analytics-observability\\",rel:\\"nofollow\\",children:\\"Discover, spike analysis,\\\\xA0 and anomaly detection help provide you with better insights and analysis\\"}),\\".\\"]}),`\\n`]}),`\\n`,(0,n.jsxs)(e.li,{children:[`\\n`,(0,n.jsxs)(e.p,{children:[\\"And a set of simple \\",(0,n.jsx)(e.a,{href:\\"https://www.elastic.co/guide/en/observability/current/monitor-amazon-vpc-flow-logs.html#aws-firehose-dashboard\\",rel:\\"nofollow\\",children:\\"Out-of-the-box dashboards\\"})]}),`\\n`]}),`\\n`]}),`\\n`,(0,n.jsx)(e.h2,{id:\\"try-it-out\\",children:\\"Try it out\\"}),`\\n`,(0,n.jsxs)(e.p,{children:[\\"Existing Elastic Cloud customers can access many of these features directly from the \\",(0,n.jsx)(e.a,{href:\\"https://cloud.elastic.co/\\",rel:\\"nofollow\\",children:\\"Elastic Cloud console\\"}),\\". Not taking advantage of Elastic on the cloud? \\",(0,n.jsx)(e.a,{href:\\"https://www.elastic.co/cloud/cloud-trial-overview\\",rel:\\"nofollow\\",children:\\"Start a free trial\\"}),\\".\\"]}),`\\n`,(0,n.jsxs)(e.p,{children:[\\"All of this is also possible in your environment. \\",(0,n.jsx)(e.a,{href:\\"https://www.elastic.co/observability/universal-profiling\\",rel:\\"nofollow\\",children:\\"Learn how to get started today\\"}),\\".\\"]}),`\\n`,(0,n.jsx)(e.p,{children:(0,n.jsx)(e.em,{children:\\"The release and timing of any features or functionality described in this post remain at Elastic\'s sole discretion. Any features or functionality not currently available may not be delivered on time or at all.\\"})}),`\\n`,(0,n.jsx)(e.p,{children:(0,n.jsx)(e.em,{children:\\"In this blog post, we may have used or referred to third party generative AI tools, which are owned and operated by their respective owners. Elastic does not have any control over the third party tools and we have no responsibility or liability for their content, operation or use, nor for any loss or damage that may arise from your use of such tools. Please exercise caution when using AI tools with personal, sensitive or confidential information. Any data you submit may be used for AI training or other purposes. There is no guarantee that information you provide will be kept secure or confidential. You should familiarize yourself with the privacy practices and terms of use of any generative AI tools prior to use.\\"})}),`\\n`,(0,n.jsx)(e.p,{children:(0,n.jsx)(e.em,{children:\\"Elastic, Elasticsearch, ESRE, Elasticsearch Relevance Engine and associated marks are trademarks, logos or registered trademarks of Elasticsearch N.V. in the United States and other countries. 
All other company and product names are trademarks, logos or registered trademarks of their respective owners.\\"})})]})}function d(i={}){let{wrapper:e}=i.components||{};return e?(0,n.jsx)(e,{...i,children:(0,n.jsx)(c,{...i})}):c(i)}function E(i,e){throw new Error(\\"Expected \\"+(e?\\"component\\":\\"object\\")+\\" `\\"+i+\\"` to be defined: you likely forgot to import, pass, or provide it.\\")}return v(C);})();\\n;return Component;"},"_id":"articles/aws-vpc-flow-log-analysis-with-genai-elastic.mdx","_raw":{"sourceFilePath":"articles/aws-vpc-flow-log-analysis-with-genai-elastic.mdx","sourceFileName":"aws-vpc-flow-log-analysis-with-genai-elastic.mdx","sourceFileDir":"articles","contentType":"mdx","flattenedPath":"articles/aws-vpc-flow-log-analysis-with-genai-elastic"},"type":"Article","imageUrl":"/assets/images/aws-vpc-flow-log-analysis-with-genai-elastic/21-cubes.jpeg","readingTime":"10 min read","url":"/aws-vpc-flow-log-analysis-with-genai-elastic","headings":[{"level":2,"title":"Prerequisites and config","href":"#prerequisites-and-config"},{"level":2,"title":"SLO with VPC Flow Logs","href":"#slo-with-vpc-flow-logs"},{"level":2,"title":"Analyzing the SLO with AI Assistant","href":"#analyzing-the-slo-with-ai-assistant"},{"level":3,"title":"AI Assistant analysis:","href":"#ai-assistant-analysis"},{"level":3,"title":"Watch the flow","href":"#watch-the-flow"},{"level":3,"title":"Potential issue:","href":"#potential-issue"},{"level":2,"title":"Locating anomalies with ML","href":"#locating-anomalies-with-ml"},{"level":2,"title":"Conclusion:","href":"#conclusion"},{"level":2,"title":"Try it out","href":"#try-it-out"}]},{"title":"Best practices for instrumenting OpenTelemetry","slug":"best-practices-instrumenting-opentelemetry","date":"2023-09-13","description":"Instrumenting OpenTelemetry is complex. Even using auto-instrumentation requires understanding details about your application and OpenTelemetry configuration options. We’ll cover the best practices for instrumenting applications for OpenTelemetry.","image":"ecs-otel-announcement-3.jpeg","author":[{"slug":"bahubali-shetti","type":"Author","_raw":{}}],"tags":[{"slug":"opentelemetry","type":"Tag","_raw":{}},{"slug":"apm","type":"Tag","_raw":{}}],"body":{"raw":"\\nOpenTelemetry (OTel) is steadily gaining broad industry adoption. As one of the major Cloud Native Computing Foundation (CNCF) projects, with as many commits as Kubernetes, it is gaining support from major ISVs and cloud providers delivering support for the framework. Many global companies from finance, insurance, tech, and other industries are starting to standardize on OpenTelemetry. With OpenTelemetry, DevOps teams have a consistent approach to collecting and ingesting telemetry data providing a de-facto standard for observability. With that, teams can rely on vendor-agnostic, future-proof instrumentation of their applications that allows them to switch observability backends without additional overhead in adapting instrumentation.\\n\\nTeams that have chosen OpenTelemetry for instrumentation face a choice between different instrumentation techniques and data collection approaches. Determining how to instrument and what mechanism to use can be challenging. 
In this blog, we will go over Elastic’s recommendations around some best practices for OpenTelemetry instrumentation:\\n\\n- **Automatic or manual?** We’ll cover the need for one versus the other and provide recommendations based on your situation.\\n- **Collector or direct from the application?** While the traditional option is to use a collector, observability tools like Elastic Observability can take telemetry from OpenTelemetry applications directly.\\n- **What to instrument from OTel SDKs.** Traces and metrics are well contributed to ([per the table in OTel docs](https://opentelemetry.io/docs/instrumentation/)), but logs are still in progress. Elastic\xae is improving the progress with its [contribution of ECS to OTel](https://www.elastic.co/blog/ecs-elastic-common-schema-otel-opentelemetry-announcement). Regardless of the status from OTel, you need to test and ensure these instrumentations work for you.\\n- **Advantages and disadvantages of OpenTelemetry**\\n\\n## OTel automatic or manual instrumentation: Which one should I use?\\n\\nWhile there are two ways to instrument your applications with OpenTelemetry — automatic and manual — there isn’t a perfect answer, as it depends on your needs. There are pros and cons of using one versus another, such as:\\n\\n- Auto-magic experience vs. control over instrumentation\\n- Customization vs. out-of-the-box data\\n- Instrumentation overhead\\n- Simplicity vs. flexibility\\n\\nAdditionally, you might even land on a combination depending on availability and need.\\n\\nLet’s review both automatic and manual instrumentation and explore specific recommendations.\\n\\n### Auto-instrumentation\\n\\nFor most of the programming languages and runtimes, OpenTelemetry provides an auto-instrumentation approach for gathering telemetry data. Auto-instrumentation provides a set of pre-defined, out-of-the-box instrumentation modules for well-known frameworks and libraries. With that, users can gather telemetry data (such as traces, metrics, and logs) from well-known frameworks and libraries used by their application with only minimal or even no need for code changes.\\n\\nHere are some of the apparent benefits of using auto-instrumentation:\\n\\n- Quicker development and path to production. Auto-instrumentation saves time by accelerating the process of integrating telemetry into an application, allowing more focus on other critical tasks.\\n- Simpler maintenance by only having to update one line, which is usually the container start command where auto-instrumentation is configured, versus having to update multiple lines of code across multiple classes, methods, and services.\\n- Easier to keep up with the latest features and improvements in the OpenTelemetry project without manually updating the instrumentation of used libraries and/or code.\\n\\nThere are also some disadvantages and limitations of the auto-instrumentation approach:\\n\\n- Auto-instrumentation collects telemetry data only for the frameworks and libraries in use for which an explicit auto-instrumentation module exists. In particular, it’s unlikely that auto-instrumentation would collect telemetry data for “exotic” or custom libraries.\\n- Auto-instrumentation does not capture telemetry for pure custom code (that does not use well-known libraries underneath).\\n- Auto-instrumentation modules come with a pre-defined, opinionated instrumentation logic that provides sufficient and meaningful information in the vast majority of cases. 
However, in some custom edge cases, the information value, structure, or level of detail of the data provided by auto-instrumentation modules might not be sufficient.\\n- Depending on the runtime, technology, and size of the target application, auto-instrumentation may come with a (slightly) higher start-up or runtime overhead compared to manual instrumentation. In the majority of cases, this overhead is negligible but may become a problem in some edge cases.\\n\\n[Here](https://github.com/elastic/workshops-instruqt/blob/main/Elastiflix/python-favorite-otel-auto/Dockerfile) is an example of a Python application that was auto-instrumented with OpenTelemetry. If you had a Python application locally, you would run the command below to auto-instrument it, with the exporter, service name, and endpoint values supplied through environment variables:\\n\\n```bash\\nopentelemetry-instrument \\\\\\n    --traces_exporter \\"$OTEL_TRACES_EXPORTER\\" \\\\\\n    --metrics_exporter \\"$OTLP_METRICS_EXPORTER\\" \\\\\\n    --service_name \\"$OTLP_SERVICE_NAME\\" \\\\\\n    --exporter_otlp_endpoint \\"$OTEL_EXPORTER_TRACES_ENDPOINT\\" \\\\\\n    python main.py\\n```\\n\\n[Learn more about auto-instrumentation with OpenTelemetry for Python applications](https://www.elastic.co/blog/auto-instrumentation-of-python-applications-opentelemetry).\\n\\nFinally, developers familiar with OpenTelemetry\'s APIs can leverage their existing knowledge by using auto-instrumentation, avoiding the complexities that may arise from manual instrumentation. However, manual instrumentation might still be preferred for specific use cases or when custom requirements cannot be fully addressed by auto-instrumentation.\\n\\n### Combination: Automatic and Manual\\n\\nBefore we proceed to manual instrumentation, note that you can also use a combination of automatic and manual instrumentation. As we noted above, once you start to understand the application’s behavior, you can determine whether you need additional instrumentation for code that is not being traced by auto-instrumentation.\\n\\nAdditionally, because not all the auto-instrumentation is equal across the OTel language set, you will probably need to manually instrument in some cases — for example, auto-instrumentation of a Flask-based Python application doesn’t automatically show middleware calls like calls to the requests library. In this situation, you will have to go with manual instrumentation for the Python application if you want to also see middleware tracing. However, as these libraries mature, more support options will become available.\\n\\nA combination is where most developers will ultimately land when the application gets to near production quality.\\n\\n### Manual instrumentation\\n\\nIf the auto-instrumentation does not cover your needs, you want to have more control over the instrumentation, or you’d like to treat instrumentation as code, using manual instrumentation is likely the right choice for you. As described above, you can use it as an enhancement to auto-instrumentation or entirely switch to manual instrumentation. If you eventually go down the path of manual instrumentation, it definitely provides more flexibility, but it also means you will have to not only code in the traces and metrics but also maintain them regularly.\\n\\nAs new features are added and changes to the libraries are made, the maintenance for the code may or may not be cumbersome. It’s a decision that requires some forethought.\\n\\n
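To make this concrete, here is a minimal sketch of the kind of hand-written span you would own and maintain, layered on top of an auto-instrumented service. The function and attribute names are illustrative, not taken from the Elastiflix app:\\n\\n```python\\n# Sketch: a custom span added manually on top of auto-instrumentation.\\n# Auto-instrumentation already traces frameworks such as Flask and the\\n# requests library; this span covers business logic no library hook sees.\\nfrom opentelemetry import trace\\n\\ntracer = trace.get_tracer(__name__)\\n\\ndef score_recommendations(user_id, candidates):\\n    with tracer.start_as_current_span(\\"score_recommendations\\") as span:\\n        span.set_attribute(\\"user.id\\", user_id)\\n        span.set_attribute(\\"candidates.count\\", len(candidates))\\n        return sorted(candidates)\\n```\\n\\n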
Here are some reasons why you would potentially use manual instrumentation:\\n\\n- You may already have some OTel instrumented applications using auto-instrumentation and need to add more telemetry for specific functions or libraries (like DBs or middleware), so you will have to add manual instrumentation.\\n- You need more flexibility and control in terms of the application language and what you’d like to instrument.\\n- If there\'s no auto-instrumentation available for your programming language and the technologies in use, manual instrumentation is the way to go for applications built using these languages.\\n- You might have to instrument for logging with an alternative approach, as logging is not yet stable for all the programming languages.\\n- You need to customize and enrich your telemetry data for your specific use cases — for example, you have a multi-tenant application and you need to capture each tenant’s information, so you use manual instrumentation via the OpenTelemetry SDK.\\n\\n**Recommendations for manual instrumentation** \\nManual instrumentation will require specific configuration to ensure you have the best experience with OTel. Below are Elastic’s recommendations (as outlined by the [CNCF](https://www.cncf.io/blog/2020/06/26/opentelemetry-best-practices-overview-part-2-2/)) for gaining the most benefit when instrumenting using the manual method:\\n\\n1. Ensure that your provider configuration and tracer initialization are done properly.\\n\\n2. Ensure you set up spans in all the functions you want traced.\\n\\n3. Set up resource attributes correctly.\\n\\n4. Use batch rather than simple processing.\\n\\nLet’s review these individually:\\n\\n**1. Ensure that your provider configuration and tracer initialization are done properly.** \\nThe general rule of thumb is to configure all your variables and tracer initialization up front in the application. Using the [Elastiflix application’s Python favorite service](https://github.com/elastic/workshops-instruqt/tree/main/Elastiflix) as an example, we can see:\\n\\n_Tracer being set up globally_\\n\\n```python\\nfrom opentelemetry import trace\\nfrom opentelemetry.sdk.trace import TracerProvider\\nfrom opentelemetry.sdk.trace.export import BatchSpanProcessor\\nfrom opentelemetry.sdk.resources import Resource\\n\\n...\\n\\n\\nresource = Resource.create(resource_attributes)\\n\\nprovider = TracerProvider(resource=resource)\\nprocessor = BatchSpanProcessor(exporter)\\nprovider.add_span_processor(processor)\\n\\n# Sets the global default tracer provider\\ntrace.set_tracer_provider(provider)\\n\\n# Creates a tracer from the global tracer provider\\ntracer = trace.get_tracer(otel_service_name)\\n```\\n\\nIn the above, we’ve added the OpenTelemetry trace module and imported the TracerProvider, which is the entry point of the API. It provides access to the Tracer, which is the class responsible for creating spans.\\n\\nAdditionally, we specify the use of BatchSpanProcessor. The span processor is an interface that provides hooks for span start and end method invocations.\\n\\nIn OpenTelemetry, different span processors are offered. The BatchSpanProcessor batches spans and sends them in bulk. Multiple span processors can be configured to be active at the same time using the MultiSpanProcessor. [See OpenTelemetry Documentation](https://opentelemetry.io/docs/instrumentation/java/manual/#span-processor).\\n\\n
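The creation of the exporter handed to BatchSpanProcessor is elided in the snippet above. As a sketch only, assuming the opentelemetry-exporter-otlp-proto-grpc package and the environment-derived variables shown below, it could look like this:\\n\\n```python\\n# Sketch: constructing the OTLP span exporter used by BatchSpanProcessor.\\n# Assumes otel_exporter_otlp_endpoint and otel_exporter_otlp_headers are\\n# read from the environment as shown further below.\\nfrom opentelemetry.exporter.otlp.proto.grpc.trace_exporter import OTLPSpanExporter\\n\\n# OTEL_EXPORTER_OTLP_HEADERS arrives as \\"key1=value1,key2=value2\\"\\nheaders = (\\n    dict(kv.split(\\"=\\", 1) for kv in otel_exporter_otlp_headers.split(\\",\\"))\\n    if otel_exporter_otlp_headers\\n    else None\\n)\\n\\nexporter = OTLPSpanExporter(\\n    endpoint=otel_exporter_otlp_endpoint,\\n    headers=headers,\\n)\\n```\\n\\n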
**2. Set up your spans inside the application functions themselves.**
Make sure your spans end and are in the right context so you can track the relationships between spans. In our Python favorite application, the function that retrieves a user's favorite movies shows this:

```python
@app.route('/favorites', methods=['GET'])
def get_favorite_movies():
    # add artificial delay if enabled
    if delay_time > 0:
        time.sleep(max(0, random.gauss(delay_time/1000, delay_time/1000/10)))

    with tracer.start_as_current_span("get_favorite_movies") as span:
        user_id = str(request.args.get('user_id'))

        logger.info('Getting favorites for user ' + user_id, extra={
            "event.dataset": "favorite.log",
            "user.id": request.args.get('user_id')
        })

        favorites = r.smembers(user_id)

        # convert to list
        favorites = list(favorites)
        logger.info('User ' + user_id + ' has favorites: ' + str(favorites), extra={
            "event.dataset": "favorite.log",
            "user.id": user_id
        })
        return {"favorites": favorites}
```

While you can instrument every function, it's strongly recommended that you instrument only what you need to avoid a flood of data. What you need will depend not only on the development process but also on what SRE, and potentially the business, needs to observe about the application. Instrument for your target use cases.

Also, avoid instrumenting trivial or utility methods and functions, or ones intended to be called extensively (e.g., getters and setters). Otherwise, you would produce a huge amount of telemetry data with very low additional value.
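To tie recommendation 2 together, here is a short sketch (the `update_favorites` function and the `save_favorite` helper are hypothetical) of the two habits that keep span relationships intact: start child spans while the parent span is current, and record failures on the span before re-raising so the trace reflects what actually happened:

```python
from opentelemetry import trace
from opentelemetry.trace import Status, StatusCode

tracer = trace.get_tracer("favorite_otel_manual")

def update_favorites(user_id, movie_id):
    # Started while the route's span is current, so it is recorded
    # as a child span, and the `with` block guarantees it ends.
    with tracer.start_as_current_span("update_favorites") as span:
        span.set_attribute("user.id", user_id)
        try:
            save_favorite(user_id, movie_id)  # hypothetical persistence helper
        except Exception as exc:
            # Attach the failure to the span before propagating it.
            span.record_exception(exc)
            span.set_status(Status(StatusCode.ERROR))
            raise
```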
**3. Set resource attributes and use semantic conventions**

_**Resource attributes**_
Attributes such as service.name, service.version, deployment.environment, and the cloud attributes are important for managing the version, environment, cloud provider, etc. of the specific service. Resource attributes describe resources such as hosts, systems, processes, and services, and they do not change during the lifetime of the resource. Resource attributes are a great help for correlating data, providing additional context to telemetry data and, thus, helping narrow down the root causes of problems during troubleshooting. While they are simple to set up with auto-instrumentation, you need to ensure you also send them through when instrumenting your application manually.

Check out OpenTelemetry's list of attributes that can be set in the [OTel documentation](https://opentelemetry.io/docs/specs/otel/resource/semantic_conventions/#semantic-attributes-with-sdk-provided-default-value).

In our auto-instrumented Python application from above, here is how we set up resource attributes:

```bash
opentelemetry-instrument \
    --traces_exporter console,otlp \
    --metrics_exporter console \
    --service_name your-service-name \
    --exporter_otlp_endpoint 0.0.0.0:4317 \
    python myapp.py
```

However, when instrumenting manually, you need to add your resource attributes yourself and ensure you have consistent values across your application's code. Resource attributes are defined by OpenTelemetry's Resource Semantic Conventions and can be found [here](https://opentelemetry.io/docs/specs/semconv/resource/). In fact, your organization should have a resource attribute convention that is applied across all applications.

These attributes are added to your metrics, traces, and logs, helping you filter data, correlate it, and make more sense of it.

Here is an example of setting resource attributes in our Python service:

```python
resource_attributes = {
    "service.name": otel_service_name,
    "service.version": otel_service_version,
    "deployment.environment": environment
}

resource = Resource.create(resource_attributes)

provider = TracerProvider(resource=resource)
```

We've set up service.name, service.version, and deployment.environment. You can set up as many resource attributes as you need, but you must pass them into the tracer with provider = TracerProvider(resource=resource).

_**Semantic conventions**_
In addition to the resource attribute conventions above, OpenTelemetry defines semantic conventions for the specific technologies used in building your application on specific infrastructure. For example, if you need to instrument database calls for which there is no automatic instrumentation, you will have to manually instrument tracing against the database. In doing so, you should utilize the [semantic conventions for database calls in OpenTelemetry](https://opentelemetry.io/docs/specs/semconv/database/database-spans/).

Similarly, if you are trying to trace Kafka or RabbitMQ, you can follow the [OpenTelemetry semantic conventions for messaging systems](https://opentelemetry.io/docs/specs/semconv/messaging/).

There are multiple semantic conventions across several areas and signal types that can be followed using OpenTelemetry — [check out the details](https://opentelemetry.io/docs/specs/semconv/).
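For instance, here is a hedged sketch of what a manually traced Redis call from the favorite service could look like under the database semantic conventions (the attribute names follow the conventions at the time of writing; check the linked spec for the current set):

```python
import redis

from opentelemetry import trace
from opentelemetry.trace import SpanKind

r = redis.Redis()  # the favorite service's Redis client
tracer = trace.get_tracer("favorite_otel_manual")

def get_favorites(user_id):
    # A CLIENT span, named after the operation and annotated with the
    # database semantic convention attributes.
    with tracer.start_as_current_span("SMEMBERS", kind=SpanKind.CLIENT) as span:
        span.set_attribute("db.system", "redis")
        span.set_attribute("db.operation", "SMEMBERS")
        span.set_attribute("db.statement", "SMEMBERS " + user_id)
        return r.smembers(user_id)
```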
**4. Use batch or simple processing?**
Whether to use simple or batch processing depends on your specific observability requirements. The advantages of batch processing include improved efficiency and reduced network overhead: processing telemetry data in batches enables more efficient data handling and resource utilization. On the other hand, batch processing increases the lag before telemetry data appears in the backend, as the span processor waits until a sufficient amount of data has accumulated before sending it.

With simple processing, you send your telemetry data as soon as it is generated, resulting in real-time observability. However, you will need to prepare for higher network overhead and the additional resources required to process all the separate data transmissions.

Here is what we used to set this up in Python:

```python
from opentelemetry.sdk.trace.export import BatchSpanProcessor

provider = TracerProvider(resource=resource)
processor = BatchSpanProcessor(exporter)
provider.add_span_processor(processor)

# Sets the global default tracer provider
trace.set_tracer_provider(provider)
```

Your observability goals and budgetary constraints are the deciding factors when choosing batch or simple processing, and a hybrid approach can also be implemented. If real-time insights are critical for an ecommerce application, for example, then simple processing would be the better approach. For applications where real-time insights are not crucial, consider batch processing. Often, experimenting with both approaches and seeing how your observability backend handles the data is a fruitful exercise to home in on what works best for the business.
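For comparison, here is a minimal sketch of both options side by side; the batch parameters shown are the knobs the Python SDK exposes, and the values are illustrative rather than recommendations:

```python
from opentelemetry.sdk.trace.export import (
    BatchSpanProcessor,
    SimpleSpanProcessor,
)

# Simple: each span is exported the moment it ends, giving real-time
# visibility at the cost of one export call per span.
simple_processor = SimpleSpanProcessor(exporter)

# Batch: spans are queued and flushed in bulk; the knobs trade
# freshness against network and CPU overhead.
batch_processor = BatchSpanProcessor(
    exporter,
    max_queue_size=2048,         # spans buffered before new ones are dropped
    schedule_delay_millis=5000,  # how often the queue is flushed
    max_export_batch_size=512,   # spans per export request
)
```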
## Use the OpenTelemetry Collector or go direct?

When starting out with OpenTelemetry, sending telemetry data directly to a backend such as Elastic is a good way to get started. Often, you would use the direct method during the development phase and in a local environment.

However, as you deploy your applications to production, the applications become fully responsible for ingesting and sending telemetry data. The amount of data sent in a local environment or during development is minuscule compared to a production environment. With millions or even billions of users interacting with your applications, the work of ingesting and sending telemetry data on top of the core application functions can become resource-intensive. Thus, offloading the collection, processing, and exporting of telemetry data to the vendor-agnostic OTel Collector, which then forwards it to a backend such as Elastic, enables your applications to perform more efficiently, leading to a better customer experience.

![1 microservices flowchart](/assets/images/best-practices-instrumenting-opentelemetry/elastic-blog-1-microservices-flowchart.png)

### Advantages of using the OpenTelemetry Collector

For cloud-native and microservices-based applications, the OpenTelemetry Collector provides the flexibility to handle multiple data formats and, more importantly, offloads from the application the resources required to manage telemetry data. The result: reduced application overhead and ease of management, as the telemetry configuration can now be managed in one place.

The OTel Collector is the most common configuration because it can be used:

- To enrich the telemetry data with additional context information — for example, on Kubernetes, the OTel Collector takes responsibility for enriching all the telemetry with the corresponding K8s pod and node information (labels, pod name, etc.)
- To provide uniform and consistent processing, or to transform telemetry data in a central place (i.e., the OTel Collector), rather than taking on the burden of syncing configuration across hundreds of services to ensure consistent processing
- To aggregate metrics across multiple instances of a service, which is only doable in the OTel Collector (not within individual SDKs/agents)

Key features of the OpenTelemetry Collector include:

- **Simple setup:** The [setup documentation](https://opentelemetry.io/docs/collector/getting-started/) is clear and comprehensive. We also have an example setup using Elastic and the OTel Collector documented in [this blog](https://www.elastic.co/blog/opentelemetry-observability).
- **Flexibility:** The OTel Collector offers many configuration options and allows you to easily integrate into your existing [observability solution](https://www.elastic.co/observability). [OpenTelemetry's pre-built distributions](https://opentelemetry.io/docs/collector/distributions/) also allow you to start quickly and build in the features that you need. [Here](https://github.com/bshetti/opentelemetry-microservices-demo/blob/main/deploy-with-collector-k8s/otelcollector.yaml), as well as below, is an example of the code that we used to deploy our collector for an application running on Kubernetes.

```yaml
---
apiVersion: apps/v1
kind: Deployment
metadata:
  name: otelcollector
spec:
  selector:
    matchLabels:
      app: otelcollector
  template:
    metadata:
      labels:
        app: otelcollector
    spec:
      serviceAccountName: default
      terminationGracePeriodSeconds: 5
      containers:
        - command:
            - "/otelcol"
            - "--config=/conf/otel-collector-config.yaml"
          image: otel/opentelemetry-collector:0.61.0
          name: otelcollector
          resources:
            limits:
              cpu: 1
              memory: 2Gi
            requests:
              cpu: 200m
              memory: 400Mi
```

- **Collect host metrics:** The OTel Collector allows you to capture infrastructure metrics, including CPU, RAM, storage capacity, and more, which means you won't need to install a separate infrastructure agent to collect host metrics. An example OTel configuration for ingesting host metrics is below.

```yaml
receivers:
  hostmetrics:
    scrapers:
      cpu:
      disk:
```

- **Security:** The OTel Collector operates in a secure manner by default and can filter out sensitive information based on your configuration. OpenTelemetry provides [these security guidelines](https://github.com/open-telemetry/opentelemetry-collector/blob/main/docs/security-best-practices.md) to ensure your security needs are met.
- **Tail-based sampling for distributed tracing:** With OpenTelemetry, you can specify the sampling strategy you would like to use for capturing traces, and tail-based sampling is available by default with the OTel Collector. With tail-based sampling, you control and thereby reduce the amount of trace data collected. More importantly, you capture the most relevant traces, enabling you to spot issues within your microservices applications much faster (a minimal configuration is sketched after this list).
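The `tail_sampling` processor ships in the Collector contrib distribution; as a hedged sketch, the policy below keeps only traces that contain errors (the policy name and values are illustrative):

```yaml
processors:
  tail_sampling:
    decision_wait: 10s # buffer each trace before making a sampling decision
    policies:
      - name: keep-error-traces
        type: status_code
        status_code:
          status_codes: [ERROR]
```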
## What about logs?

OpenTelemetry's approach to ingesting metrics and traces is a "clean-sheet design": OTel developed a new API for metrics and traces, along with implementations for multiple languages. For logs, on the other hand, due to the broad adoption of existing log solutions and libraries, OTel support is the least mature.

Today, OpenTelemetry's solution for logs is to provide integration hooks to existing solutions. Longer term, though, OpenTelemetry aims to incorporate context aggregation with logs, easing the correlation of logs with metrics and traces. [Learn more about OpenTelemetry's vision](https://opentelemetry.io/docs/specs/otel/logs/#opentelemetry-solution).

Elastic has written up its recommendations in the following article: [3 models for logging with OpenTelemetry and Elastic](https://www.elastic.co/blog/3-models-logging-opentelemetry-elastic). Here is a brief summary of what Elastic recommends:

1. Output logs from your service (alongside traces and metrics) using an embedded [OpenTelemetry Instrumentation library](https://opentelemetry.io/docs/instrumentation/#status-and-releases) to Elastic via the OTLP protocol.

2. Write logs from your service to a file scraped by the [OpenTelemetry Collector](https://opentelemetry.io/docs/collector/), which then forwards them to Elastic via the OTLP protocol (see the sketch after this list).

3. Write logs from your service to a file scraped by [Elastic Agent](https://www.elastic.co/elastic-agent) (or [Filebeat](https://www.elastic.co/beats/filebeat)), which then forwards them to Elastic via an Elastic-defined protocol.
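To make the second model concrete, here is a minimal, hedged sketch of a Collector pipeline that tails a log file with the filelog receiver (also part of the contrib distribution) and forwards it over OTLP; the path and endpoint are placeholders:

```yaml
receivers:
  filelog:
    include: [/var/log/favorite/*.log] # placeholder log path

exporters:
  otlp:
    endpoint: https://your-otlp-endpoint:443 # placeholder backend endpoint

service:
  pipelines:
    logs:
      receivers: [filelog]
      exporters: [otlp]
```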
The third approach, where logs are scraped by an Elastic Agent, is the recommended one, as Elastic provides a widely adopted and proven method for capturing logs from applications and services instrumented with OTel. The first two approaches, although both use OTel instrumentation, are not yet mature and aren't ready for production-level applications.

Get more details about the three approaches in this [Elastic blog](https://www.elastic.co/blog/3-models-logging-opentelemetry-elastic), which includes a deep-dive discussion with hands-on implementation, architecture, advantages, and disadvantages.

## It's not all sunshine and roses

OpenTelemetry is definitely beneficial for achieving observability of modern cloud-native distributed applications. Having a standardized framework for ingesting telemetry reduces operational expenses and allows the organization to focus more on application innovation. Even with all the advantages of using OTel, though, there are some limitations you should be aware of.

But first, here are the advantages of using OpenTelemetry:

- **Standardized instrumentation:** Having a consistent method for instrumenting systems up and down the stack gives organizations more operational efficiency and cost-effective observability.
- **Auto-instrumentation:** OTel gives organizations the ability to auto-instrument popular libraries and frameworks, enabling them to get up and running quickly with minimal changes to the codebase.
- **Vendor neutrality:** Organizations don't have to be tied to one vendor for their observability needs. In fact, they can use several of them, using OTel to try one out or to take a more best-of-breed approach if desired.
- **Future-proof instrumentation:** Since OpenTelemetry is open source and has a vast ecosystem of support, your organization will be using technology that is constantly innovated on and can scale and grow with the business.

There are some limitations as well:

- Instrumenting with OTel is a forklift upgrade. Organizations must be aware that time and effort need to be invested to migrate proprietary instrumentation to OpenTelemetry.
- The [language SDKs](https://opentelemetry.io/docs/instrumentation/) are at different maturity levels, so applications relying on alpha, beta, or experimental functionality may not provide the organization with the full benefits in the short term.

Over time, the disadvantages will shrink, especially as the maturity of the functional components improves. Check the [OpenTelemetry status page](https://opentelemetry.io/status/) for updates on the status of the language SDKs, the collector, and the overall specifications.

## Using Elastic and migrating to OpenTelemetry at your speed

Transitioning to OpenTelemetry is a challenge for most organizations, as it requires retooling existing proprietary APM agents on almost all applications. This can be daunting, but OpenTelemetry agents provide a mechanism to avoid having to modify the source code, otherwise known as auto-instrumentation. With auto-instrumentation, the only code changes will be to rip out the proprietary APM agent code. Additionally, you should ensure you have an [observability tool that natively supports OTel](https://www.elastic.co/blog/opentelemetry-observability) without the need for additional agents, such as [Elastic Observability](https://www.elastic.co/blog/opentelemetry-observability).

[Elastic recently donated Elastic Common Schema (ECS) in its entirety to OTel](https://www.elastic.co/blog/ecs-elastic-common-schema-otel-opentelemetry-announcement). The goal is to ensure OTel arrives at a standardized logging format. ECS, developed by the Elastic community over the past few years, gives OTel a vehicle for providing a more mature logging solution.

Elastic provides native OTel support: you can send OTel telemetry directly into Elastic Observability without a collector or any of the processing normally done in the collector.

Here are the configuration options in Elastic for OpenTelemetry:

![OTel configuration options in Elastic](/assets/images/best-practices-instrumenting-opentelemetry/elastic-blog-2-otel-config-options.png)

Most of Elastic Observability's APM capabilities are available with OTel data. Some of these include:

- Service maps
- Service details (latency, throughput, failed transactions)
- Dependencies between services, distributed tracing
- Transactions (traces)
- Machine learning (ML) correlations
- Log correlation

In addition to Elastic's APM and a unified view of the telemetry data, you will also be able to use Elastic's powerful machine learning capabilities to reduce analysis time, and its alerting to help reduce MTTR.

Although OpenTelemetry supports many programming languages, the [status of its major functional components](https://opentelemetry.io/docs/instrumentation/) — metrics, traces, and logs — still varies.
Thus, applications written in Java, Python, and JavaScript are good choices to start migrating, as their metrics, traces, and logs (for Java) are stable.

For the other languages that are not yet supported, you can easily instrument with Elastic Agents instead, running your observability platform in mixed mode (Elastic Agents alongside OpenTelemetry agents).

![services](/assets/images/best-practices-instrumenting-opentelemetry/elastic-blog-3-services.png)

We ran a variation of our standard Elastic Agent application with one service flipped to OTel — the newsletter-otel service. From there, we can convert each of the remaining services to OTel as development resources allow.

As a result, you can take advantage of the benefits of OpenTelemetry, which include:

- **Standardization:** OpenTelemetry provides a standard approach to telemetry collection, enabling consistency of processes and easier integration of different components.
- **Vendor-agnostic:** Since OpenTelemetry is open source, it is designed to be vendor-agnostic, allowing DevOps and SRE teams to work with other monitoring and observability backends, reducing vendor lock-in.
- **Flexibility and extensibility:** With its flexible architecture and inherent design for extensibility, OpenTelemetry enables teams to create custom instrumentation and enrich their own telemetry data.
- **Community and support:** OpenTelemetry has a growing community of contributors and adopters. In fact, Elastic contributed to developing a common schema for metrics, logs, traces, and security events. Learn more [here](https://www.elastic.co/blog/ecs-elastic-common-schema-otel-opentelemetry-announcement).

Once the other languages reach a stable state, you can continue your migration to OpenTelemetry agents.

## Summary

OpenTelemetry has become the de facto standard for ingesting metrics, traces, and logs from cloud-native applications. It provides a vendor-agnostic framework for collecting telemetry data, enabling you to use the observability backend of your choice.

Auto-instrumentation with OpenTelemetry is the fastest way to start ingesting your telemetry data and an optimal way to get started with OTel. Manual instrumentation provides more flexibility, however, so it is often the next step in gaining deeper insights from your telemetry data.

[OpenTelemetry](https://www.elastic.co/observability/opentelemetry) also allows you to ingest your data directly or through the OTel Collector. For local development, going direct is a great way to get your data to your observability backend; for production workloads, however, using the OTel Collector is recommended. The collector takes care of all the data ingestion and processing, enabling your applications to focus on functionality rather than on telemetry tasks.

Logging functionality is still at a nascent stage with OpenTelemetry, while ingesting metrics and traces is well established. For logs, if you've started down the OTel path, you can send your logs to Elastic using the OTLP protocol. Since Elastic has a very mature logging solution, though, a better approach is to use an Elastic Agent to ingest logs.

Although the long-term benefits are clear, organizations need to be aware that adopting OpenTelemetry means owning their own instrumentation, so appropriate resources and effort need to be incorporated into the development lifecycle.
Over time, however, OpenTelemetry brings standardization to telemetry data ingestion, offering organizations vendor choice, scalability, flexibility, and future-proofing of their investments.

_The release and timing of any features or functionality described in this post remain at Elastic's sole discretion. Any features or functionality not currently available may not be delivered on time or at all._
In this blog, we will go over Elastic\\\\u2019s recommendations around some best practices for OpenTelemetry instrumentation:\\"}),`\\n`,(0,t.jsxs)(e.ul,{children:[`\\n`,(0,t.jsxs)(e.li,{children:[(0,t.jsx)(e.strong,{children:\\"Automatic or manual?\\"}),\\" We\\\\u2019ll cover the need for one versus the other and provide recommendations based on your situation.\\"]}),`\\n`,(0,t.jsxs)(e.li,{children:[(0,t.jsx)(e.strong,{children:\\"Collector or direct from the application?\\"}),\\" While the traditional option is to use a collector, observability tools like Elastic Observability can take telemetry from OpenTelemetry applications directly.\\"]}),`\\n`,(0,t.jsxs)(e.li,{children:[(0,t.jsx)(e.strong,{children:\\"What to instrument from OTel SDKs.\\"}),\\" Traces and metrics are well contributed to (\\",(0,t.jsx)(e.a,{href:\\"https://opentelemetry.io/docs/instrumentation/\\",rel:\\"nofollow\\",children:\\"per the table in OTel docs\\"}),\\"), but logs are still in progress. Elastic\\",(0,t.jsx)(\\"sup\\",{children:\\"\\\\xAE\\"}),\\" is improving the progress with its \\",(0,t.jsx)(e.a,{href:\\"https://www.elastic.co/blog/ecs-elastic-common-schema-otel-opentelemetry-announcement\\",rel:\\"nofollow\\",children:\\"contribution of ECS to OTel\\"}),\\". Regardless of the status from OTel, you need to test and ensure these instrumentations work for you.\\"]}),`\\n`,(0,t.jsx)(e.li,{children:(0,t.jsx)(e.strong,{children:\\"Advantages and disadvantages of OpenTelemetry\\"})}),`\\n`]}),`\\n`,(0,t.jsx)(e.h2,{id:\\"otel-automatic-or-manual-instrumentation-which-one-should-i-use\\",children:\\"OTel automatic or manual instrumentation: Which one should I use?\\"}),`\\n`,(0,t.jsx)(e.p,{children:\\"While there are two ways to instrument your applications with OpenTelemetry \\\\u2014 automatic and manual \\\\u2014 there isn\\\\u2019t a perfect answer, as it depends on your needs. There are pros and cons of using one versus another, such as:\\"}),`\\n`,(0,t.jsxs)(e.ul,{children:[`\\n`,(0,t.jsx)(e.li,{children:\\"Auto-magic experience vs. control over instrumentation\\"}),`\\n`,(0,t.jsx)(e.li,{children:\\"Customization vs. out-of-the-box data\\"}),`\\n`,(0,t.jsx)(e.li,{children:\\"Instrumentation overhead\\"}),`\\n`,(0,t.jsx)(e.li,{children:\\"Simplicity vs. flexibility\\"}),`\\n`]}),`\\n`,(0,t.jsx)(e.p,{children:\\"Additionally, you might even land on a combination depending on availability and need.\\"}),`\\n`,(0,t.jsx)(e.p,{children:\\"Let\\\\u2019s review both automatic and manual instrumentation and explore specific recommendations.\\"}),`\\n`,(0,t.jsx)(e.h3,{id:\\"auto-instrumentation\\",children:\\"Auto-instrumentation\\"}),`\\n`,(0,t.jsx)(e.p,{children:\\"For most of the programming languages and runtimes, OpenTelemetry provides an auto-instrumentation approach for gathering telemetry data. Auto-instrumentation provides a set of pre-defined, out-of-the-box instrumentation modules for well-known frameworks and libraries. With that, users can gather telemetry data (such as traces, metrics, and logs) from well-known frameworks and libraries used by their application with only minimal or even no need for code changes.\\"}),`\\n`,(0,t.jsx)(e.p,{children:\\"Here are some of the apparent benefits of using auto-instrumentation:\\"}),`\\n`,(0,t.jsxs)(e.ul,{children:[`\\n`,(0,t.jsx)(e.li,{children:\\"Quicker development and path to production. 
Auto-instrumentation saves time by accelerating the process of integrating telemetry into an application, allowing more focus on other critical tasks.\\"}),`\\n`,(0,t.jsx)(e.li,{children:\\"Simpler maintenance by only having to update one line, which is usually the container start command where auto-instrumentation is configured, versus having to update multiple lines of code across multiple classes, methods, and services.\\"}),`\\n`,(0,t.jsx)(e.li,{children:\\"Easier to keep up with the latest features and improvements in the OpenTelemetry project without manually updating the instrumentation of used libraries and/or code.\\"}),`\\n`]}),`\\n`,(0,t.jsx)(e.p,{children:\\"There are also some disadvantages and limitations of the auto-instrumentation approach:\\"}),`\\n`,(0,t.jsxs)(e.ul,{children:[`\\n`,(0,t.jsx)(e.li,{children:\\"Auto-instrumentation collects telemetry data only for the frameworks and libraries in use for which an explicit auto-instrumentation module exists. In particular, it\\\\u2019s unlikely that auto-instrumentation would collect telemetry data for \\\\u201Cexotic\\\\u201D or custom libraries.\\"}),`\\n`,(0,t.jsx)(e.li,{children:\\"Auto-instrumentation does not capture telemetry for pure custom code (that does not use well-known libraries underneath).\\"}),`\\n`,(0,t.jsx)(e.li,{children:\\"Auto-instrumentation modules come with a pre-defined, opinionated instrumentation logic that provides sufficient and meaningful information in the vast majority of cases. However, in some custom edge cases, the information value, structure, or level of detail of the data provided by auto-instrumentation modules might be not sufficient.\\"}),`\\n`,(0,t.jsx)(e.li,{children:\\"Depending on the runtime, technology, and size of the target application, auto-instrumentation may come with a (slightly) higher start-up or runtime overhead compared to manual instrumentation. In the majority of cases, this overhead is negligible but may become a problem in some edge cases.\\"}),`\\n`]}),`\\n`,(0,t.jsxs)(e.p,{children:[(0,t.jsx)(e.a,{href:\\"https://github.com/elastic/workshops-instruqt/blob/main/Elastiflix/python-favorite-otel-auto/Dockerfile\\",rel:\\"nofollow\\",children:\\"Here\\"}),\\" is an example of a Python application that was auto-instrumented with OpenTelemetry. If you had a Python application locally, you would add the code below to auto-instrument:\\"]}),`\\n`,(0,t.jsx)(e.pre,{children:(0,t.jsx)(e.code,{className:\\"language-bash\\",children:`opentelemetry-instrument \\\\\\\\\\n --traces_exporter OTEL_TRACES_EXPORTER \\\\\\\\\\n --metrics_exporter OTLP_METRICS_EXPORTER \\\\\\\\\\n --service_name OTLP_SERVICE_NAME \\\\\\\\\\n --exporter_otlp_endpoint OTEL_EXPORTER_TRACES_ENDPOINT \\\\\\\\\\n python main.py\\n`})}),`\\n`,(0,t.jsxs)(e.p,{children:[(0,t.jsx)(e.a,{href:\\"https://www.elastic.co/blog/auto-instrumentation-of-python-applications-opentelemetry\\",rel:\\"nofollow\\",children:\\"Learn more about auto-instrumentation with OpenTelemetry for Python applications\\"}),\\".\\"]}),`\\n`,(0,t.jsx)(e.p,{children:\\"Finally, developers familiar with OpenTelemetry\'s APIs can leverage their existing knowledge by using auto-instrumentation, avoiding the complexities that may arise from manual instrumentation. 
However, manual instrumentation might still be preferred for specific use cases or when custom requirements cannot be fully addressed by auto-instrumentation.\\"}),`\\n`,(0,t.jsx)(e.h3,{id:\\"combination-automatic-and-manual\\",children:\\"Combination: Automatic and Manual\\"}),`\\n`,(0,t.jsx)(e.p,{children:\\"Before we proceed with manual instrumentation, you can also use a combination of automatic and manual instrumentation. As we noted above, if you start to understand the application\\\\u2019s behavior, then you can determine if you need some additional instrumentation for code that is not being traced by auto-instrumentation.\\"}),`\\n`,(0,t.jsx)(e.p,{children:\\"Additionally, because not all the auto-instrumentation is equal across the OTel language set, you will probably need to manually instrument in some cases \\\\u2014 for example, if auto-instrumentation of a Flask-based Python application doesn\\\\u2019t automatically show middleware calls like calls to the requests library. In this situation, you will have to go with manual instrumentation for the Python application if you want to also see middleware tracing. However, as these libraries mature, more support options will become available.\\"}),`\\n`,(0,t.jsx)(e.p,{children:\\"A combination is where most developers will ultimately land when the application gets to near production quality.\\"}),`\\n`,(0,t.jsx)(e.h3,{id:\\"manual-instrumentation\\",children:\\"Manual instrumentation\\"}),`\\n`,(0,t.jsx)(e.p,{children:\\"If the auto-instrumentation does not cover your needs, you want to have more control over the instrumentation, or you\\\\u2019d like to treat instrumentation as code, using manual instrumentation is likely the right choice for you. As described above, you can use it as an enhancement to auto-instrumentation or entirely switch to manual instrumentation. If you eventually go down a path of manual instrumentation, it definitely provides more flexibility but also means you will have to not only code in the traces and metrics but also maintain it regularly.\\"}),`\\n`,(0,t.jsx)(e.p,{children:\\"As new features are added and changes to the libraries are made, the maintenance for the code may or may not be cumbersome. 
It\\\\u2019s a decision that requires some forethought.\\"}),`\\n`,(0,t.jsx)(e.p,{children:\\"Here are some reasons why you would potentially use manual instrumentation:\\"}),`\\n`,(0,t.jsxs)(e.ul,{children:[`\\n`,(0,t.jsx)(e.li,{children:\\"You may already have some OTel instrumented applications using auto-instrumentation and need to add more telemetry for specific functions or libraries (like DBs or middleware), thus you will have to add manual instrumentation.\\"}),`\\n`,(0,t.jsx)(e.li,{children:\\"You need more flexibility and control in terms of the application language and what you\\\\u2019d like to instrument.\\"}),`\\n`,(0,t.jsx)(e.li,{children:\\"In case there\'s no auto-instrumentation available for your programming language and the technologies in use, manual instrumentation would be the way to go for your applications built using these languages.\\"}),`\\n`,(0,t.jsx)(e.li,{children:\\"You might have to instrument for logging with an alternative approach, as logging is not yet stable for all the programming languages.\\"}),`\\n`,(0,t.jsx)(e.li,{children:\\"You need to customize and enrich your telemetry data for your specific use cases \\\\u2014 for example, you have a multi-tenant application and you need to get each tenant\\\\u2019s information and then use manual instrumentation via the OpenTelemetry SDK.\\"}),`\\n`]}),`\\n`,(0,t.jsxs)(e.p,{children:[(0,t.jsx)(e.strong,{children:\\"Recommendations for manual instrumentation\\"}),(0,t.jsx)(e.br,{}),`\\n`,\\"Manual instrumentation will require specific configuration to ensure you have the best experience with OTel. Below are Elastic\\\\u2019s recommendations (as outlined by the \\",(0,t.jsx)(e.a,{href:\\"https://www.cncf.io/blog/2020/06/26/opentelemetry-best-practices-overview-part-2-2/\\",rel:\\"nofollow\\",children:\\"CNCF\\"}),\\"), for gaining the most benefits when instrumenting using the manual method:\\"]}),`\\n`,(0,t.jsxs)(e.ol,{children:[`\\n`,(0,t.jsxs)(e.li,{children:[`\\n`,(0,t.jsx)(e.p,{children:\\"Ensure that your provider configuration and tracer initialization is done properly.\\"}),`\\n`]}),`\\n`,(0,t.jsxs)(e.li,{children:[`\\n`,(0,t.jsx)(e.p,{children:\\"Ensure you set up spans in all the functions you want traced.\\"}),`\\n`]}),`\\n`,(0,t.jsxs)(e.li,{children:[`\\n`,(0,t.jsx)(e.p,{children:\\"Set up resource attributes correctly.\\"}),`\\n`]}),`\\n`,(0,t.jsxs)(e.li,{children:[`\\n`,(0,t.jsx)(e.p,{children:\\"Use batch rather than simple processing.\\"}),`\\n`]}),`\\n`]}),`\\n`,(0,t.jsx)(e.p,{children:\\"Let\\\\u2019s review these individually:\\"}),`\\n`,(0,t.jsxs)(e.p,{children:[(0,t.jsx)(e.strong,{children:\\"1. Ensure that your provider configuration and tracer initialization is done properly.\\"}),(0,t.jsx)(e.br,{}),`\\n`,\\"The general rule of thumb is to ensure you configure all your variables and tracer initialization in the front of the application. 
Using the \\",(0,t.jsx)(e.a,{href:\\"https://github.com/elastic/workshops-instruqt/tree/main/Elastiflix\\",rel:\\"nofollow\\",children:\\"Elastiflix application\\\\u2019s Python favorite service\\"}),\\" as an example, we can see:\\"]}),`\\n`,(0,t.jsx)(e.p,{children:(0,t.jsx)(e.em,{children:\\"Tracer being set up globally\\"})}),`\\n`,(0,t.jsx)(e.pre,{children:(0,t.jsx)(e.code,{className:\\"language-python\\",children:`from opentelemetry import trace\\nfrom opentelemetry.sdk.trace import TracerProvider\\nfrom opentelemetry.sdk.trace.export import BatchSpanProcessor\\nfrom opentelemetry.sdk.resources import Resource\\n\\n...\\n\\n\\nresource = Resource.create(resource_attributes)\\n\\nprovider = TracerProvider(resource=resource)\\nprocessor = BatchSpanProcessor(exporter)\\nprovider.add_span_processor(processor)\\n\\n# Sets the global default tracer provider\\ntrace.set_tracer_provider(provider)\\n\\n# Creates a tracer from the global tracer provider\\ntracer = trace.get_tracer(otel_service_name)\\n`})}),`\\n`,(0,t.jsx)(e.p,{children:\\"In the above, we\\\\u2019ve added the OpenTelemetry trace module and imported the TraceProvider , which is the entry point of the API. It provides access to the Tracer, which is the class responsible for creating spans.\\"}),`\\n`,(0,t.jsx)(e.p,{children:\\"Additionally, we specify the use of BatchSpanProcessor. The span processor is an interface that provides hooks for span start and end method invocations.\\"}),`\\n`,(0,t.jsxs)(e.p,{children:[\\"In OpenTelemetry, different span processors are offered. The BatchSpanProcessor batches span and sends them in bulk. Multiple span processors can be configured to be active at the same time using the MultiSpanProcessor. \\",(0,t.jsx)(e.a,{href:\\"https://opentelemetry.io/docs/instrumentation/java/manual/#span-processor\\",rel:\\"nofollow\\",children:\\"See OpenTelemetry Documentation\\"}),\\".\\"]}),`\\n`,(0,t.jsx)(e.p,{children:\\"The variable otel_service_name is set in with environment variables (i.e., OTLP ENDPOINT and others) also set up globally. See below:\\"}),`\\n`,(0,t.jsx)(e.pre,{children:(0,t.jsx)(e.code,{className:\\"language-python\\",children:`otel_service_name = os.environ.get(\'OTEL_SERVICE_NAME\') or \'favorite_otel_manual\'\\nenvironment = os.environ.get(\'ENVIRONMENT\') or \'dev\'\\notel_service_version = os.environ.get(\'OTEL_SERVICE_VERSION\') or \'1.0.0\'\\n\\notel_exporter_otlp_headers = os.environ.get(\'OTEL_EXPORTER_OTLP_HEADERS\')\\notel_exporter_otlp_endpoint = os.environ.get(\'OTEL_EXPORTER_OTLP_ENDPOINT\')\\n`})}),`\\n`,(0,t.jsx)(e.p,{children:\\"In the above code, we initialize several variables. 
Because we also imported Resource, we initialize several variables:\\"}),`\\n`,(0,t.jsx)(e.p,{children:(0,t.jsx)(e.strong,{children:\\"Resource variables (we will cover this later in this article):\\"})}),`\\n`,(0,t.jsxs)(e.ul,{children:[`\\n`,(0,t.jsx)(e.li,{children:\\"otel_service_name \\\\u2013 This helps set the name of the service (service.name) in otel Resource attributes.\\"}),`\\n`,(0,t.jsx)(e.li,{children:\\"otel_service_version \\\\u2013 This helps set the version of the service (service.version) in OTel Resource attributes.\\"}),`\\n`,(0,t.jsx)(e.li,{children:\\"environment \\\\u2013 This helps set the deployment.environment variable in OTel Resource attributes.\\"}),`\\n`]}),`\\n`,(0,t.jsx)(e.p,{children:(0,t.jsx)(e.strong,{children:\\"Exporter variables:\\"})}),`\\n`,(0,t.jsxs)(e.ul,{children:[`\\n`,(0,t.jsx)(e.li,{children:\\"otel_exporter_otlp_endpoint \\\\u2013 This helps set the OTLP endpoint where traces, logs, and metrics are sent. Elastic would be an OTLP endpoint. You can also use OTEL_TRACES_EXPORTER or OTEL_METRICS_EXPORTER if you want to only send traces and/or metrics to specific endpoints.\\"}),`\\n`,(0,t.jsx)(e.li,{children:\\"Otel_exporter_otlp_headers \\\\u2013 This is the authorization needed for the endpoint.\\"}),`\\n`]}),`\\n`,(0,t.jsx)(e.p,{children:\\"The separation of your provider and tracer configuration allows you to use any OpenTelemetry provider and tracing framework that you choose.\\"}),`\\n`,(0,t.jsxs)(e.p,{children:[(0,t.jsx)(e.strong,{children:\\"2. Set up your spans inside the application functions themselves.\\"}),(0,t.jsx)(e.br,{}),`\\n`,\\"Make sure your spans end and are in the right context so you can track the relationships between spans. In our Python favorite application, the function that retrieves a user\\\\u2019s favorite movies shows:\\"]}),`\\n`,(0,t.jsx)(e.pre,{children:(0,t.jsx)(e.code,{className:\\"language-python\\",children:`@app.route(\'/favorites\', methods=[\'GET\'])\\ndef get_favorite_movies():\\n # add artificial delay if enabled\\n if delay_time > 0:\\n time.sleep(max(0, random.gauss(delay_time/1000, delay_time/1000/10)))\\n\\n with tracer.start_as_current_span(\\"get_favorite_movies\\") as span:\\n user_id = str(request.args.get(\'user_id\'))\\n\\n logger.info(\'Getting favorites for user \' + user_id, extra={\\n \\"event.dataset\\": \\"favorite.log\\",\\n \\"user.id\\": request.args.get(\'user_id\')\\n })\\n\\n favorites = r.smembers(user_id)\\n\\n # convert to list\\n favorites = list(favorites)\\n logger.info(\'User \' + user_id + \' has favorites: \' + str(favorites), extra={\\n \\"event.dataset\\": \\"favorite.log\\",\\n \\"user.id\\": user_id\\n })\\n return { \\"favorites\\": favorites}\\n`})}),`\\n`,(0,t.jsx)(e.p,{children:\\"While you can instrument every function, it\\\\u2019s strongly recommended that you instrument what you need to avoid a flood of data. The need will be dependent not only on the development process needs but also on what SRE and potentially the business needs to observe with the application. Instrument for your target use cases.\\"}),`\\n`,(0,t.jsx)(e.p,{children:\\"Also, avoid instrumenting trivial/utility methods/functions or such that are intended to be called extensively (e.g., getter/setter functions). Otherwise, this would produce a huge amount of telemetry data with very low additional value.\\"}),`\\n`,(0,t.jsx)(e.p,{children:(0,t.jsx)(e.strong,{children:\\"3. 
Set resource attributes and use semantic conventions\\"})}),`\\n`,(0,t.jsxs)(e.p,{children:[\\"_ \\",(0,t.jsx)(e.strong,{children:\\"Resource attributes\\"}),\\" _\\",(0,t.jsx)(e.br,{}),`\\n`,\\"Attributes such as service.name, tracer, development.environment, and cloud are important in managing version, environment, cloud provider, etc. for the specific service. Resource attributes describe resources such as hosts, systems, processes, and services and do not change during the lifetime of the resource. Resource attributes are a great help for correlating data, providing additional context to telemetry data and, thus, helping narrow down root causes of problems during troubleshooting. While it is simple to set up in auto-instrument, you need to ensure you also send these through in your application.\\"]}),`\\n`,(0,t.jsxs)(e.p,{children:[\\"Check out OpenTelemetry\\\\u2019s list of attributes that can be set in the \\",(0,t.jsx)(e.a,{href:\\"https://opentelemetry.io/docs/specs/otel/resource/semantic_conventions/#semantic-attributes-with-sdk-provided-default-value\\",rel:\\"nofollow\\",children:\\"OTel documentation\\"}),\\".\\"]}),`\\n`,(0,t.jsx)(e.p,{children:\\"In our auto-instrumented Python application from above, here is how we set up resource attributes:\\"}),`\\n`,(0,t.jsx)(e.pre,{children:(0,t.jsx)(e.code,{className:\\"language-bash\\",children:`opentelemetry-instrument \\\\\\\\\\n --traces_exporter console,otlp \\\\\\\\\\n --metrics_exporter console \\\\\\\\\\n --service_name your-service-name \\\\\\\\\\n --exporter_otlp_endpoint 0.0.0.0:4317 \\\\\\\\\\n python myapp.py\\n`})}),`\\n`,(0,t.jsxs)(e.p,{children:[\\"However, when instrumenting manually, you need to add your resource attributes and ensure you have consistent values across your application\\\\u2019s code. Resource attributes have been defined by OpenTelemetry\\\\u2019s Resource Semantic Convention and can be found \\",(0,t.jsx)(e.a,{href:\\"https://opentelemetry.io/docs/specs/semconv/resource/\\",rel:\\"nofollow\\",children:\\"here\\"}),\\". In fact, your organization should have a resource attribute convention that is applied across all applications.\\"]}),`\\n`,(0,t.jsx)(e.p,{children:\\"These attributes are added to your metrics, traces, and logs, helping you filter out data, correlate, and make more sense out of them.\\"}),`\\n`,(0,t.jsx)(e.p,{children:\\"Here is an example of setting resource attributes in our Python service:\\"}),`\\n`,(0,t.jsx)(e.pre,{children:(0,t.jsx)(e.code,{className:\\"language-python\\",children:`resource_attributes = {\\n \\"service.name\\": otel_service_name,\\n \\"telemetry.version\\": otel_service_version,\\n \\"Deployment.environment\\": environment\\n\\n}\\n\\nresource = Resource.create(resource_attributes)\\n\\nprovider = TracerProvider(resource=resource)\\n`})}),`\\n`,(0,t.jsx)(e.p,{children:\\"We\\\\u2019ve set up service.name, service.version, and deployment.environment. You can set up as many resource attributes as you need, but you need to ensure you pass the resource attributes into the tracer with provider = TracerProvider(resource=resource).\\"}),`\\n`,(0,t.jsxs)(e.p,{children:[\\"_ \\",(0,t.jsx)(e.strong,{children:\\"Semantic conventions\\"}),\\" _\\",(0,t.jsx)(e.br,{}),`\\n`,\\"In addition to adding the appropriate resource attributes to the code, the OpenTelemetry semantic conventions are important. Another one is about semantic conventions for specific technologies used in building your application with specific infrastructure. 
For example, if you need to instrument databases, there is no automatic instrumentation. You will have to manually instrument for tracing against the database. In doing so, you should utilize the \\",(0,t.jsx)(e.a,{href:\\"https://opentelemetry.io/docs/specs/semconv/database/database-spans/\\",rel:\\"nofollow\\",children:\\"semantic conventions for database calls in OpenTelemetry\\"}),\\".\\"]}),`\\n`,(0,t.jsxs)(e.p,{children:[\\"Similarly, if you are trying to trace Kafka or RabbitMQ, you can follow the \\",(0,t.jsx)(e.a,{href:\\"https://opentelemetry.io/docs/specs/semconv/messaging/\\",rel:\\"nofollow\\",children:\\"OpenTelemetry semantic conventions for messaging systems\\"}),\\".\\"]}),`\\n`,(0,t.jsxs)(e.p,{children:[\\"There are multiple semantic conventions across several areas and signal types that can be followed using OpenTelemetry \\\\u2014 \\",(0,t.jsx)(e.a,{href:\\"https://opentelemetry.io/docs/specs/semconv/\\",rel:\\"nofollow\\",children:\\"check out the details\\"}),\\".\\"]}),`\\n`,(0,t.jsxs)(e.p,{children:[(0,t.jsx)(e.strong,{children:\\"4. Use Batch or simple processing?\\"}),(0,t.jsx)(e.br,{}),`\\n`,\\"Using simple or batch processing depends on your specific observability requirements. The advantages of batch processing include improved efficiency and reduced network overhead. Batch processing allows you to process telemetry data in batches, enabling more efficient data handling and resource utilization. On the other hand, batch processing increases the lag time for telemetry data to appear in the backend, as the span processor needs to wait for a sufficient amount of data to send over to the backend.\\"]}),`\\n`,(0,t.jsx)(e.p,{children:\\"With simple processing, you send your telemetry data as soon as the data is generated, resulting in real-time observability. However, you will need to prepare for higher network overhead and more resources required to process all the separate data transmissions.\\"}),`\\n`,(0,t.jsx)(e.p,{children:\\"Here is what we used to set this up in Python:\\"}),`\\n`,(0,t.jsx)(e.pre,{children:(0,t.jsx)(e.code,{className:\\"language-python\\",children:`from opentelemetry.sdk.trace.export import BatchSpanProcessor\\n\\nprovider = TracerProvider(resource=resource)\\nprocessor = BatchSpanProcessor(exporter)\\nprovider.add_span_processor(processor)\\n\\n# Sets the global default tracer provider\\ntrace.set_tracer_provider(provider)\\n`})}),`\\n`,(0,t.jsx)(e.p,{children:\\"Your observability goals and budgetary constraints are the deciding factors when choosing batch or simple processing. A hybrid approach can also be implemented. If real-time insights are critical for an ecommerce application, for example, then simple processing would be the better approach. For other applications where real-time insights are not crucial, consider batch processing. Often, experimenting with both approaches and seeing how your observability backend handles the data is a fruitful exercise to hone in on what approach works best for the business.\\"}),`\\n`,(0,t.jsx)(e.h2,{id:\\"use-the-opentelemetry-collector-or-go-direct\\",children:\\"Use the OpenTelemetry Collector or go direct?\\"}),`\\n`,(0,t.jsx)(e.p,{children:\\"When starting out with OpenTelemetry, ingesting and transmitting telemetry data directly to a backend such as Elastic is a good way to get started. 
Often, you would be using the OTel direct method in the development phase and in a local environment.\\"}),`\\n`,(0,t.jsx)(e.p,{children:\\"However, as you deploy your applications to production, the applications become fully responsible for ingesting and sending telemetry data. The amount of data sent in a local environment or during development would be miniscule compared to a production environment. With millions or even billions of users interacting with your applications, the work of ingesting and sending telemetry data in addition to the core application functions can become resource-intensive. Thus, offloading the collection, processing, and exporting of telemetry data over to a backend such as Elastic using the vendor-agnostic OTel Collector would enable your applications to perform more efficiently, leading to a better customer experience.\\"}),`\\n`,(0,t.jsx)(e.p,{children:(0,t.jsx)(e.img,{src:\\"/assets/images/best-practices-instrumenting-opentelemetry/elastic-blog-1-microservices-flowchart.png\\",alt:\\"1 microservices flowchart\\",width:\\"1123\\",height:\\"467\\"})}),`\\n`,(0,t.jsx)(e.h3,{id:\\"advantages-of-using-the-opentelemetry-collector\\",children:\\"Advantages of using the OpenTelemetry Collector\\"}),`\\n`,(0,t.jsx)(e.p,{children:\\"For cloud-native and microservices-based applications, the OpenTelemetry Collector provides the flexibility to handle multiple data formats and, more importantly, offloads the resources required from the application to manage telemetry data. The result: reduced application overhead and ease of management as the telemetry configuration can now be managed in one place.\\"}),`\\n`,(0,t.jsx)(e.p,{children:\\"The OTel Collector is the most common configuration because the OTel Collector is used:\\"}),`\\n`,(0,t.jsxs)(e.ul,{children:[`\\n`,(0,t.jsx)(e.li,{children:\\"To enrich the telemetry data with additional context information \\\\u2014 for example, on Kubernetes, the OTel Collector would take the responsibility to enrich all the telemetry with the corresponding K8s pod and node information (labels, pod-name, etc.)\\"}),`\\n`,(0,t.jsx)(e.li,{children:\\"To provide uniform and consistent processing or transform telemetry data in a central place (i.e., OTel Collector) rather than take on the burden of syncing configuration across hundreds of services to ensure consistent processing\\"}),`\\n`,(0,t.jsx)(e.li,{children:\\"To aggregate metrics across multiple instances of a service, which is only doable on the OTel Collector (not within individual SDKs/agents)\\"}),`\\n`]}),`\\n`,(0,t.jsx)(e.p,{children:\\"Key features of the OpenTelemetry Collector include:\\"}),`\\n`,(0,t.jsxs)(e.ul,{children:[`\\n`,(0,t.jsxs)(e.li,{children:[(0,t.jsx)(e.strong,{children:\\"Simple setup:\\"}),\\" The \\",(0,t.jsx)(e.a,{href:\\"https://opentelemetry.io/docs/collector/getting-started/\\",rel:\\"nofollow\\",children:\\"setup documentation\\"}),\\" is clear and comprehensive. We also have an example setup using Elastic and the OTel Collector documented from \\",(0,t.jsx)(e.a,{href:\\"https://www.elastic.co/blog/opentelemetry-observability\\",rel:\\"nofollow\\",children:\\"this blog\\"}),\\".\\"]}),`\\n`,(0,t.jsxs)(e.li,{children:[(0,t.jsx)(e.strong,{children:\\"Flexibility:\\"}),\\" The OTel Collector offers many configuration options and allows you to easily integrate into your existing \\",(0,t.jsx)(e.a,{href:\\"https://www.elastic.co/observability\\",rel:\\"nofollow\\",children:\\"observability solution\\"}),\\". 
However, \\",(0,t.jsx)(e.a,{href:\\"https://opentelemetry.io/docs/collector/distributions/\\",rel:\\"nofollow\\",children:\\"OpenTelemetry\\\\u2019s pre-built distributions\\"}),\\" allow you to start quickly and build the features that you need. \\",(0,t.jsx)(e.a,{href:\\"https://github.com/bshetti/opentelemetry-microservices-demo/blob/main/deploy-with-collector-k8s/otelcollector.yaml\\",rel:\\"nofollow\\",children:\\"Here\\"}),\\" as well as below is an example of the code that we used to build our collector for an application running on Kubernetes.\\"]}),`\\n`]}),`\\n`,(0,t.jsx)(e.pre,{children:(0,t.jsx)(e.code,{className:\\"language-yaml\\",children:`---\\napiVersion: apps/v1\\nkind: Deployment\\nmetadata:\\n name: otelcollector\\nspec:\\n selector:\\n matchLabels:\\n app: otelcollector\\n template:\\n metadata:\\n labels:\\n app: otelcollector\\n spec:\\n serviceAccountName: default\\n terminationGracePeriodSeconds: 5\\n containers:\\n - command:\\n - \\"/otelcol\\"\\n - \\"--config=/conf/otel-collector-config.yaml\\"\\n image: otel/opentelemetry-collector:0.61.0\\n name: otelcollector\\n resources:\\n limits:\\n cpu: 1\\n memory: 2Gi\\n requests:\\n cpu: 200m\\n memory: 400Mi\\n`})}),`\\n`,(0,t.jsxs)(e.ul,{children:[`\\n`,(0,t.jsxs)(e.li,{children:[(0,t.jsx)(e.strong,{children:\\"Collect host metrics:\\"}),\\" Using the OTel Collector allows you to capture infrastructure metrics, including CPU, RAM, storage capacity, and more. This means you won\\\\u2019t need to install a separate infrastructure agent to collect host metrics. An example OTel configuration for ingesting host metrics is below.\\"]}),`\\n`]}),`\\n`,(0,t.jsx)(e.pre,{children:(0,t.jsx)(e.code,{className:\\"language-yaml\\",children:`receivers:\\n hostmetrics:\\n scrapers:\\n cpu:\\n disk:\\n`})}),`\\n`,(0,t.jsxs)(e.ul,{children:[`\\n`,(0,t.jsxs)(e.li,{children:[(0,t.jsx)(e.strong,{children:\\"Security:\\"}),\\" The OTel Collector operates in a secure manner by default. It can filter out sensitive information based on your configuration. OpenTelemetry provides \\",(0,t.jsx)(e.a,{href:\\"https://github.com/open-telemetry/opentelemetry-collector/blob/main/docs/security-best-practices.md\\",rel:\\"nofollow\\",children:\\"these security guidelines\\"}),\\" to ensure your security needs are met.\\"]}),`\\n`,(0,t.jsxs)(e.li,{children:[(0,t.jsx)(e.strong,{children:\\"Tail-based sampling for distributed tracing:\\"}),\\" With OpenTelemetry, you can specify the sampling strategy you would like to use for capturing traces. Tail-based sampling is available by default with the OTel Collector. With tail-based sampling, you control and thereby reduce the amount of trace data collected. More importantly, you capture the most relevant traces, enabling you to spot issues within your microservices applications much faster.\\"]}),`\\n`]}),`\\n`,(0,t.jsx)(e.h2,{id:\\"what-about-logs\\",children:\\"What about logs?\\"}),`\\n`,(0,t.jsx)(e.p,{children:\\"OpenTelemetry\\\\u2019s approach to ingesting metrics and traces is a \\\\u201Cclean-sheet design.\\\\u201D OTel developed a new API for metrics and traces and implementations for multiple languages. For logs, on the other hand, due to the broad adoption and existence of legacy log solutions and libraries, support from OTel is the least mature.\\"}),`\\n`,(0,t.jsxs)(e.p,{children:[\\"Today, OpenTelemetry\\\\u2019s solution for logs is to provide integration hooks to existing solutions. 
(0,t.jsx)(e.h2,{id:\\"what-about-logs\\",children:\\"What about logs?\\"}),`\\n`,(0,t.jsx)(e.p,{children:\\"OpenTelemetry\\\\u2019s approach to ingesting metrics and traces is a \\\\u201Cclean-sheet design.\\\\u201D OTel developed a new API for metrics and traces and implementations for multiple languages. For logs, on the other hand, due to the broad adoption and existence of legacy log solutions and libraries, support from OTel is the least mature.\\"}),`\\n`,(0,t.jsxs)(e.p,{children:[\\"Today, OpenTelemetry\\\\u2019s solution for logs is to provide integration hooks to existing solutions. Longer term, though, OpenTelemetry aims to incorporate context aggregation with logs, easing log correlation with metrics and traces. \\",(0,t.jsx)(e.a,{href:\\"https://opentelemetry.io/docs/specs/otel/logs/#opentelemetry-solution\\",rel:\\"nofollow\\",children:\\"Learn more about OpenTelemetry\\\\u2019s vision\\"}),\\".\\"]}),`\\n`,(0,t.jsxs)(e.p,{children:[\\"Elastic has written up its recommendations in the following article: \\",(0,t.jsx)(e.a,{href:\\"https://www.elastic.co/blog/3-models-logging-opentelemetry-elastic\\",rel:\\"nofollow\\",children:\\"3 models for logging with OpenTelemetry and Elastic\\"}),\\". Here is a brief summary of what Elastic recommends:\\"]}),`\\n`,(0,t.jsxs)(e.ol,{children:[`\\n`,(0,t.jsxs)(e.li,{children:[`\\n`,(0,t.jsxs)(e.p,{children:[\\"Output logs from your service (alongside traces and metrics) using an embedded \\",(0,t.jsx)(e.a,{href:\\"https://opentelemetry.io/docs/instrumentation/#status-and-releases\\",rel:\\"nofollow\\",children:\\"OpenTelemetry Instrumentation library\\"}),\\" to Elastic via the OTLP protocol.\\"]}),`\\n`]}),`\\n`,(0,t.jsxs)(e.li,{children:[`\\n`,(0,t.jsxs)(e.p,{children:[\\"Write logs from your service to a file scraped by the \\",(0,t.jsx)(e.a,{href:\\"https://opentelemetry.io/docs/collector/\\",rel:\\"nofollow\\",children:\\"OpenTelemetry Collector\\"}),\\", which then forwards to Elastic via the OTLP protocol (a hedged sketch of this model follows the list).\\"]}),`\\n`]}),`\\n`,(0,t.jsxs)(e.li,{children:[`\\n`,(0,t.jsxs)(e.p,{children:[\\"Write logs from your service to a file scraped by \\",(0,t.jsx)(e.a,{href:\\"https://www.elastic.co/elastic-agent\\",rel:\\"nofollow\\",children:\\"Elastic Agent\\"}),\\" (or \\",(0,t.jsx)(e.a,{href:\\"https://www.elastic.co/beats/filebeat\\",rel:\\"nofollow\\",children:\\"Filebeat\\"}),\\"), which then forwards to Elastic via an Elastic-defined protocol.\\"]}),`\\n`]}),`\\n`]}),`\\n`,
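(0,t.jsx)(e.p,{children:\\"For the second model, a minimal collector configuration might look like the following sketch. The filelog receiver ships in the collector-contrib distribution; the path, endpoint, and token are placeholders:\\"}),`\\n`,(0,t.jsx)(e.pre,{children:(0,t.jsx)(e.code,{className:\\"language-yaml\\",children:`receivers:\\n  # Scrape log files written by your services (contrib distribution)\\n  filelog:\\n    include: [/var/log/myapp/*.log] # placeholder path\\nexporters:\\n  otlp:\\n    endpoint: \\"https://<your-otlp-endpoint>:443\\" # placeholder\\n    headers:\\n      Authorization: \\"Bearer <secret-token>\\" # placeholder\\nservice:\\n  pipelines:\\n    logs:\\n      receivers: [filelog]\\n      exporters: [otlp]\\n`})}),`\\n`,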
(0,t.jsx)(e.p,{children:\\"The third approach, where developers have their logs scraped by an Elastic Agent, is the recommended approach, as Elastic provides a widely adopted and proven method for capturing logs from applications and services, including those instrumented with OTel. The first two approaches, although both use OTel instrumentation, are not yet mature and aren\'t ready for production-level applications.\\"}),`\\n`,(0,t.jsxs)(e.p,{children:[\\"Get more details about the three approaches in this \\",(0,t.jsx)(e.a,{href:\\"https://www.elastic.co/blog/3-models-logging-opentelemetry-elastic\\",rel:\\"nofollow\\",children:\\"Elastic blog\\"}),\\", which includes a deep-dive discussion with hands-on implementation, architecture, advantages, and disadvantages.\\"]}),`\\n`,(0,t.jsx)(e.h2,{id:\\"its-not-all-sunshine-and-roses\\",children:\\"It\\\\u2019s not all sunshine and roses\\"}),`\\n`,(0,t.jsx)(e.p,{children:\\"OpenTelemetry is definitely beneficial for obtaining observability into modern cloud-native distributed applications. Having a standardized framework for ingesting telemetry reduces operational expenses and allows the organization to focus more on application innovation. Even with all the advantages of using OTel, there are some limitations that you should be aware of as well.\\"}),`\\n`,(0,t.jsx)(e.p,{children:\\"But first, here are the advantages of using OpenTelemetry:\\"}),`\\n`,(0,t.jsxs)(e.ul,{children:[`\\n`,(0,t.jsxs)(e.li,{children:[(0,t.jsx)(e.strong,{children:\\"Standardized instrumentation:\\"}),\\" Having a consistent method for instrumenting systems up and down the stack gives organizations more operational efficiency and cost-effective observability.\\"]}),`\\n`,(0,t.jsxs)(e.li,{children:[(0,t.jsx)(e.strong,{children:\\"Auto-instrumentation:\\"}),\\" OTel gives organizations the ability to auto-instrument popular libraries and frameworks, enabling them to get up and running quickly with minimal changes to the codebase.\\"]}),`\\n`,(0,t.jsxs)(e.li,{children:[(0,t.jsx)(e.strong,{children:\\"Vendor neutrality:\\"}),\\" Organizations don\\\\u2019t have to be tied to one vendor for their observability needs. In fact, they can use several, relying on OTel to try one out or take a more best-of-breed approach if desired.\\"]}),`\\n`,(0,t.jsxs)(e.li,{children:[(0,t.jsx)(e.strong,{children:\\"Future-proof instrumentation:\\"}),\\" Since OpenTelemetry is open source and has a vast ecosystem of support, your organization will be using technology that is constantly improved and can scale and grow with the business.\\"]}),`\\n`]}),`\\n`,(0,t.jsx)(e.p,{children:\\"There are some limitations as well:\\"}),`\\n`,(0,t.jsxs)(e.ul,{children:[`\\n`,(0,t.jsx)(e.li,{children:\\"Instrumenting with OTel is a fork-lift upgrade. Organizations must be aware that time and effort need to be invested to migrate proprietary instrumentation to OpenTelemetry.\\"}),`\\n`,(0,t.jsxs)(e.li,{children:[\\"The \\",(0,t.jsx)(e.a,{href:\\"https://opentelemetry.io/docs/instrumentation/\\",rel:\\"nofollow\\",children:\\"language SDKs\\"}),\\" are at different maturity levels, so applications relying on alpha, beta, or experimental functionality may not provide the organization with the full benefits in the short term.\\"]}),`\\n`]}),`\\n`,(0,t.jsxs)(e.p,{children:[\\"Over time, the disadvantages will be reduced, especially as the maturity level of the functional components improves. Check the \\",(0,t.jsx)(e.a,{href:\\"https://opentelemetry.io/status/\\",rel:\\"nofollow\\",children:\\"OpenTelemetry status page\\"}),\\" for updates on the status of the language SDKs, the collector, and overall specifications.\\"]}),`\\n`,(0,t.jsx)(e.h2,{id:\\"using-elastic-and-migrating-to-opentelemetry-at-your-speed\\",children:\\"Using Elastic and migrating to OpenTelemetry at your speed\\"}),`\\n`,(0,t.jsxs)(e.p,{children:[\\"Transitioning to OpenTelemetry is a challenge for most organizations, as it requires retooling existing proprietary APM agents on almost all applications. This can be daunting, but OpenTelemetry agents provide a mechanism to avoid having to modify the source code, otherwise known as auto-instrumentation. With auto-instrumentation, the only code changes will be to rip out the proprietary APM agent code. 
Additionally, ensure you have an \\",(0,t.jsx)(e.a,{href:\\"https://www.elastic.co/blog/opentelemetry-observability\\",rel:\\"nofollow\\",children:\\"observability tool that natively supports OTel\\"}),\\" without the need for additional agents, such as \\",(0,t.jsx)(e.a,{href:\\"https://www.elastic.co/blog/opentelemetry-observability\\",rel:\\"nofollow\\",children:\\"Elastic Observability\\"}),\\".\\"]}),`\\n`,(0,t.jsxs)(e.p,{children:[(0,t.jsx)(e.a,{href:\\"https://www.elastic.co/blog/ecs-elastic-common-schema-otel-opentelemetry-announcement\\",rel:\\"nofollow\\",children:\\"Elastic recently donated Elastic Common Schema (ECS) in its entirety to OTel\\"}),\\". The goal was to ensure OTel can converge on a standardized logging format. ECS, developed by the Elastic community over the past few years, provides a vehicle for OTel to offer a more mature logging solution.\\"]}),`\\n`,(0,t.jsx)(e.p,{children:\\"Elastic provides native OTel support. You can send OTel telemetry directly into Elastic Observability without a collector or any of the processing normally performed in the collector.\\"}),`\\n`,
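(0,t.jsx)(e.p,{children:\\"As a rough sketch of that direct approach, an OTel-instrumented service can usually be pointed at Elastic with only the standard OTLP environment variables. The service name, endpoint, and token below are placeholders:\\"}),`\\n`,(0,t.jsx)(e.pre,{children:(0,t.jsx)(e.code,{className:\\"language-yaml\\",children:`# Kubernetes container env for an OTel-instrumented service (illustrative)\\nenv:\\n  - name: OTEL_SERVICE_NAME\\n    value: \\"newsletter-otel\\" # placeholder service name\\n  - name: OTEL_EXPORTER_OTLP_ENDPOINT\\n    value: \\"https://<your-apm-endpoint>:443\\" # placeholder\\n  - name: OTEL_EXPORTER_OTLP_HEADERS\\n    value: \\"Authorization=Bearer <secret-token>\\" # placeholder\\n`})}),`\\n`,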
(0,t.jsx)(e.p,{children:\\"Here are the configuration options in Elastic for OpenTelemetry:\\"}),`\\n`,(0,t.jsx)(e.p,{children:(0,t.jsx)(e.img,{src:\\"/assets/images/best-practices-instrumenting-opentelemetry/elastic-blog-2-otel-config-options.png\\",alt:\\"OTel configuration options in Elastic\\",width:\\"1365\\",height:\\"687\\"})}),`\\n`,(0,t.jsx)(e.p,{children:\\"Most of Elastic Observability\\\\u2019s APM capabilities are available with OTel data. Some of these include:\\"}),`\\n`,(0,t.jsxs)(e.ul,{children:[`\\n`,(0,t.jsx)(e.li,{children:\\"Service maps\\"}),`\\n`,(0,t.jsx)(e.li,{children:\\"Service details (latency, throughput, failed transactions)\\"}),`\\n`,(0,t.jsx)(e.li,{children:\\"Dependencies between services, distributed tracing\\"}),`\\n`,(0,t.jsx)(e.li,{children:\\"Transactions (traces)\\"}),`\\n`,(0,t.jsx)(e.li,{children:\\"Machine learning (ML) correlations\\"}),`\\n`,(0,t.jsx)(e.li,{children:\\"Log correlation\\"}),`\\n`]}),`\\n`,(0,t.jsx)(e.p,{children:\\"In addition to Elastic\\\\u2019s APM and a unified view of the telemetry data, you will also be able to use Elastic\\\\u2019s powerful machine learning capabilities to reduce analysis time, and alerting to help reduce MTTR.\\"}),`\\n`,(0,t.jsxs)(e.p,{children:[\\"Although OpenTelemetry supports many programming languages, the \\",(0,t.jsx)(e.a,{href:\\"https://opentelemetry.io/docs/instrumentation/\\",rel:\\"nofollow\\",children:\\"status of its major functional components\\"}),\\" \\\\u2014 metrics, traces, and logs \\\\u2014 is still at various stages. Applications written in Java, Python, and JavaScript are therefore good candidates to migrate first, as their metrics and traces (and, for Java, logs) are stable.\\"]}),`\\n`,(0,t.jsx)(e.p,{children:\\"For the languages that are not yet supported, you can instrument those applications using Elastic Agents, running your observability platform in mixed mode (Elastic Agents alongside OpenTelemetry agents).\\"}),`\\n`,(0,t.jsx)(e.p,{children:(0,t.jsx)(e.img,{src:\\"/assets/images/best-practices-instrumenting-opentelemetry/elastic-blog-3-services.png\\",alt:\\"services\\",width:\\"1790\\",height:\\"909\\"})}),`\\n`,(0,t.jsx)(e.p,{children:\\"We ran a variation of our standard Elastic Agent application with one service flipped to OTel \\\\u2014 the newsletter-otel service. We can convert each of the remaining services to OTel as development resources allow.\\"}),`\\n`,(0,t.jsx)(e.p,{children:\\"As a result, you can take advantage of the benefits of OpenTelemetry, which include:\\"}),`\\n`,(0,t.jsxs)(e.ul,{children:[`\\n`,(0,t.jsxs)(e.li,{children:[(0,t.jsx)(e.strong,{children:\\"Standardization:\\"}),\\" OpenTelemetry provides a standard approach to telemetry collection, enabling consistency of processes and easier integration of different components.\\"]}),`\\n`,(0,t.jsxs)(e.li,{children:[(0,t.jsx)(e.strong,{children:\\"Vendor-agnostic:\\"}),\\" Since OpenTelemetry is open source, it is designed to be vendor-agnostic, allowing DevOps and SRE teams to work with other monitoring and observability backends, reducing vendor lock-in.\\"]}),`\\n`,(0,t.jsxs)(e.li,{children:[(0,t.jsx)(e.strong,{children:\\"Flexibility and extensibility:\\"}),\\" With its flexible architecture and inherent design for extensibility, OpenTelemetry enables teams to create custom instrumentation and enrich their own telemetry data.\\"]}),`\\n`,(0,t.jsxs)(e.li,{children:[(0,t.jsx)(e.strong,{children:\\"Community and support:\\"}),\\" OpenTelemetry has a growing community of contributors and adopters. In fact, Elastic contributed to developing a common schema for metrics, logs, traces, and security events. Learn more \\",(0,t.jsx)(e.a,{href:\\"https://www.elastic.co/blog/ecs-elastic-common-schema-otel-opentelemetry-announcement\\",rel:\\"nofollow\\",children:\\"here\\"}),\\".\\"]}),`\\n`]}),`\\n`,(0,t.jsx)(e.p,{children:\\"Once the other languages reach a stable state, you can continue your migration to OpenTelemetry agents.\\"}),`\\n`,(0,t.jsx)(e.h2,{id:\\"summary\\",children:\\"Summary\\"}),`\\n`,(0,t.jsx)(e.p,{children:\\"OpenTelemetry has become the de facto standard for ingesting metrics, traces, and logs from cloud-native applications. It provides a vendor-agnostic framework for collecting telemetry data, enabling you to use the observability backend of your choice.\\"}),`\\n`,(0,t.jsx)(e.p,{children:\\"Auto-instrumentation using OpenTelemetry is the fastest way to ingest your telemetry data and an optimal way to get started with OTel. Manual instrumentation, however, provides more flexibility, so it is often the next step in gaining deeper insights from your telemetry data.\\"}),`\\n`,(0,t.jsxs)(e.p,{children:[(0,t.jsx)(e.a,{href:\\"https://www.elastic.co/observability/opentelemetry\\",rel:\\"nofollow\\",children:\\"OpenTelemetry visualization\\"}),\\" also allows you to ingest your data directly or by using the OTel Collector. For local development, going direct is a great way to get your data to your observability backend; for production workloads, however, using the OTel Collector is recommended. The collector takes care of all the data ingestion and processing, enabling your applications to focus on functionality rather than telemetry tasks.\\"]}),`\\n`,(0,t.jsx)(e.p,{children:\\"Logging functionality is still at a nascent stage with OpenTelemetry, while ingesting metrics and traces is well established. For logs, if you\\\\u2019ve started down the OTel path, you can send your logs to Elastic using the OTLP protocol. Since Elastic has a very mature logging solution, a better approach would be to use an Elastic Agent to ingest logs.\\"}),`\\n`,(0,t.jsx)(e.p,{children:\\"Although the long-term benefits are clear, organizations need to be aware that adopting OpenTelemetry means they would own their own instrumentation. 
Thus, appropriate resources and effort need to be incorporated in the development lifecycle. Over time, however, OpenTelemetry brings standardization to telemetry data ingestion, offering organizations vendor-choice, scalability, flexibility, and future-proofing of investments.\\"}),`\\n`,(0,t.jsx)(e.p,{children:(0,t.jsx)(e.em,{children:\\"The release and timing of any features or functionality described in this post remain at Elastic\'s sole discretion. Any features or functionality not currently available may not be delivered on time or at all.\\"})})]})}function h(n={}){let{wrapper:e}=n.components||{};return e?(0,t.jsx)(e,{...n,children:(0,t.jsx)(d,{...n})}):d(n)}return w(O);})();\\n;return Component;"},"_id":"articles/best-practices-instrumenting-opentelemetry.mdx","_raw":{"sourceFilePath":"articles/best-practices-instrumenting-opentelemetry.mdx","sourceFileName":"best-practices-instrumenting-opentelemetry.mdx","sourceFileDir":"articles","contentType":"mdx","flattenedPath":"articles/best-practices-instrumenting-opentelemetry"},"type":"Article","imageUrl":"/assets/images/best-practices-instrumenting-opentelemetry/ecs-otel-announcement-3.jpeg","readingTime":"26 min read","url":"/best-practices-instrumenting-opentelemetry","headings":[{"level":2,"title":"OTel automatic or manual instrumentation: Which one should I use?","href":"#otel-automatic-or-manual-instrumentation-which-one-should-i-use"},{"level":3,"title":"Auto-instrumentation","href":"#auto-instrumentation"},{"level":3,"title":"Combination: Automatic and Manual","href":"#combination-automatic-and-manual"},{"level":3,"title":"Manual instrumentation","href":"#manual-instrumentation"},{"level":2,"title":"Use the OpenTelemetry Collector or go direct?","href":"#use-the-opentelemetry-collector-or-go-direct"},{"level":3,"title":"Advantages of using the OpenTelemetry Collector","href":"#advantages-of-using-the-opentelemetry-collector"},{"level":2,"title":"What about logs?","href":"#what-about-logs"},{"level":2,"title":"It’s not all sunshine and roses","href":"#its-not-all-sunshine-and-roses"},{"level":2,"title":"Using Elastic and migrating to OpenTelemetry at your speed","href":"#using-elastic-and-migrating-to-opentelemetry-at-your-speed"},{"level":2,"title":"Summary","href":"#summary"}]},{"title":"Best Practices for Log Management: Leveraging Logs for Faster Problem Resolution","slug":"best-practices-logging","date":"2024-09-11","description":"Explore effective log management strategies to improve system reliability and performance. Learn about data collection, processing, analysis, and cost-effective management of logs in complex software environments.","image":"best-practices-log-management.png","author":[{"slug":"luca-wintergerst","type":"Author","_raw":{}},{"slug":"david-hope","type":"Author","_raw":{}},{"slug":"bahubali-shetti","type":"Author","_raw":{}}],"tags":[{"slug":"log-analytics","type":"Tag","_raw":{}}],"body":{"raw":"\\nIn today\'s rapid software development landscape, efficient log management is crucial for maintaining system reliability and performance. With expanding and complex infrastructure and application components, the responsibilities of operations and development teams are ever-growing and multifaceted. 
This blog post outlines best practices for effective log management, addressing the challenges of growing data volumes, complex infrastructures, and the need for quick problem resolution.\\n\\n## Understanding Logs and Their Importance\\n\\nLogs are records of events occurring within your infrastructure, typically including a timestamp, a message detailing the event, and metadata identifying the source. They are invaluable for diagnosing issues, providing early warnings, and speeding up problem resolution. Logs are often the primary signal that developers enable, offering significant detail for debugging, performance analysis, security, and compliance management.\\n\\n## The Logging Journey\\n\\nThe logging journey involves three basic steps: collection and ingestion, processing and enrichment, and analysis and rationalization. Let\'s explore each step in detail, covering some best practices for each.\\n\\n![Logging Journey](/assets/images/best-practices-logging/blog-elastic-collection-and-ingest.png)\\n\\n### 1. Log Collection and Ingestion\\n\\n#### Collect Everything Relevant and Actionable\\n\\nThe first step is to collect all logs into a central location. This involves identifying all your applications and systems and collecting their logs. Comprehensive data collection ensures no critical information is missed, providing a complete picture of your system\'s behavior. In the event of an incident, having all logs in one place can significantly reduce the time to resolution. It\'s generally better to collect more data than you need: you can always filter out irrelevant information later and delete logs that are no longer needed sooner.\\n\\n#### Leverage Integrations\\n\\nElastic provides over 300 integrations that simplify data onboarding. These integrations not only collect data but also come with dashboards, saved searches, and pipelines to parse the data. Utilizing these integrations can significantly reduce manual effort and ensure data consistency.\\n\\n#### Consider Ingestion Capacity and Costs\\n\\nAn important aspect of log collection is ensuring you have sufficient ingestion capacity at a manageable cost. When assessing solutions, be cautious about those that charge significantly more for high-cardinality data, as this can lead to unexpectedly high costs in observability solutions. We\'ll talk more about cost-effective log management later in this post.\\n\\n#### Use Kafka for Large Projects\\n\\nFor larger organizations, implementing Kafka can improve log data management. Kafka acts as a buffer, making the system more reliable and easier to manage. It allows different teams to send data to a centralized location, which can then be ingested into Elastic.
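\\n\\nTo ground this, here is a hedged sketch of the producer side using Filebeat\'s Kafka output; the broker addresses, topic, and paths are placeholders, and on the consuming side a Kafka input (for example, via Elastic Agent or Logstash) would read the topic and ingest into Elastic:\\n\\n```yaml\\n# filebeat.yml (illustrative)\\nfilebeat.inputs:\\n  - type: filestream\\n    id: app-logs # hypothetical input id\\n    paths:\\n      - /var/log/myapp/*.log # placeholder path\\n\\noutput.kafka:\\n  hosts: [\\"kafka-1:9092\\", \\"kafka-2:9092\\"] # placeholder brokers\\n  topic: \\"logs-myteam\\" # hypothetical topic\\n```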
\\n\\n### 2. Processing and Enrichment\\n\\n#### Adopt Elastic Common Schema (ECS)\\n\\nA key aspect of log collection is achieving as much normalization as possible across all of your applications and infrastructure. Having a common semantic schema is crucial. Elastic contributed Elastic Common Schema (ECS) to OpenTelemetry (OTel), helping accelerate the adoption of OTel-based observability and security. This move towards a more normalized way to define and ingest logs (including metrics and traces) is beneficial for the industry.\\n\\nUsing ECS helps standardize field names and data structures, making data analysis and correlation easier. This common schema ensures your data is organized predictably, facilitating more efficient querying and reporting. Learn more about ECS [here](https://www.elastic.co/guide/en/ecs/current/ecs-reference.html).\\n\\n#### Optimize Mappings for High Volume Data\\n\\nFor high-cardinality fields or those rarely used, consider optimizing or removing them from the index. This can improve performance by reducing the amount of data that needs to be indexed and searched. Our documentation has sections to tune your setup for [disk usage](https://www.elastic.co/guide/en/elasticsearch/reference/current/tune-for-disk-usage.html), [search speed](https://www.elastic.co/guide/en/elasticsearch/reference/current/tune-for-search-speed.html), and [indexing speed](https://www.elastic.co/guide/en/elasticsearch/reference/current/tune-for-indexing-speed.html).\\n\\n#### Managing Structured vs. Unstructured Logs\\n\\nStructured logs are generally preferable as they offer more value and are easier to work with. They have a predefined format and fields, simplifying information extraction and analysis. For custom logs without pre-built integrations, you may need to define your own parsing rules.\\n\\nFor unstructured logs, full-text search capabilities can help mitigate limitations. By indexing logs, full-text search allows users to search for specific keywords or phrases efficiently, even within large volumes of unstructured data. This is one of the main differentiators of Elastic\'s observability solution. You can simply search for any keyword or phrase and get results in real-time, without needing to write complex regular expressions or parsing rules at query time.\\n\\n#### Schema-on-Read vs. Schema-on-Write\\n\\nThere are two main approaches to processing log data:\\n\\n1. Schema-on-read: Some observability dashboarding capabilities can perform runtime transformations to extract fields from non-parsed sources on the fly. This is helpful when dealing with legacy systems or custom applications that may not log data in a standardized format. However, runtime parsing can be time-consuming and resource-intensive, especially for large volumes of data.\\n\\n2. Schema-on-write: This approach offers better performance and more control over the data. The schema is defined upfront, and the data is structured and validated at the time of writing. This allows for faster processing and analysis of the data, which is beneficial for enrichment.\\n\\n### 3. Analysis and Rationalization\\n\\n#### Full-Text Search\\n\\nElastic\'s full-text search capabilities, powered by Elasticsearch, allow you to quickly find relevant logs. 
The Kibana Query Language (KQL) enhances search efficiency, enabling you to filter and drill down into the data to identify issues rapidly.\\n\\nHere are a few examples of KQL queries:\\n\\n```\\n// Filter documents where a field exists\\nhttp.request.method: *\\n\\n// Filter documents that match a specific value\\nhttp.request.method: GET\\n\\n// Search all fields for a specific value\\nHello\\n\\n// Filter documents where a text field contains specific terms\\nhttp.request.body.content: \\"null pointer\\"\\n\\n// Filter documents within a range\\nhttp.response.bytes < 10000\\n\\n// Combine range queries\\nhttp.response.bytes > 10000 and http.response.bytes <= 20000\\n\\n// Use wildcards to match patterns\\nhttp.response.status_code: 4*\\n\\n// Negate a query\\nnot http.request.method: GET\\n\\n// Combine multiple queries with AND/OR\\nhttp.request.method: GET and http.response.status_code: 400\\n```\\n\\n#### Machine Learning Integration\\n\\nMachine learning can automate the detection of anomalies and patterns within your log data. Elastic offers features like log rate analysis that automatically identify deviations from normal behavior. By leveraging machine learning, you can proactively address potential issues before they escalate.\\n\\n![Machine Learning](/assets/images/best-practices-logging/screenshot-machine-learning-smv-anomaly.png)\\n\\nIt is recommended that organizations utilize a diverse arsenal of machine learning algorithms and techniques to effectively uncover unknown-unknowns in log files. Unsupervised machine learning algorithms should be employed for anomaly detection on real-time data, with rate-controlled alerting based on severity.\\n\\nBy automatically identifying influencers, users can gain valuable context for automated root cause analysis (RCA). Log pattern analysis brings categorization to unstructured logs, while log rate analysis and change point detection help identify the root causes of spikes in log data.\\n\\nTake a look at the [documentation](https://www.elastic.co/guide/en/machine-learning/current/ml-overview.html) to get started with machine learning in Elastic.\\n\\n#### Dashboarding and Alerting\\n\\nBuilding dashboards and setting up alerting helps you monitor your logs in real-time. Dashboards provide a visual representation of your logs, making it easier to identify patterns and anomalies. Alerting can notify you when specific events occur, allowing you to take action quickly.\\n\\n## Cost-Effective Log Management\\n\\n### Use Data Tiers\\n\\nImplementing index lifecycle management to move data across hot, warm, cold, and frozen tiers can significantly reduce storage costs. This approach ensures that only the most frequently accessed data is stored on expensive, high-performance storage, while older data is moved to more cost-effective storage solutions.\\n\\n![ILM](/assets/images/best-practices-logging/ilm.png)\\n\\nOur documentation explains how to set up [Index Lifecycle Management](https://www.elastic.co/guide/en/elasticsearch/reference/current/index-lifecycle-management.html).
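\\n\\nAs a hedged illustration of how tiers map to hardware, each data node advertises its tier through node roles in elasticsearch.yml, and ILM then moves indices between tiers automatically. The role assignments below are illustrative:\\n\\n```yaml\\n# elasticsearch.yml on a hot-tier node (illustrative)\\nnode.roles: [data_hot, data_content]\\n\\n# On a warm-tier node it would instead be:\\n# node.roles: [data_warm]\\n\\n# And on a frozen-tier node:\\n# node.roles: [data_frozen]\\n```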
\\n\\n### Compression and Index Sorting\\n\\nApplying best compression settings and using index sorting can further reduce the data footprint. Optimizing the way data is stored on disk can lead to substantial savings in storage costs and improve retrieval performance. As of 8.15, Elasticsearch provides an indexing mode called \\"logsdb\\". This is a highly optimized way of storing log data. This new way of indexing data uses 2.5 times less disk space than the default mode. You can read more about it [here](https://www.elastic.co/guide/en/elasticsearch/reference/current/logs-data-stream.html). This mode automatically applies the best combination of settings for compression, index sorting, and other optimizations that weren\'t accessible to users before.\\n\\n### Snapshot Lifecycle Management (SLM)\\n\\n![SLM](/assets/images/best-practices-logging/slm.png)\\n\\nSLM allows you to back up your data and delete it from the main cluster, freeing up resources. If needed, data can be restored quickly for analysis, ensuring that you maintain the ability to investigate historical events without incurring high storage costs.\\n\\nLearn more about SLM in the [documentation](https://www.elastic.co/guide/en/elasticsearch/reference/current/snapshot-lifecycle-management.html).\\n\\n### Dealing with Large Amounts of Log Data\\n\\nManaging large volumes of log data can be challenging. Here are some strategies to optimize log management:\\n\\n1. Develop a logs deletion policy. Evaluate what data to collect and when to delete it.\\n2. Consider discarding DEBUG logs or even INFO logs earlier, and delete dev and staging environment logs sooner.\\n3. Aggregate short windows of identical log lines, which is especially useful for TCP security event logging.\\n4. For applications and code you control, consider moving some logs into traces to reduce log volume while maintaining detailed information.\\n\\n### Centralized vs. Decentralized Log Storage\\n\\nData locality is an important consideration when managing log data. The costs of ingressing and egressing large amounts of log data can be prohibitively high, especially when dealing with cloud providers.\\n\\nIn the absence of regional redundancy requirements, your organization may not need to send all log data to a central location. Consider keeping log data local to the datacenter where it was generated to reduce ingress and egress costs.\\n\\n[Cross-cluster search](https://www.elastic.co/guide/en/elasticsearch/reference/current/modules-cross-cluster-search.html) functionality enables users to search across multiple logging clusters simultaneously, reducing the amount of data that needs to be transferred over the network.\\n\\n[Cross-cluster replication](https://www.elastic.co/guide/en/elasticsearch/reference/current/xpack-ccr.html) is useful for maintaining business continuity in the event of a disaster, ensuring data availability even during an outage in one datacenter.
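\\n\\nFor illustration, here is a hedged sketch of registering remote clusters for cross-cluster search; the cluster aliases and transport addresses are placeholders, and a search can then address `logs-*,dc_east:logs-*,dc_west:logs-*`:\\n\\n```yaml\\n# elasticsearch.yml on the cluster you search from (illustrative)\\ncluster:\\n  remote:\\n    dc_east:\\n      seeds: [\\"10.1.0.5:9300\\"] # placeholder transport address\\n    dc_west:\\n      seeds: [\\"10.2.0.5:9300\\"] # placeholder transport address\\n```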
\\n\\n## Monitoring and Performance\\n\\n### Monitor Your Log Management System\\n\\nUsing a dedicated monitoring cluster can help you track the performance of your Elastic deployment. [Stack monitoring](https://www.elastic.co/guide/en/kibana/current/xpack-monitoring.html) provides metrics on search and indexing activity, helping you identify and resolve performance bottlenecks.\\n\\n### Adjust Bulk Size and Refresh Interval\\n\\n[Optimizing these settings](https://www.elastic.co/guide/en/elasticsearch/reference/current/tune-for-indexing-speed.html) can balance performance and resource usage. Increasing bulk size and refresh interval can improve indexing efficiency, especially for high-throughput environments.\\n\\n## Logging Best Practices\\n\\n### Adjust Log Levels\\n\\nEnsure that log levels are appropriately set for all applications. Customize log formats to facilitate easier ingestion and analysis. Properly configured log levels can reduce noise and make it easier to identify critical issues.\\n\\n### Use Modern Logging Frameworks\\n\\nImplement logging frameworks that support structured logging. Adding metadata to logs enhances their usefulness for analysis. Structured logging formats, such as JSON, allow logs to be easily parsed and queried, improving the efficiency of log analysis.\\nIf you fully control the application and are already using structured logging, consider using [Elastic\'s version of these libraries](https://github.com/elastic/ecs-logging), which can automatically parse logs into ECS fields.\\n\\n### Leverage APM and Metrics\\n\\nFor custom-built applications, Application Performance Monitoring (APM) provides deeper insights into application performance, complementing traditional logging. APM tracks transactions across services, helping you understand dependencies and identify performance bottlenecks.\\n\\n![APM](/assets/images/best-practices-logging/apm.png)\\n\\nConsider collecting metrics alongside logs. Metrics can provide insights into your system\'s performance, such as CPU usage, memory usage, and network traffic. If you\'re already collecting logs from your systems, adding metrics collection is usually a quick process.\\n\\nTraces can provide even deeper insights into specific transactions or request paths, especially in cloud-native environments. They offer more contextual information and excel at tracking dependencies across services. However, implementing tracing is only possible for applications you own, and not all developers have fully embraced it yet.\\n\\nA combined logging and tracing strategy is recommended, where traces provide coverage for newer instrumented apps, and logging supports legacy applications and systems you don\'t own the source code for.\\n\\n## Conclusion\\n\\nEffective log management is essential for maintaining system reliability and performance in today\'s complex software environments. By following these best practices, you can optimize your log management process, reduce costs, and improve problem resolution times.\\n\\nKey takeaways include:\\n- Ensure comprehensive log collection with a focus on normalization and common schemas.\\n- Use appropriate processing and enrichment techniques, balancing between structured and unstructured logs.\\n- Leverage full-text search and machine learning for efficient log analysis.\\n- Implement cost-effective storage strategies and smart data retention policies.\\n- Enhance your logging strategy with APM, metrics, and traces for a complete observability solution.\\n\\nContinuously evaluate and adjust your strategies to keep pace with the growing volume and complexity of log data, and you\'ll be well-equipped to ensure the reliability, performance, and security of your applications and infrastructure.\\n\\nCheck out our other blogs:\\n- [Build better Service Level Objectives (SLOs) from logs and metrics](https://www.elastic.co/observability-labs/blog/service-level-objectives-slos-logs-metrics)\\n- [AWS VPC Flow log analysis with GenAI in Elastic](https://www.elastic.co/observability-labs/blog/aws-vpc-flow-log-analysis-with-genai-elastic)\\n- [Migrating 1 billion log lines from OpenSearch to Elasticsearch](https://www.elastic.co/observability-labs/blog/migrating-billion-log-lines-opensearch-elasticsearch)\\n- [Pruning incoming log volumes with Elastic](https://www.elastic.co/observability-labs/blog/pruning-incoming-log-volumes)\\n\\nReady to get started? 
Use Elastic Observability on Elastic Cloud — the hosted Elasticsearch service that includes all of the latest features.\\n\\n_The release and timing of any features or functionality described in this post remain at Elastic\'s sole discretion. Any features or functionality not currently available may not be delivered on time or at all._","code":"var Component=(()=>{var g=Object.create;var o=Object.defineProperty;var u=Object.getOwnPropertyDescriptor;var m=Object.getOwnPropertyNames;var p=Object.getPrototypeOf,f=Object.prototype.hasOwnProperty;var y=(i,e)=>()=>(e||i((e={exports:{}}).exports,e),e.exports),w=(i,e)=>{for(var t in e)o(i,t,{get:e[t],enumerable:!0})},s=(i,e,t,r)=>{if(e&&typeof e==\\"object\\"||typeof e==\\"function\\")for(let a of m(e))!f.call(i,a)&&a!==t&&o(i,a,{get:()=>e[a],enumerable:!(r=u(e,a))||r.enumerable});return i};var v=(i,e,t)=>(t=i!=null?g(p(i)):{},s(e||!i||!i.__esModule?o(t,\\"default\\",{value:i,enumerable:!0}):t,i)),b=i=>s(o({},\\"__esModule\\",{value:!0}),i);var c=y((E,l)=>{l.exports=_jsx_runtime});var k={};w(k,{default:()=>h,frontmatter:()=>x});var n=v(c()),x={title:\\"Best Practices for Log Management: Leveraging Logs for Faster Problem Resolution\\",slug:\\"best-practices-logging\\",date:\\"2024-09-11\\",description:\\"Explore effective log management strategies to improve system reliability and performance. Learn about data collection, processing, analysis, and cost-effective management of logs in complex software environments.\\",author:[{slug:\\"luca-wintergerst\\"},{slug:\\"david-hope\\"},{slug:\\"bahubali-shetti\\"}],image:\\"best-practices-log-management.png\\",tags:[{slug:\\"log-analytics\\"}]};function d(i){let e={a:\\"a\\",code:\\"code\\",em:\\"em\\",h2:\\"h2\\",h3:\\"h3\\",h4:\\"h4\\",img:\\"img\\",li:\\"li\\",ol:\\"ol\\",p:\\"p\\",pre:\\"pre\\",ul:\\"ul\\",...i.components};return(0,n.jsxs)(n.Fragment,{children:[(0,n.jsx)(e.p,{children:\\"In today\'s rapid software development landscape, efficient log management is crucial for maintaining system reliability and performance. With expanding and complex infrastructure and application components, the responsibilities of operations and development teams are ever-growing and multifaceted. This blog post outlines best practices for effective log management, addressing the challenges of growing data volumes, complex infrastructures, and the need for quick problem resolution.\\"}),`\\n`,(0,n.jsx)(e.h2,{id:\\"understanding-logs-and-their-importance\\",children:\\"Understanding Logs and Their Importance\\"}),`\\n`,(0,n.jsx)(e.p,{children:\\"Logs are records of events occurring within your infrastructure, typically including a timestamp, a message detailing the event, and metadata identifying the source. They are invaluable for diagnosing issues, providing early warnings, and speeding up problem resolution. Logs are often the primary signal that developers enable, offering significant detail for debugging, performance analysis, security, and compliance management.\\"}),`\\n`,(0,n.jsx)(e.h2,{id:\\"the-logging-journey\\",children:\\"The Logging Journey\\"}),`\\n`,(0,n.jsx)(e.p,{children:\\"The logging journey involves three basic steps: collection and ingestion, processing and enrichment, and analysis and rationalization. 
Let\'s explore each step in detail, covering some of the best practices for each section.\\"}),`\\n`,(0,n.jsx)(e.p,{children:(0,n.jsx)(e.img,{src:\\"/assets/images/best-practices-logging/blog-elastic-collection-and-ingest.png\\",alt:\\"Logging Journey\\",width:\\"1999\\",height:\\"1115\\"})}),`\\n`,(0,n.jsx)(e.h3,{id:\\"1-log-collection-and-ingestion\\",children:\\"1. Log Collection and Ingestion\\"}),`\\n`,(0,n.jsx)(e.h4,{id:\\"collect-everything-relevant-and-actionable\\",children:\\"Collect Everything Relevant and Actionable\\"}),`\\n`,(0,n.jsx)(e.p,{children:\\"The first step is to collect all logs into a central location. This involves identifying all your applications and systems and collecting their logs. Comprehensive data collection ensures no critical information is missed, providing a complete picture of your system\'s behavior. In the event of an incident, having all logs in one place can significantly reduce the time to resolution. It\'s generally better to collect more data than you need, as you can always filter out irrelevant information later, as well as delete logs that are no longer needed more quickly.\\"}),`\\n`,(0,n.jsx)(e.h4,{id:\\"leverage-integrations\\",children:\\"Leverage Integrations\\"}),`\\n`,(0,n.jsx)(e.p,{children:\\"Elastic provides over 300 integrations that simplify data onboarding. These integrations not only collect data but also come with dashboards, saved searches, and pipelines to parse the data. Utilizing these integrations can significantly reduce manual effort and ensure data consistency.\\"}),`\\n`,(0,n.jsx)(e.h4,{id:\\"consider-ingestion-capacity-and-costs\\",children:\\"Consider Ingestion Capacity and Costs\\"}),`\\n`,(0,n.jsx)(e.p,{children:\\"An important aspect of log collection is ensuring you have sufficient ingestion capacity at a manageable cost. When assessing solutions, be cautious about those that charge significantly more for high cardinality data, as this can lead to unexpectedly high costs in observability solutions. We\'ll talk more about cost effective log management later in this post.\\"}),`\\n`,(0,n.jsx)(e.h4,{id:\\"use-kafka-for-large-projects\\",children:\\"Use Kafka for Large Projects\\"}),`\\n`,(0,n.jsx)(e.p,{children:\\"For larger organizations, implementing Kafka can improve log data management. Kafka acts as a buffer, making the system more reliable and easier to manage. It allows different teams to send data to a centralized location, which can then be ingested into Elastic.\\"}),`\\n`,(0,n.jsx)(e.h3,{id:\\"2-processing-and-enrichment\\",children:\\"2. Processing and Enrichment\\"}),`\\n`,(0,n.jsx)(e.h4,{id:\\"adopt-elastic-common-schema-ecs\\",children:\\"Adopt Elastic Common Schema (ECS)\\"}),`\\n`,(0,n.jsx)(e.p,{children:\\"One key aspect of log collection is to have the most amount of normalization across all of your applications and infrastructure. Having a common semantic schema is crucial. Elastic contributed Elastic Common Schema (ECS) to OpenTelemetry (OTel), helping accelerate the adoption of OTel-based observability and security. This move towards a more normalized way to define and ingest logs (including metrics and traces) is beneficial for the industry.\\"}),`\\n`,(0,n.jsxs)(e.p,{children:[\\"Using ECS helps standardize field names and data structures, making data analysis and correlation easier. This common schema ensures your data is organized predictably, facilitating more efficient querying and reporting. 
Learn more about ECS \\",(0,n.jsx)(e.a,{href:\\"https://www.elastic.co/guide/en/ecs/current/ecs-reference.html\\",rel:\\"nofollow\\",children:\\"here\\"}),\\".\\"]}),`\\n`,(0,n.jsx)(e.h4,{id:\\"optimize-mappings-for-high-volume-data\\",children:\\"Optimize Mappings for High Volume Data\\"}),`\\n`,(0,n.jsxs)(e.p,{children:[\\"For high cardinality fields or those rarely used, consider optimizing or removing them from the index. This can improve performance by reducing the amount of data that needs to be indexed and searched. Our documentation has sections to tune your setup for \\",(0,n.jsx)(e.a,{href:\\"https://www.elastic.co/guide/en/elasticsearch/reference/current/tune-for-disk-usage.html\\",rel:\\"nofollow\\",children:\\"disk usage\\"}),\\", \\",(0,n.jsx)(e.a,{href:\\"https://www.elastic.co/guide/en/elasticsearch/reference/current/tune-for-search-speed.html\\",rel:\\"nofollow\\",children:\\"search speed\\"}),\\" and \\",(0,n.jsx)(e.a,{href:\\"https://www.elastic.co/guide/en/elasticsearch/reference/current/tune-for-indexing-speed.html\\",rel:\\"nofollow\\",children:\\"indexing speed\\"}),\\".\\"]}),`\\n`,(0,n.jsx)(e.h4,{id:\\"managing-structured-vs-unstructured-logs\\",children:\\"Managing Structured vs. Unstructured Logs\\"}),`\\n`,(0,n.jsx)(e.p,{children:\\"Structured logs are generally preferable as they offer more value and are easier to work with. They have a predefined format and fields, simplifying information extraction and analysis. For custom logs without pre-built integrations, you may need to define your own parsing rules.\\"}),`\\n`,(0,n.jsx)(e.p,{children:\\"For unstructured logs, full-text search capabilities can help mitigate limitations. By indexing logs, full-text search allows users to search for specific keywords or phrases efficiently, even within large volumes of unstructured data. This is one of the main differentiators of Elastic\'s observability solution. You can simply search for any keyword or phrase and get results in real-time, without needing to write complex regular expressions or parsing rules at query time.\\"}),`\\n`,(0,n.jsx)(e.h4,{id:\\"schema-on-read-vs-schema-on-write\\",children:\\"Schema-on-Read vs. Schema-on-Write\\"}),`\\n`,(0,n.jsx)(e.p,{children:\\"There are two main approaches to processing log data:\\"}),`\\n`,(0,n.jsxs)(e.ol,{children:[`\\n`,(0,n.jsxs)(e.li,{children:[`\\n`,(0,n.jsx)(e.p,{children:\\"Schema-on-read: Some observability dashboarding capabilities can perform runtime transformations to extract fields from non-parsed sources on the fly. This is helpful when dealing with legacy systems or custom applications that may not log data in a standardized format. However, runtime parsing can be time-consuming and resource-intensive, especially for large volumes of data.\\"}),`\\n`]}),`\\n`,(0,n.jsxs)(e.li,{children:[`\\n`,(0,n.jsx)(e.p,{children:\\"Schema-on-write: This approach offers better performance and more control over the data. The schema is defined upfront, and the data is structured and validated at the time of writing. This allows for faster processing and analysis of the data, which is beneficial for enrichment.\\"}),`\\n`]}),`\\n`]}),`\\n`,(0,n.jsx)(e.h3,{id:\\"3-analysis-and-rationalization\\",children:\\"3. Analysis and Rationalization\\"}),`\\n`,(0,n.jsx)(e.h4,{id:\\"full-text-search\\",children:\\"Full-Text Search\\"}),`\\n`,(0,n.jsx)(e.p,{children:\\"Elastic\'s full-text search capabilities, powered by Elasticsearch, allow you to quickly find relevant logs. 
The Kibana Query Language (KQL) enhances search efficiency, enabling you to filter and drill down into the data to identify issues rapidly.\\"}),`\\n`,(0,n.jsx)(e.p,{children:\\"Here are a few examples of KQL queries:\\"}),`\\n`,(0,n.jsx)(e.pre,{children:(0,n.jsx)(e.code,{children:`// Filter documents where a field exists\\nhttp.request.method: *\\n\\n// Filter documents that match a specific value\\nhttp.request.method: GET\\n\\n// Search all fields for a specific value\\nHello\\n\\n// Filter documents where a text field contains specific terms\\nhttp.request.body.content: \\"null pointer\\"\\n\\n// Filter documents within a range\\nhttp.response.bytes < 10000\\n\\n// Combine range queries\\nhttp.response.bytes > 10000 and http.response.bytes <= 20000\\n\\n// Use wildcards to match patterns\\nhttp.response.status_code: 4*\\n\\n// Negate a query\\nnot http.request.method: GET\\n\\n// Combine multiple queries with AND/OR\\nhttp.request.method: GET and http.response.status_code: 400\\n`})}),`\\n`,(0,n.jsx)(e.h4,{id:\\"machine-learning-integration\\",children:\\"Machine Learning Integration\\"}),`\\n`,(0,n.jsx)(e.p,{children:\\"Machine learning can automate the detection of anomalies and patterns within your log data. Elastic offers features like log rate analysis that automatically identify deviations from normal behavior. By leveraging machine learning, you can proactively address potential issues before they escalate.\\"}),`\\n`,(0,n.jsx)(e.p,{children:(0,n.jsx)(e.img,{src:\\"/assets/images/best-practices-logging/screenshot-machine-learning-smv-anomaly.png\\",alt:\\"Machine Learning\\",width:\\"1140\\",height:\\"641\\"})}),`\\n`,(0,n.jsx)(e.p,{children:\\"It is recommended that organizations utilize a diverse arsenal of machine learning algorithms and techniques to effectively uncover unknown-unknowns in log files. Unsupervised machine learning algorithms should be employed for anomaly detection on real-time data, with rate-controlled alerting based on severity.\\"}),`\\n`,(0,n.jsx)(e.p,{children:\\"By automatically identifying influencers, users can gain valuable context for automated root cause analysis (RCA). Log pattern analysis brings categorization to unstructured logs, while log rate analysis and change point detection help identify the root causes of spikes in log data.\\"}),`\\n`,(0,n.jsxs)(e.p,{children:[\\"Take a look at the \\",(0,n.jsx)(e.a,{href:\\"https://www.elastic.co/guide/en/machine-learning/current/ml-overview.html\\",rel:\\"nofollow\\",children:\\"documentation\\"}),\\" to get started with machine learning in Elastic.\\"]}),`\\n`,(0,n.jsx)(e.h4,{id:\\"dashboarding-and-alerting\\",children:\\"Dashboarding and Alerting\\"}),`\\n`,(0,n.jsx)(e.p,{children:\\"Building dashboards and setting up alerting helps you monitor your logs in real-time. Dashboards provide a visual representation of your logs, making it easier to identify patterns and anomalies. Alerting can notify you when specific events occur, allowing you to take action quickly.\\"}),`\\n`,(0,n.jsx)(e.h2,{id:\\"cost-effective-log-management\\",children:\\"Cost-Effective Log Management\\"}),`\\n`,(0,n.jsx)(e.h3,{id:\\"use-data-tiers\\",children:\\"Use Data Tiers\\"}),`\\n`,(0,n.jsx)(e.p,{children:\\"Implementing index lifecycle management to move data across hot, warm, cold, and frozen tiers can significantly reduce storage costs. 
This approach ensures that only the most frequently accessed data is stored on expensive, high-performance storage, while older data is moved to more cost-effective storage solutions.\\"}),`\\n`,(0,n.jsx)(e.p,{children:(0,n.jsx)(e.img,{src:\\"/assets/images/best-practices-logging/ilm.png\\",alt:\\"ILM\\",width:\\"2958\\",height:\\"1790\\"})}),`\\n`,(0,n.jsxs)(e.p,{children:[\\"Our documentation explains how to set up \\",(0,n.jsx)(e.a,{href:\\"https://www.elastic.co/guide/en/elasticsearch/reference/current/index-lifecycle-management.html\\",rel:\\"nofollow\\",children:\\"Index Lifecycle Management\\"}),\\".\\"]}),`\\n`,(0,n.jsx)(e.h3,{id:\\"compression-and-index-sorting\\",children:\\"Compression and Index Sorting\\"}),`\\n`,(0,n.jsxs)(e.p,{children:[\'Applying best compression settings and using index sorting can further reduce the data footprint. Optimizing the way data is stored on disk can lead to substantial savings in storage costs and improve retrieval performance. As of 8.15, Elasticsearch provides an indexing mode called \\"logsdb\\". This is a highly optimized way of storing log data. This new way of indexing data uses 2.5 times less disk space than the default mode. You can read more about it \',(0,n.jsx)(e.a,{href:\\"https://www.elastic.co/guide/en/elasticsearch/reference/current/logs-data-stream.html\\",rel:\\"nofollow\\",children:\\"here\\"}),\\". This mode automatically applies the best combination of settings for compression, index sorting, and other optimizations that weren\'t accessible to users before.\\"]}),`\\n`,(0,n.jsx)(e.h3,{id:\\"snapshot-lifecycle-management-slm\\",children:\\"Snapshot Lifecycle Management (SLM)\\"}),`\\n`,(0,n.jsx)(e.p,{children:(0,n.jsx)(e.img,{src:\\"/assets/images/best-practices-logging/slm.png\\",alt:\\"SLM\\",width:\\"2958\\",height:\\"1790\\"})}),`\\n`,(0,n.jsx)(e.p,{children:\\"SLM allows you to back up your data and delete it from the main cluster, freeing up resources. If needed, data can be restored quickly for analysis, ensuring that you maintain the ability to investigate historical events without incurring high storage costs.\\"}),`\\n`,(0,n.jsxs)(e.p,{children:[\\"Learn more about SLM in the \\",(0,n.jsx)(e.a,{href:\\"https://www.elastic.co/guide/en/elasticsearch/reference/current/snapshot-lifecycle-management.html\\",rel:\\"nofollow\\",children:\\"documentation\\"}),\\".\\"]}),`\\n`,(0,n.jsx)(e.h3,{id:\\"dealing-with-large-amounts-of-log-data\\",children:\\"Dealing with Large Amounts of Log Data\\"}),`\\n`,(0,n.jsx)(e.p,{children:\\"Managing large volumes of log data can be challenging. Here are some strategies to optimize log management:\\"}),`\\n`,(0,n.jsxs)(e.ol,{children:[`\\n`,(0,n.jsx)(e.li,{children:\\"Develop a logs deletion policy. Evaluate what data to collect and when to delete it.\\"}),`\\n`,(0,n.jsx)(e.li,{children:\\"Consider discarding DEBUG logs or even INFO logs earlier, and delete dev and staging environment logs sooner.\\"}),`\\n`,(0,n.jsx)(e.li,{children:\\"Aggregate short windows of identical log lines, which is especially useful for TCP security event logging.\\"}),`\\n`,(0,n.jsx)(e.li,{children:\\"For applications and code you control, consider moving some logs into traces to reduce log volume while maintaining detailed information.\\"}),`\\n`]}),`\\n`,(0,n.jsx)(e.h3,{id:\\"centralized-vs-decentralized-log-storage\\",children:\\"Centralized vs. Decentralized Log Storage\\"}),`\\n`,(0,n.jsx)(e.p,{children:\\"Data locality is an important consideration when managing log data. 
The costs of ingressing and egressing large amounts of log data can be prohibitively high, especially when dealing with cloud providers.\\"}),`\\n`,(0,n.jsx)(e.p,{children:\\"In the absence of regional redundancy requirements, your organization may not need to send all log data to a central location. Consider keeping log data local to the datacenter where it was generated to reduce ingress and egress costs.\\"}),`\\n`,(0,n.jsxs)(e.p,{children:[(0,n.jsx)(e.a,{href:\\"https://www.elastic.co/guide/en/elasticsearch/reference/current/modules-cross-cluster-search.html\\",rel:\\"nofollow\\",children:\\"Cross-cluster search\\"}),\\" functionality enables users to search across multiple logging clusters simultaneously, reducing the amount of data that needs to be transferred over the network.\\"]}),`\\n`,(0,n.jsxs)(e.p,{children:[(0,n.jsx)(e.a,{href:\\"https://www.elastic.co/guide/en/elasticsearch/reference/current/xpack-ccr.html\\",rel:\\"nofollow\\",children:\\"Cross-cluster replication\\"}),\\" is useful for maintaining business continuity in the event of a disaster, ensuring data availability even during an outage in one datacenter.\\"]}),`\\n`,(0,n.jsx)(e.h2,{id:\\"monitoring-and-performance\\",children:\\"Monitoring and Performance\\"}),`\\n`,(0,n.jsx)(e.h3,{id:\\"monitor-your-log-management-system\\",children:\\"Monitor Your Log Management System\\"}),`\\n`,(0,n.jsxs)(e.p,{children:[\\"Using a dedicated monitoring cluster can help you track the performance of your Elastic deployment. \\",(0,n.jsx)(e.a,{href:\\"https://www.elastic.co/guide/en/kibana/current/xpack-monitoring.html\\",rel:\\"nofollow\\",children:\\"Stack monitoring\\"}),\\" provides metrics on search and indexing activity, helping you identify and resolve performance bottlenecks.\\"]}),`\\n`,(0,n.jsx)(e.h3,{id:\\"adjust-bulk-size-and-refresh-interval\\",children:\\"Adjust Bulk Size and Refresh Interval\\"}),`\\n`,(0,n.jsxs)(e.p,{children:[(0,n.jsx)(e.a,{href:\\"https://www.elastic.co/guide/en/elasticsearch/reference/current/tune-for-indexing-speed.html\\",rel:\\"nofollow\\",children:\\"Optimizing these settings\\"}),\\" can balance performance and resource usage. Increasing bulk size and refresh interval can improve indexing efficiency, especially for high-throughput environments.\\"]}),`\\n`,(0,n.jsx)(e.h2,{id:\\"logging-best-practices\\",children:\\"Logging Best Practices\\"}),`\\n`,(0,n.jsx)(e.h3,{id:\\"adjust-log-levels\\",children:\\"Adjust Log Levels\\"}),`\\n`,(0,n.jsx)(e.p,{children:\\"Ensure that log levels are appropriately set for all applications. Customize log formats to facilitate easier ingestion and analysis. Properly configured log levels can reduce noise and make it easier to identify critical issues.\\"}),`\\n`,(0,n.jsx)(e.h3,{id:\\"use-modern-logging-frameworks\\",children:\\"Use Modern Logging Frameworks\\"}),`\\n`,(0,n.jsxs)(e.p,{children:[`Implement logging frameworks that support structured logging. Adding metadata to logs enhances their usefulness for analysis. 
Structured logging formats, such as JSON, allow logs to be easily parsed and queried, improving the efficiency of log analysis.\\nIf you fully control the application and are already using structured logging, consider using `,(0,n.jsx)(e.a,{href:\\"https://github.com/elastic/ecs-logging\\",rel:\\"nofollow\\",children:\\"Elastic\'s version of these libraries\\"}),\\", which can automatically parse logs into ECS fields.\\"]}),`\\n`,(0,n.jsx)(e.h3,{id:\\"leverage-apm-and-metrics\\",children:\\"Leverage APM and Metrics\\"}),`\\n`,(0,n.jsx)(e.p,{children:\\"For custom-built applications, Application Performance Monitoring (APM) provides deeper insights into application performance, complementing traditional logging. APM tracks transactions across services, helping you understand dependencies and identify performance bottlenecks.\\"}),`\\n`,(0,n.jsx)(e.p,{children:(0,n.jsx)(e.img,{src:\\"/assets/images/best-practices-logging/apm.png\\",alt:\\"APM\\",width:\\"2862\\",height:\\"1790\\"})}),`\\n`,(0,n.jsx)(e.p,{children:\\"Consider collecting metrics alongside logs. Metrics can provide insights into your system\'s performance, such as CPU usage, memory usage, and network traffic. If you\'re already collecting logs from your systems, adding metrics collection is usually a quick process.\\"}),`\\n`,(0,n.jsx)(e.p,{children:\\"Traces can provide even deeper insights into specific transactions or request paths, especially in cloud-native environments. They offer more contextual information and excel at tracking dependencies across services. However, implementing tracing is only possible for applications you own, and not all developers have fully embraced it yet.\\"}),`\\n`,(0,n.jsx)(e.p,{children:\\"A combined logging and tracing strategy is recommended, where traces provide coverage for newer instrumented apps, and logging supports legacy applications and systems you don\'t own the source code for.\\"}),`\\n`,(0,n.jsx)(e.h2,{id:\\"conclusion\\",children:\\"Conclusion\\"}),`\\n`,(0,n.jsx)(e.p,{children:\\"Effective log management is essential for maintaining system reliability and performance in today\'s complex software environments. 
By following these best practices, you can optimize your log management process, reduce costs, and improve problem resolution times.\\"}),`\\n`,(0,n.jsx)(e.p,{children:\\"Key takeaways include:\\"}),`\\n`,(0,n.jsxs)(e.ul,{children:[`\\n`,(0,n.jsx)(e.li,{children:\\"Ensure comprehensive log collection with a focus on normalization and common schemas.\\"}),`\\n`,(0,n.jsx)(e.li,{children:\\"Use appropriate processing and enrichment techniques, balancing between structured and unstructured logs.\\"}),`\\n`,(0,n.jsx)(e.li,{children:\\"Leverage full-text search and machine learning for efficient log analysis.\\"}),`\\n`,(0,n.jsx)(e.li,{children:\\"Implement cost-effective storage strategies and smart data retention policies.\\"}),`\\n`,(0,n.jsx)(e.li,{children:\\"Enhance your logging strategy with APM, metrics, and traces for a complete observability solution.\\"}),`\\n`]}),`\\n`,(0,n.jsx)(e.p,{children:\\"Continuously evaluate and adjust your strategies to keep pace with the growing volume and complexity of log data, and you\'ll be well-equipped to ensure the reliability, performance, and security of your applications and infrastructure.\\"}),`\\n`,(0,n.jsx)(e.p,{children:\\"Check out our other blogs:\\"}),`\\n`,(0,n.jsxs)(e.ul,{children:[`\\n`,(0,n.jsx)(e.li,{children:(0,n.jsx)(e.a,{href:\\"https://www.elastic.co/observability-labs/blog/service-level-objectives-slos-logs-metrics\\",rel:\\"nofollow\\",children:\\"Build better Service Level Objectives (SLOs) from logs and metrics\\"})}),`\\n`,(0,n.jsx)(e.li,{children:(0,n.jsx)(e.a,{href:\\"https://www.elastic.co/observability-labs/blog/aws-vpc-flow-log-analysis-with-genai-elastic\\",rel:\\"nofollow\\",children:\\"AWS VPC Flow log analysis with GenAI in Elastic\\"})}),`\\n`,(0,n.jsx)(e.li,{children:(0,n.jsx)(e.a,{href:\\"https://www.elastic.co/observability-labs/blog/migrating-billion-log-lines-opensearch-elasticsearch\\",rel:\\"nofollow\\",children:\\"Migrating 1 billion log lines from OpenSearch to Elasticsearch\\"})}),`\\n`,(0,n.jsx)(e.li,{children:(0,n.jsx)(e.a,{href:\\"https://www.elastic.co/observability-labs/blog/pruning-incoming-log-volumes\\",rel:\\"nofollow\\",children:\\"Pruning incoming log volumes with Elastic\\"})}),`\\n`]}),`\\n`,(0,n.jsx)(e.p,{children:\\"Ready to get started? Use Elastic Observability on Elastic Cloud \\\\u2014 the hosted Elasticsearch service that includes all of the latest features.\\"}),`\\n`,(0,n.jsx)(e.p,{children:(0,n.jsx)(e.em,{children:\\"The release and timing of any features or functionality described in this post remain at Elastic\'s sole discretion. Any features or functionality not currently available may not be delivered on time or at all.\\"})})]})}function h(i={}){let{wrapper:e}=i.components||{};return e?(0,n.jsx)(e,{...i,children:(0,n.jsx)(d,{...i})}):d(i)}return b(k);})();\\n;return Component;"},"_id":"articles/best-practices-logging.mdx","_raw":{"sourceFilePath":"articles/best-practices-logging.mdx","sourceFileName":"best-practices-logging.mdx","sourceFileDir":"articles","contentType":"mdx","flattenedPath":"articles/best-practices-logging"},"type":"Article","imageUrl":"/assets/images/best-practices-logging/best-practices-log-management.png","readingTime":"12 min read","url":"/best-practices-logging","headings":[{"level":2,"title":"Understanding Logs and Their Importance","href":"#understanding-logs-and-their-importance"},{"level":2,"title":"The Logging Journey","href":"#the-logging-journey"},{"level":3,"title":"1. 
Log Collection and Ingestion","href":"#1-log-collection-and-ingestion"},{"level":4,"title":"Collect Everything Relevant and Actionable","href":"#collect-everything-relevant-and-actionable"},{"level":4,"title":"Leverage Integrations","href":"#leverage-integrations"},{"level":4,"title":"Consider Ingestion Capacity and Costs","href":"#consider-ingestion-capacity-and-costs"},{"level":4,"title":"Use Kafka for Large Projects","href":"#use-kafka-for-large-projects"},{"level":3,"title":"2. Processing and Enrichment","href":"#2-processing-and-enrichment"},{"level":4,"title":"Adopt Elastic Common Schema (ECS)","href":"#adopt-elastic-common-schema-ecs"},{"level":4,"title":"Optimize Mappings for High Volume Data","href":"#optimize-mappings-for-high-volume-data"},{"level":4,"title":"Managing Structured vs. Unstructured Logs","href":"#managing-structured-vs-unstructured-logs"},{"level":4,"title":"Schema-on-Read vs. Schema-on-Write","href":"#schema-on-read-vs-schema-on-write"},{"level":3,"title":"3. Analysis and Rationalization","href":"#3-analysis-and-rationalization"},{"level":4,"title":"Full-Text Search","href":"#full-text-search"},{"level":4,"title":"Machine Learning Integration","href":"#machine-learning-integration"},{"level":4,"title":"Dashboarding and Alerting","href":"#dashboarding-and-alerting"},{"level":2,"title":"Cost-Effective Log Management","href":"#cost-effective-log-management"},{"level":3,"title":"Use Data Tiers","href":"#use-data-tiers"},{"level":3,"title":"Compression and Index Sorting","href":"#compression-and-index-sorting"},{"level":3,"title":"Snapshot Lifecycle Management (SLM)","href":"#snapshot-lifecycle-management-slm"},{"level":3,"title":"Dealing with Large Amounts of Log Data","href":"#dealing-with-large-amounts-of-log-data"},{"level":3,"title":"Centralized vs. Decentralized Log Storage","href":"#centralized-vs-decentralized-log-storage"},{"level":2,"title":"Monitoring and Performance","href":"#monitoring-and-performance"},{"level":3,"title":"Monitor Your Log Management System","href":"#monitor-your-log-management-system"},{"level":3,"title":"Adjust Bulk Size and Refresh Interval","href":"#adjust-bulk-size-and-refresh-interval"},{"level":2,"title":"Logging Best Practices","href":"#logging-best-practices"},{"level":3,"title":"Adjust Log Levels","href":"#adjust-log-levels"},{"level":3,"title":"Use Modern Logging Frameworks","href":"#use-modern-logging-frameworks"},{"level":3,"title":"Leverage APM and Metrics","href":"#leverage-apm-and-metrics"},{"level":2,"title":"Conclusion","href":"#conclusion"}]},{"title":"Revolutionizing big data management: Unveiling the power of Amazon EMR and Elastic integration","slug":"big-data-management-amazon-emr-elastic-integration","date":"2023-09-26","description":"Amazon EMR allows you to easily run and scale big data workloads. 
With Elastic’s native integration, you\'ll find the confidence to monitor, analyze, and optimize your EMR clusters, opening up exciting opportunities for your data-driven initiatives.","image":"21-cubes.jpeg","author":[{"slug":"udayasimha-theepireddy-uday","type":"Author","_raw":{}},{"slug":"subhrata-kulshrestha","type":"Author","_raw":{}}],"tags":[{"slug":"cloud-monitoring","type":"Tag","_raw":{}},{"slug":"aws","type":"Tag","_raw":{}},{"slug":"amazon-emr","type":"Tag","_raw":{}},{"slug":"metrics","type":"Tag","_raw":{}}],"body":{"raw":"\\nIn the dynamic realm of data processing, Amazon EMR takes center stage as an AWS-provided big data service, offering a cost-effective conduit for running Apache Spark and a plethora of other open-source applications. While the capabilities of EMR are impressive, the art of vigilant monitoring holds the key to unlocking its full potential. This blog post explains the pivotal role of monitoring Amazon EMR clusters, accentuating the transformative integration with Elastic\xae.\\n\\nElastic can make it easier for organizations to transform data into actionable insights and stop threats quickly with unified visibility across your environment — so mission-critical applications can keep running smoothly no matter what. From a free trial and fast deployment to sending logs to Elastic securely and frictionlessly, all you need to do is point and click to capture, store, and search data from your AWS services.\\n\\n## Monitoring EMR via Elastic Observability\\n\\nIn this article, we will delve into the following key aspects:\\n\\n- **Enabling EMR cluster metrics for Elastic integration:** Learn the intricacies of configuring an EMR cluster to emit metrics that Elastic can effectively extract, paving the way for insightful analysis.\\n- **Harnessing Kibana** \xae **dashboards for EMR workload analysis:** Discover the potential of utilizing Kibana dashboards to dissect metrics related to an EMR workload. By gaining a deeper understanding, we open the doors to optimization opportunities.\\n\\n### Key benefits of AWS EMR integration\\n\\n- **Comprehensive monitoring:** Monitor the health and performance of your EMR clusters in real time. Track metrics related to cluster status and utilization, node status, IO, and many others, allowing you to identify bottlenecks and optimize your data processing.\\n- **Log analysis:** Dive deep into EMR logs with ease. Our integration enables you to collect and analyze logs from your clusters, helping you troubleshoot issues and gain valuable insights.\\n- **Cost optimization:** Understand the cost implications of your EMR clusters. By monitoring resource utilization, you can identify opportunities to optimize your cluster configurations and reduce costs.\\n- **Alerting and notifications:** Set up custom alerts based on EMR metrics and logs. Receive notifications when performance thresholds are breached, ensuring that you can take action promptly.\\n- **Seamless integration:** Our integration is designed for ease of use. Getting started is simple, and you can start monitoring your EMR clusters quickly.\\n\\nAccompanying these discussions is an illustrative solution architecture diagram, providing a visual representation of the intricacies and interactions within the proposed solution.\\n\\n![1](/assets/images/big-data-management-amazon-emr-elastic-integration/elastic-blog-1-flowchart-aws-emr.png)\\n\\n## How to get started\\n\\nGetting started with AWS EMR integration in Observability is easy. 
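Under the hood, EMR publishes its cluster metrics to CloudWatch, which is where the Elastic integration reads them from. If you want to sanity-check that your cluster is emitting metrics before you set up the integration, you can list them with the AWS CLI (a quick sketch; the job flow ID below is a placeholder for your own cluster ID):\n\n```\naws cloudwatch list-metrics --namespace AWS/ElasticMapReduce --dimensions Name=JobFlowId,Value=j-XXXXXXXXXXXX\n```\n\n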
Here\'s a quick overview of the steps:\n\n### Prerequisites and configurations\n\nIf you intend to follow the steps outlined in this blog post, there are a few prerequisites and configurations that you should have in place beforehand.\n\n1. You will need an account on [Elastic Cloud](https://cloud.elastic.co/) and a deployed stack and agent. Instructions for deploying a stack on AWS can be found [here](https://www.elastic.co/guide/en/elastic-stack/current/installing-elastic-stack.html). This is necessary for AWS EMR logging and analysis.\n\n2. You will also need an AWS account with the necessary permissions to pull data from AWS. Details on the required permissions can be found in our [documentation](https://docs.elastic.co/en/integrations/aws#aws-permissions).\n\n3. Finally, be sure to turn on EMR monitoring for the EMR cluster when you deploy the cluster.\n\n### Step 1: Create an account with Elastic\n\n[Create an account on Elastic Cloud](https://cloud.elastic.co/registration?fromURI=/home) by following the steps provided.\n\n### Step 2: Add integration\n\n1. Log in to your [Elastic Cloud on AWS](https://cloud.elastic.co/registration) deployment.\n\n![2 free trial](/assets/images/big-data-management-amazon-emr-elastic-integration/elastic-blog-2-free-trial.png)\n\n2. Click on **Add Integration**. You will be navigated to a catalog of supported integrations.\n\n![3 welcome home](/assets/images/big-data-management-amazon-emr-elastic-integration/elastic-blog-3-welcome-home.png)\n\n3. Search and select **Amazon EMR**.\n\n![4 integrations](/assets/images/big-data-management-amazon-emr-elastic-integration/elastic-blog-4-integrations.png)\n\n### Step 3: Configure integration\n\n1. Click on the **Add Amazon EMR** button and provide the required details.\n\n2. Provide the required access credentials to connect to your EMR instance.\n\n3. You can choose to collect EMR metrics, EMR logs via S3, or EMR logs via CloudWatch.\n\n4. Click on the **Save and continue** button at the bottom of the page.\n\n![5 amazon emr](/assets/images/big-data-management-amazon-emr-elastic-integration/elastic-blog-5-amazon-emr.png)\n\n![6 add amazon emr integration](/assets/images/big-data-management-amazon-emr-elastic-integration/elastic-blog-6-add-amazon-emr.png)\n\n### Step 4: Analyze and monitor\n\nExplore the data using the out-of-the-box dashboards available for the integration. Select **Discover** from the Elastic Cloud top-level menu.\n\n![7 manage deployment](/assets/images/big-data-management-amazon-emr-elastic-integration/elastic-blog-7-manage-deployment.png)\n\nOr, create custom dashboards, set up alerts, and gain actionable insights into your EMR clusters\' performance.\n\nThis integration streamlines the collection of vital metrics and logs, including Cluster Status, Node Status, IO, and Cluster Capacity. 
Some metrics gathered include:\\n\\n- **IsIdle:** Indicates that a cluster is no longer performing work, but is still alive and accruing charges\\n- **ContainerAllocated:** The number of resource containers allocated by the ResourceManager\\n- **ContainerReserved:** The number of containers reserved\\n- **CoreNodesRunning:** The number of core nodes working\\n- **CoreNodesPending:** The number of core nodes waiting to be assigned\\n- **MRActiveNodes:** The number of nodes presently running MapReduce tasks or jobs\\n- **MRLostNodes:** The number of nodes allocated to MapReduce that have been marked in a LOST state\\n- **HDFSUtilization:** The percentage of HDFS storage currently used\\n- **HDFSBytesRead/Written:** The number of bytes read/written from HDFS (This metric aggregates MapReduce jobs only, and does not apply for other workloads on Amazon EMR.)\\n- **TotalUnitsRequested/TotalNodesRequested/TotalVCPURequested:** The target total number of units/nodes/vCPUs in a cluster as determined by managed scaling\\n\\n![8 pie graph](/assets/images/big-data-management-amazon-emr-elastic-integration/elastic-blog-8-pie-graphs.png)\\n\\n## Conclusion\\n\\nElastic is committed to fulfilling all your observability requirements, offering an effortless experience. Our integrations are designed to simplify the process of ingesting telemetry data, granting you convenient access to critical information for monitoring, analytics, and observability. The native AWS EMR integration underscores our dedication to delivering seamless solutions for your data needs. With this integration, you\'ll find the confidence to monitor, analyze, and optimize your EMR clusters, opening up exciting opportunities for your data-driven initiatives.\\n\\n## Start a free trial today\\n\\nStart your own [7-day free trial](https://aws.amazon.com/marketplace/pp/prodview-voru33wi6xs7k?trk=5fbc596b-6d2a-433a-8333-0bd1f28e84da%E2%89%BBchannel=el) by signing up via [AWS Marketplace](https://aws.amazon.com/marketplace/pp/prodview-voru33wi6xs7k?trk=5fbc596b-6d2a-433a-8333-0bd1f28e84da&sc_channel=el&ultron=gobig&hulk=regpage&blade=elasticweb&gambit=mp-b) and quickly spin up a deployment in minutes on any of the [Elastic Cloud regions on AWS](https://www.elastic.co/guide/en/cloud/current/ec-reference-regions.html#ec_amazon_web_services_aws_regions) around the world. Your AWS Marketplace purchase of Elastic will be included in your monthly consolidated billing statement and will draw against your committed spend with AWS.\\n\\n_The release and timing of any features or functionality described in this post remain at Elastic\'s sole discretion. 
Any features or functionality not currently available may not be delivered on time or at all._\\n","code":"var Component=(()=>{var g=Object.create;var o=Object.defineProperty;var u=Object.getOwnPropertyDescriptor;var m=Object.getOwnPropertyNames;var p=Object.getPrototypeOf,f=Object.prototype.hasOwnProperty;var y=(i,e)=>()=>(e||i((e={exports:{}}).exports,e),e.exports),b=(i,e)=>{for(var t in e)o(i,t,{get:e[t],enumerable:!0})},s=(i,e,t,r)=>{if(e&&typeof e==\\"object\\"||typeof e==\\"function\\")for(let a of m(e))!f.call(i,a)&&a!==t&&o(i,a,{get:()=>e[a],enumerable:!(r=u(e,a))||r.enumerable});return i};var w=(i,e,t)=>(t=i!=null?g(p(i)):{},s(e||!i||!i.__esModule?o(t,\\"default\\",{value:i,enumerable:!0}):t,i)),v=i=>s(o({},\\"__esModule\\",{value:!0}),i);var c=y((k,l)=>{l.exports=_jsx_runtime});var R={};b(R,{default:()=>h,frontmatter:()=>E});var n=w(c()),E={title:\\"Revolutionizing big data management: Unveiling the power of Amazon EMR and Elastic integration\\",slug:\\"big-data-management-amazon-emr-elastic-integration\\",date:\\"2023-09-26\\",description:\\"Amazon EMR allows you to easily run and scale big data workloads. With Elastic\\\\u2019s native integration, you\'ll find the confidence to monitor, analyze, and optimize your EMR clusters, opening up exciting opportunities for your data-driven initiatives.\\",author:[{slug:\\"udayasimha-theepireddy-uday\\"},{slug:\\"subhrata-kulshrestha\\"}],image:\\"21-cubes.jpeg\\",tags:[{slug:\\"cloud-monitoring\\"},{slug:\\"aws\\"},{slug:\\"amazon-emr\\"},{slug:\\"metrics\\"}]};function d(i){let e={a:\\"a\\",em:\\"em\\",h2:\\"h2\\",h3:\\"h3\\",img:\\"img\\",li:\\"li\\",ol:\\"ol\\",p:\\"p\\",strong:\\"strong\\",ul:\\"ul\\",...i.components};return(0,n.jsxs)(n.Fragment,{children:[(0,n.jsxs)(e.p,{children:[\\"In the dynamic realm of data processing, Amazon EMR takes center stage as an AWS-provided big data service, offering a cost-effective conduit for running Apache Spark and a plethora of other open-source applications. While the capabilities of EMR are impressive, the art of vigilant monitoring holds the key to unlocking its full potential. This blog post explains the pivotal role of monitoring Amazon EMR clusters, accentuating the transformative integration with Elastic\\",(0,n.jsx)(\\"sup\\",{children:\\"\\\\xAE\\"}),\\".\\"]}),`\\n`,(0,n.jsx)(e.p,{children:\\"Elastic can make it easier for organizations to transform data into actionable insights and stop threats quickly with unified visibility across your environment \\\\u2014 so mission-critical applications can keep running smoothly no matter what. 
From a free trial and fast deployment to sending logs to Elastic securely and frictionlessly, all you need to do is point and click to capture, store, and search data from your AWS services.\\"}),`\\n`,(0,n.jsx)(e.h2,{id:\\"monitoring-emr-via-elastic-observability\\",children:\\"Monitoring EMR via Elastic Observability\\"}),`\\n`,(0,n.jsx)(e.p,{children:\\"In this article, we will delve into the following key aspects:\\"}),`\\n`,(0,n.jsxs)(e.ul,{children:[`\\n`,(0,n.jsxs)(e.li,{children:[(0,n.jsx)(e.strong,{children:\\"Enabling EMR cluster metrics for Elastic integration:\\"}),\\" Learn the intricacies of configuring an EMR cluster to emit metrics that Elastic can effectively extract, paving the way for insightful analysis.\\"]}),`\\n`,(0,n.jsxs)(e.li,{children:[(0,n.jsx)(e.strong,{children:\\"Harnessing Kibana\\"}),\\" \\",(0,n.jsx)(\\"sup\\",{children:\\"\\\\xAE\\"}),\\" \\",(0,n.jsx)(e.strong,{children:\\"dashboards for EMR workload analysis:\\"}),\\" Discover the potential of utilizing Kibana dashboards to dissect metrics related to an EMR workload. By gaining a deeper understanding, we open the doors to optimization opportunities.\\"]}),`\\n`]}),`\\n`,(0,n.jsx)(e.h3,{id:\\"key-benefits-of-aws-emr-integration\\",children:\\"Key benefits of AWS EMR integration\\"}),`\\n`,(0,n.jsxs)(e.ul,{children:[`\\n`,(0,n.jsxs)(e.li,{children:[(0,n.jsx)(e.strong,{children:\\"Comprehensive monitoring:\\"}),\\" Monitor the health and performance of your EMR clusters in real time. Track metrics related to cluster status and utilization, node status, IO, and many others, allowing you to identify bottlenecks and optimize your data processing.\\"]}),`\\n`,(0,n.jsxs)(e.li,{children:[(0,n.jsx)(e.strong,{children:\\"Log analysis:\\"}),\\" Dive deep into EMR logs with ease. Our integration enables you to collect and analyze logs from your clusters, helping you troubleshoot issues and gain valuable insights.\\"]}),`\\n`,(0,n.jsxs)(e.li,{children:[(0,n.jsx)(e.strong,{children:\\"Cost optimization:\\"}),\\" Understand the cost implications of your EMR clusters. By monitoring resource utilization, you can identify opportunities to optimize your cluster configurations and reduce costs.\\"]}),`\\n`,(0,n.jsxs)(e.li,{children:[(0,n.jsx)(e.strong,{children:\\"Alerting and notifications:\\"}),\\" Set up custom alerts based on EMR metrics and logs. Receive notifications when performance thresholds are breached, ensuring that you can take action promptly.\\"]}),`\\n`,(0,n.jsxs)(e.li,{children:[(0,n.jsx)(e.strong,{children:\\"Seamless integration:\\"}),\\" Our integration is designed for ease of use. Getting started is simple, and you can start monitoring your EMR clusters quickly.\\"]}),`\\n`]}),`\\n`,(0,n.jsx)(e.p,{children:\\"Accompanying these discussions is an illustrative solution architecture diagram, providing a visual representation of the intricacies and interactions within the proposed solution.\\"}),`\\n`,(0,n.jsx)(e.p,{children:(0,n.jsx)(e.img,{src:\\"/assets/images/big-data-management-amazon-emr-elastic-integration/elastic-blog-1-flowchart-aws-emr.png\\",alt:\\"1\\",width:\\"1999\\",height:\\"664\\"})}),`\\n`,(0,n.jsx)(e.h2,{id:\\"how-to-get-started\\",children:\\"How to get started\\"}),`\\n`,(0,n.jsx)(e.p,{children:\\"Getting started with AWS EMR integration in Observability is easy. 
Here\'s a quick overview of the steps:\\"}),`\\n`,(0,n.jsx)(e.h3,{id:\\"prerequisites-and-configurations\\",children:\\"Prerequisites and configurations\\"}),`\\n`,(0,n.jsx)(e.p,{children:\\"If you intend to follow the steps outlined in this blog post, there are a few prerequisites and configurations that you should have in place beforehand.\\"}),`\\n`,(0,n.jsxs)(e.ol,{children:[`\\n`,(0,n.jsxs)(e.li,{children:[`\\n`,(0,n.jsxs)(e.p,{children:[\\"You will need an account on \\",(0,n.jsx)(e.a,{href:\\"https://cloud.elastic.co/\\",rel:\\"nofollow\\",children:\\"Elastic Cloud\\"}),\\" and a deployed stack and agent. Instructions for deploying a stack on AWS can be found \\",(0,n.jsx)(e.a,{href:\\"https://www.elastic.co/guide/en/elastic-stack/current/installing-elastic-stack.html\\",rel:\\"nofollow\\",children:\\"here\\"}),\\". This is necessary for AWS EMR logging and analysis.\\"]}),`\\n`]}),`\\n`,(0,n.jsxs)(e.li,{children:[`\\n`,(0,n.jsxs)(e.p,{children:[\\"You will also need an AWS account with the necessary permissions to pull data from AWS. Details on the required permissions can be found in our \\",(0,n.jsx)(e.a,{href:\\"https://docs.elastic.co/en/integrations/aws#aws-permissions\\",rel:\\"nofollow\\",children:\\"documentation\\"}),\\".\\"]}),`\\n`]}),`\\n`,(0,n.jsxs)(e.li,{children:[`\\n`,(0,n.jsx)(e.p,{children:\\"Finally, be sure to turn on EMR monitoring for the EMR cluster when you deploy the cluster.\\"}),`\\n`]}),`\\n`]}),`\\n`,(0,n.jsx)(e.h3,{id:\\"step-1-create-an-account-with-elastic\\",children:\\"Step 1: Create an account with Elastic\\"}),`\\n`,(0,n.jsxs)(e.p,{children:[(0,n.jsx)(e.a,{href:\\"https://cloud.elastic.co/registration?fromURI=/home\\",rel:\\"nofollow\\",children:\\"Create an account on Elastic Cloud\\"}),\\" by following the steps provided.\\"]}),`\\n`,(0,n.jsx)(e.h3,{id:\\"step-2-add-integration\\",children:\\"Step 2: Add integration\\"}),`\\n`,(0,n.jsxs)(e.ol,{children:[`\\n`,(0,n.jsxs)(e.li,{children:[\\"Log in to your \\",(0,n.jsx)(e.a,{href:\\"https://cloud.elastic.co/registration\\",rel:\\"nofollow\\",children:\\"Elastic Cloud on AWS\\"}),\\" deployment.\\"]}),`\\n`]}),`\\n`,(0,n.jsx)(e.p,{children:(0,n.jsx)(e.img,{src:\\"/assets/images/big-data-management-amazon-emr-elastic-integration/elastic-blog-2-free-trial.png\\",alt:\\"2 free trial\\",width:\\"1671\\",height:\\"672\\"})}),`\\n`,(0,n.jsxs)(e.ol,{start:\\"2\\",children:[`\\n`,(0,n.jsxs)(e.li,{children:[\\"Click on \\",(0,n.jsx)(e.strong,{children:\\"Add Integration\\"}),\\". 
You will be navigated to a catalog of supported integrations.\\"]}),`\\n`]}),`\\n`,(0,n.jsx)(e.p,{children:(0,n.jsx)(e.img,{src:\\"/assets/images/big-data-management-amazon-emr-elastic-integration/elastic-blog-3-welcome-home.png\\",alt:\\"3 welcome home\\",width:\\"1999\\",height:\\"1088\\"})}),`\\n`,(0,n.jsxs)(e.ol,{start:\\"3\\",children:[`\\n`,(0,n.jsxs)(e.li,{children:[\\"Search and select \\",(0,n.jsx)(e.strong,{children:\\"Amazon EMR\\"}),\\".\\"]}),`\\n`]}),`\\n`,(0,n.jsx)(e.p,{children:(0,n.jsx)(e.img,{src:\\"/assets/images/big-data-management-amazon-emr-elastic-integration/elastic-blog-4-integrations.png\\",alt:\\"4 integrations\\",width:\\"1999\\",height:\\"819\\"})}),`\\n`,(0,n.jsx)(e.h3,{id:\\"step-3-configure-integration\\",children:\\"Step 3: Configure integration\\"}),`\\n`,(0,n.jsxs)(e.ol,{children:[`\\n`,(0,n.jsxs)(e.li,{children:[`\\n`,(0,n.jsxs)(e.p,{children:[\\"Click on the \\",(0,n.jsx)(e.strong,{children:\\"Add Amazon EMR\\"}),\\" button and provide the required details.\\"]}),`\\n`]}),`\\n`,(0,n.jsxs)(e.li,{children:[`\\n`,(0,n.jsx)(e.p,{children:\\"Provide the required access credentials to connect to your EMR instance.\\"}),`\\n`]}),`\\n`,(0,n.jsxs)(e.li,{children:[`\\n`,(0,n.jsx)(e.p,{children:\\"You can choose to collect EMR metrics, EMR logs via S3, or EMR logs via Cloudwatch.\\"}),`\\n`]}),`\\n`,(0,n.jsxs)(e.li,{children:[`\\n`,(0,n.jsxs)(e.p,{children:[\\"Click on the \\",(0,n.jsx)(e.strong,{children:\\"Save and continue\\"}),\\" button at the bottom of the page.\\"]}),`\\n`]}),`\\n`]}),`\\n`,(0,n.jsx)(e.p,{children:(0,n.jsx)(e.img,{src:\\"/assets/images/big-data-management-amazon-emr-elastic-integration/elastic-blog-5-amazon-emr.png\\",alt:\\"5 amazon emr\\",width:\\"1999\\",height:\\"835\\"})}),`\\n`,(0,n.jsx)(e.p,{children:(0,n.jsx)(e.img,{src:\\"/assets/images/big-data-management-amazon-emr-elastic-integration/elastic-blog-6-add-amazon-emr.png\\",alt:\\"6 add amazon emr integration\\",width:\\"1999\\",height:\\"908\\"})}),`\\n`,(0,n.jsx)(e.h3,{id:\\"step-4-analyze-and-monitor\\",children:\\"Step 4: Analyze and monitor\\"}),`\\n`,(0,n.jsxs)(e.p,{children:[\\"Explore the data using the out-of-the-box dashboards available for the integration. Select \\",(0,n.jsx)(e.strong,{children:\\"Discover\\"}),\\" from the Elastic Cloud top-level menu.\\"]}),`\\n`,(0,n.jsx)(e.p,{children:(0,n.jsx)(e.img,{src:\\"/assets/images/big-data-management-amazon-emr-elastic-integration/elastic-blog-7-manage-deployment.png\\",alt:\\"7 manage deployment\\",width:\\"495\\",height:\\"747\\"})}),`\\n`,(0,n.jsx)(e.p,{children:\\"Or, create custom dashboards, set up alerts, and gain actionable insights into your EMR clusters\' performance.\\"}),`\\n`,(0,n.jsx)(e.p,{children:\\"This integration streamlines the collection of vital metrics and logs, including Cluster Status, Node Status, IO, and Cluster Capacity. 
Some metrics gathered include:\\"}),`\\n`,(0,n.jsxs)(e.ul,{children:[`\\n`,(0,n.jsxs)(e.li,{children:[(0,n.jsx)(e.strong,{children:\\"IsIdle:\\"}),\\" Indicates that a cluster is no longer performing work, but is still alive and accruing charges\\"]}),`\\n`,(0,n.jsxs)(e.li,{children:[(0,n.jsx)(e.strong,{children:\\"ContainerAllocated:\\"}),\\" The number of resource containers allocated by the ResourceManager\\"]}),`\\n`,(0,n.jsxs)(e.li,{children:[(0,n.jsx)(e.strong,{children:\\"ContainerReserved:\\"}),\\" The number of containers reserved\\"]}),`\\n`,(0,n.jsxs)(e.li,{children:[(0,n.jsx)(e.strong,{children:\\"CoreNodesRunning:\\"}),\\" The number of core nodes working\\"]}),`\\n`,(0,n.jsxs)(e.li,{children:[(0,n.jsx)(e.strong,{children:\\"CoreNodesPending:\\"}),\\" The number of core nodes waiting to be assigned\\"]}),`\\n`,(0,n.jsxs)(e.li,{children:[(0,n.jsx)(e.strong,{children:\\"MRActiveNodes:\\"}),\\" The number of nodes presently running MapReduce tasks or jobs\\"]}),`\\n`,(0,n.jsxs)(e.li,{children:[(0,n.jsx)(e.strong,{children:\\"MRLostNodes:\\"}),\\" The number of nodes allocated to MapReduce that have been marked in a LOST state\\"]}),`\\n`,(0,n.jsxs)(e.li,{children:[(0,n.jsx)(e.strong,{children:\\"HDFSUtilization:\\"}),\\" The percentage of HDFS storage currently used\\"]}),`\\n`,(0,n.jsxs)(e.li,{children:[(0,n.jsx)(e.strong,{children:\\"HDFSBytesRead/Written:\\"}),\\" The number of bytes read/written from HDFS (This metric aggregates MapReduce jobs only, and does not apply for other workloads on Amazon EMR.)\\"]}),`\\n`,(0,n.jsxs)(e.li,{children:[(0,n.jsx)(e.strong,{children:\\"TotalUnitsRequested/TotalNodesRequested/TotalVCPURequested:\\"}),\\" The target total number of units/nodes/vCPUs in a cluster as determined by managed scaling\\"]}),`\\n`]}),`\\n`,(0,n.jsx)(e.p,{children:(0,n.jsx)(e.img,{src:\\"/assets/images/big-data-management-amazon-emr-elastic-integration/elastic-blog-8-pie-graphs.png\\",alt:\\"8 pie graph\\",width:\\"1714\\",height:\\"893\\"})}),`\\n`,(0,n.jsx)(e.h2,{id:\\"conclusion\\",children:\\"Conclusion\\"}),`\\n`,(0,n.jsx)(e.p,{children:\\"Elastic is committed to fulfilling all your observability requirements, offering an effortless experience. Our integrations are designed to simplify the process of ingesting telemetry data, granting you convenient access to critical information for monitoring, analytics, and observability. The native AWS EMR integration underscores our dedication to delivering seamless solutions for your data needs. 
With this integration, you\'ll find the confidence to monitor, analyze, and optimize your EMR clusters, opening up exciting opportunities for your data-driven initiatives.\\"}),`\\n`,(0,n.jsx)(e.h2,{id:\\"start-a-free-trial-today\\",children:\\"Start a free trial today\\"}),`\\n`,(0,n.jsxs)(e.p,{children:[\\"Start your own \\",(0,n.jsx)(e.a,{href:\\"https://aws.amazon.com/marketplace/pp/prodview-voru33wi6xs7k?trk=5fbc596b-6d2a-433a-8333-0bd1f28e84da%E2%89%BBchannel=el\\",rel:\\"nofollow\\",children:\\"7-day free trial\\"}),\\" by signing up via \\",(0,n.jsx)(e.a,{href:\\"https://aws.amazon.com/marketplace/pp/prodview-voru33wi6xs7k?trk=5fbc596b-6d2a-433a-8333-0bd1f28e84da&sc_channel=el&ultron=gobig&hulk=regpage&blade=elasticweb&gambit=mp-b\\",rel:\\"nofollow\\",children:\\"AWS Marketplace\\"}),\\" and quickly spin up a deployment in minutes on any of the \\",(0,n.jsx)(e.a,{href:\\"https://www.elastic.co/guide/en/cloud/current/ec-reference-regions.html#ec_amazon_web_services_aws_regions\\",rel:\\"nofollow\\",children:\\"Elastic Cloud regions on AWS\\"}),\\" around the world. Your AWS Marketplace purchase of Elastic will be included in your monthly consolidated billing statement and will draw against your committed spend with AWS.\\"]}),`\\n`,(0,n.jsx)(e.p,{children:(0,n.jsx)(e.em,{children:\\"The release and timing of any features or functionality described in this post remain at Elastic\'s sole discretion. Any features or functionality not currently available may not be delivered on time or at all.\\"})})]})}function h(i={}){let{wrapper:e}=i.components||{};return e?(0,n.jsx)(e,{...i,children:(0,n.jsx)(d,{...i})}):d(i)}return v(R);})();\\n;return Component;"},"_id":"articles/big-data-management-amazon-emr-elastic-integration.mdx","_raw":{"sourceFilePath":"articles/big-data-management-amazon-emr-elastic-integration.mdx","sourceFileName":"big-data-management-amazon-emr-elastic-integration.mdx","sourceFileDir":"articles","contentType":"mdx","flattenedPath":"articles/big-data-management-amazon-emr-elastic-integration"},"type":"Article","imageUrl":"/assets/images/big-data-management-amazon-emr-elastic-integration/21-cubes.jpeg","readingTime":"6 min read","url":"/big-data-management-amazon-emr-elastic-integration","headings":[{"level":2,"title":"Monitoring EMR via Elastic Observability","href":"#monitoring-emr-via-elastic-observability"},{"level":3,"title":"Key benefits of AWS EMR integration","href":"#key-benefits-of-aws-emr-integration"},{"level":2,"title":"How to get started","href":"#how-to-get-started"},{"level":3,"title":"Prerequisites and configurations","href":"#prerequisites-and-configurations"},{"level":3,"title":"Step 1: Create an account with Elastic","href":"#step-1-create-an-account-with-elastic"},{"level":3,"title":"Step 2: Add integration","href":"#step-2-add-integration"},{"level":3,"title":"Step 3: Configure integration","href":"#step-3-configure-integration"},{"level":3,"title":"Step 4: Analyze and monitor","href":"#step-4-analyze-and-monitor"},{"level":2,"title":"Conclusion","href":"#conclusion"},{"level":2,"title":"Start a free trial today","href":"#start-a-free-trial-today"}]},{"title":"Bringing Your Cloud-Managed Kubernetes Audit Logs into Elasticsearch","slug":"bringing-your-cloud-managed-kubernetes-audit-logs-into-elasticsearch","date":"2024-08-19","description":"How to bring your Cloud-Managed Kubernetes Audit Logs into 
Elasticsearch","image":"bringing-your-cloud-managed-kubernetes-audit-logs-into-elasticsearch.jpg","author":[{"slug":"rema-s","type":"Author","_raw":{}},{"slug":"stephen-brown","type":"Author","_raw":{}}],"tags":[{"slug":"aws","type":"Tag","_raw":{}},{"slug":"log-analytics","type":"Tag","_raw":{}},{"slug":"kubernetes","type":"Tag","_raw":{}}],"body":{"raw":"\n## Introduction:\nKubernetes audit logs are essential for ensuring the security, compliance, and transparency of Kubernetes clusters. However, with managed Kubernetes infrastructure, traditional audit file-based log shipping is often not supported, and audit logs are only available via the control plane API or the Cloud Provider logging facility. In this blog, we will show you how to ingest the audit logs from these other sources and still take advantage of the [Elastic Kubernetes Audit Log Integration](https://www.elastic.co/docs/current/integrations/kubernetes/audit-logs).\n\nIn this blog we will be focusing on AWS as our cloud provider; when ingesting logs from AWS, you have several options:\n\n- [AWS Custom Logs integration](https://www.elastic.co/docs/current/integrations/aws_logs) (which we will utilize in this blog)\n- [AWS Firehose](https://www.elastic.co/observability-labs/blog/aws-kinesis-data-firehose-observability-analytics) to send logs from CloudWatch to Elastic\n- [AWS General integration](https://www.elastic.co/docs/current/integrations/aws), which supports many AWS sources\n\nIn part 1 of this two-part series, we will focus on properly ingesting Kubernetes Audit Logs, and part 2 will focus on investigation, analytics, and alerting.\n\nKubernetes auditing [documentation](https://kubernetes.io/docs/tasks/debug/debug-cluster/audit/) describes the need for auditing in order to get answers to the questions below:\n\n- What happened?\n- When did it happen?\n- Who initiated it?\n- What resource did it occur on?\n- Where was it observed?\n- From where was it initiated (Source IP)?\n- Where was it going (Destination IP)?\n\nAnswers to the above questions become important when an incident occurs and an investigation follows. Alternatively, it could just be a log retention use case for a regulated company trying to fulfill compliance requirements.\n\nWe are giving special importance to audit logs in Kubernetes because audit logs are not enabled by default. Audit logs can take up a large amount of memory and storage. So, usually, it’s a trade-off between retaining and investigating audit logs versus giving up resources otherwise budgeted for the workloads hosted on the Kubernetes cluster. Another reason we’re talking about audit logs in Kubernetes is that, unlike usual container logs, after being turned on, these logs are orchestrated to write to the cloud provider’s logging service. This is true for most cloud providers because the Kubernetes control plane is managed by the cloud providers. It makes sense for cloud providers to use their built-in orchestration workflows involving the control plane for a managed service backed by their implementation of a logging framework.\n\nKubernetes audit logs can be quite verbose by default. Hence, it becomes important to selectively choose how much logging needs to be done so that all the organization’s audit requirements are met. This is done in the [audit policy](https://kubernetes.io/docs/tasks/debug/debug-cluster/audit/#audit-policy) file. 
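For illustration, a minimal audit policy might look like the sketch below (the rules here are hypothetical and only meant to show the shape of the file; a real policy should reflect your organization’s audit requirements). Rules are evaluated in order, and the first match wins:\n\n```\napiVersion: audit.k8s.io/v1\nkind: Policy\n# Skip the stage we rarely need, to cut volume\nomitStages:\n  - \"RequestReceived\"\nrules:\n  # Record who touched Secrets, but only metadata\n  # (never the payload, which could leak secret values)\n  - level: Metadata\n    resources:\n      - group: \"\"\n        resources: [\"secrets\"]\n  # Drop high-volume read-only noise\n  - level: None\n    verbs: [\"get\", \"list\", \"watch\"]\n  # Log everything else at the Metadata level\n  - level: Metadata\n```\n\n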
The audit policy file is submitted to the `kube-apiserver`. Not all flavors of cloud-provider-hosted Kubernetes clusters allow you to work with the `kube-apiserver` directly, however. For example, AWS EKS allows this [logging](https://docs.aws.amazon.com/eks/latest/userguide/control-plane-logs.html) to be managed only by the control plane.\n\n**In this blog we will be using Elastic Kubernetes Service (Amazon EKS) on AWS with the Kubernetes Audit Logs that are automatically shipped to AWS CloudWatch.**\n\nA sample audit log for a secret named “empty-secret” created by an admin user on EKS is logged in AWS CloudWatch in the following format:\n\n![Alt text](/assets/images/bringing-your-cloud-managed-kubernetes-audit-logs-into-elasticsearch/aws-clougwatch-logs.png)\n\nOnce the audit logs show up on CloudWatch, it is time to consider how to transfer them to Elasticsearch. Elasticsearch is a great platform for creating dashboards that visualize different audit events recorded in a Kubernetes cluster. It is also a powerful tool for analyzing various audit events. For example, how many secret object creation attempts were made in an hour?\n\nNow that we have established that the Kubernetes audit logs are being logged in CloudWatch, let’s discuss how to get the logs ingested into Elasticsearch. Elasticsearch has an integration to consume logs written to CloudWatch. Using this integration with its defaults is going to ingest the JSON from CloudWatch as is, i.e., the real audit log JSON stays nested inside the wrapper CloudWatch JSON. When bringing logs to Elasticsearch, it is important that we use the [Elastic Common Schema](https://www.elastic.co/guide/en/ecs/current/index.html) (ECS) to get the best search and analytics performance. This means that there needs to be an ingest pipeline that parses a standard Kubernetes audit JSON message and creates an ECS-compliant document in Elasticsearch. Let’s dive into how to achieve this.\n\nElasticsearch has a Kubernetes integration using Elastic Agent to consume Kubernetes container logs from the console and audit logs written to a file path. For a cloud-provider use case, as described above, it may not be feasible to write audit logs to a path on the Kubernetes cluster. So, how do we leverage the [ECS fields designed for parsing the Kubernetes audit logs](https://github.com/elastic/integrations/blob/main/packages/kubernetes/data_stream/audit_logs/fields/fields.yml) already implemented in the Kubernetes integration to work on the CloudWatch audit logs? That is the most exciting plumbing piece! Let’s see how to do it.\n\n### What we’re going to do is:\n\n- Read the Kubernetes audit logs from the cloud provider’s logging module, in our case AWS CloudWatch, since this is where the logs reside. We will use Elastic Agent and the [Elasticsearch AWS Custom Logs integration](https://www.elastic.co/docs/current/integrations/aws_logs) to read the logs from CloudWatch. **Note:** please be aware that there are several Elastic AWS integrations; we are specifically using the AWS Custom Logs integration. 
\n\n- Create two simple ingest pipelines (we do this to follow best practices of isolation and composability)\n\n- The first pipeline looks for Kubernetes audit JSON messages and then redirects them to the second pipeline\n\n- The second custom pipeline will associate the JSON `message` field with the correct field expected by the Elasticsearch Kubernetes Audit managed pipeline (aka the Integration) and then [`reroute`](https://www.elastic.co/guide/en/elasticsearch/reference/current/reroute-processor.html) the message to the correct data stream, `kubernetes.audit_logs-default`, which in turn applies all the proper mappings and ingest pipelines for the incoming message\n\n- The overall flow will be:\n\n![Alt text](/assets/images/bringing-your-cloud-managed-kubernetes-audit-logs-into-elasticsearch/overall-ingestion-flow.png)\n\n### 1. Create an AWS CloudWatch integration:\n\na. Populate the AWS access key and secret pair values\n\n![Alt text](/assets/images/bringing-your-cloud-managed-kubernetes-audit-logs-into-elasticsearch/aws-custom-logs-integration-1.png)\n\nb. In the logs section, populate the log ARN and Tags, enable Preserve original event if you want to, and then save this integration and exit from the page\n\n![Alt text](/assets/images/bringing-your-cloud-managed-kubernetes-audit-logs-into-elasticsearch/aws-custom-logs-integration-2.png)\n\n\n### 2. Next, we will configure the custom ingest pipeline \n\nWe are doing this because we want to override what the generic managed pipeline does. We will retrieve the custom component name by searching for the managed pipeline created as an asset when we install the AWS CloudWatch integration. In this case, we will be adding the custom ingest pipeline `logs-aws_logs.generic@custom`.\n\n![Alt text](/assets/images/bringing-your-cloud-managed-kubernetes-audit-logs-into-elasticsearch/aws-logs-index-management.png)\n\n\nFrom the Dev Tools console, run the two requests below. Here, we are extracting the message field from the CloudWatch JSON and putting the value in a field called `kubernetes.audit`. Then, we are rerouting the message to the default Kubernetes audit dataset (and its ECS mappings) that comes with the Kubernetes integration:\n\n```\nPUT _ingest/pipeline/logs-aws_logs.generic@custom\n{\n  \"processors\": [\n    {\n      \"pipeline\": {\n        \"if\": \"ctx.message.contains(\'audit.k8s.io\')\",\n        \"name\": \"logs-aws-process-k8s-audit\"\n      }\n    }\n  ]\n}\n\nPUT _ingest/pipeline/logs-aws-process-k8s-audit\n{\n  \"processors\": [\n    {\n      \"json\": {\n        \"field\": \"message\",\n        \"target_field\": \"kubernetes.audit\"\n      }\n    },\n    {\n      \"remove\": {\n        \"field\": \"message\"\n      }\n    },\n    {\n      \"reroute\": {\n        \"dataset\": \"kubernetes.audit_logs\",\n        \"namespace\": \"default\"\n      }\n    }\n  ]\n}\n```\n\n
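You can sanity-check this wiring from the same console with the ingest pipeline simulate API before any live traffic flows (a minimal sketch; the document below is a trimmed, hypothetical audit event rather than a full CloudWatch payload, and the triple quotes are a Kibana Dev Tools convenience that avoids escaping the nested JSON):\n\n```\nPOST _ingest/pipeline/logs-aws_logs.generic@custom/_simulate\n{\n  \"docs\": [\n    {\n      \"_source\": {\n        \"message\": \"\"\"{\"kind\":\"Event\",\"apiVersion\":\"audit.k8s.io/v1\",\"verb\":\"create\",\"objectRef\":{\"resource\":\"secrets\",\"name\":\"empty-secret\"}}\"\"\"\n      }\n    }\n  ]\n}\n```\n\nIn the response you should see the `message` field replaced by a parsed `kubernetes.audit` object, and the document redirected toward the `kubernetes.audit_logs-default` data stream by the `reroute` processor.\n\n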
Let’s understand this further:\n\n- When we create a Kubernetes integration, we get a managed index template called `logs-kubernetes.audit_logs` that writes to the pipeline called `logs-kubernetes.audit_logs-1.62.2` by default\n\n- If we look into the pipeline `logs-kubernetes.audit_logs-1.62.2`, we see that all the processor logic works against the field `kubernetes.audit`. This is the reason why our json processor in the above code snippet creates a field called `kubernetes.audit` before dropping the original _message_ field and rerouting. Rerouting is directed to the `kubernetes.audit_logs` dataset that backs the `logs-kubernetes.audit_logs-1.62.2` pipeline (the dataset name is derived from the pipeline naming convention, which has the format `logs-<dataset>-<version>`)\n\n![Alt text](/assets/images/bringing-your-cloud-managed-kubernetes-audit-logs-into-elasticsearch/ingest-pipelines.png)\n\n\n### 3. Now let’s verify that the logs are actually flowing through and the audit message is being parsed\n\na. We will use Elastic Agent and enroll using Fleet and the integration policy we created in Step 1. There are a number of ways to [deploy Elastic Agent](https://www.elastic.co/guide/en/fleet/current/install-fleet-managed-elastic-agent.html), and for this exercise we will deploy using Docker, which is quick and easy.\n\n```\n% docker run --env FLEET_ENROLL=1 --env FLEET_URL=<> --env FLEET_ENROLLMENT_TOKEN=<> --rm docker.elastic.co/beats/elastic-agent:8.16.1\n```\n\nb. Check the messages in Discover. In 8.15 there is also a new feature called Logs Explorer, which provides the ability to see Kubernetes Audit logs (and container logs) with a few clicks (see image below). Voila! We can see the Kubernetes audit messages parsed!\n\n![Alt text](/assets/images/bringing-your-cloud-managed-kubernetes-audit-logs-into-elasticsearch/discover.jpg)\n\n### 4. Let\'s do a quick recap of what we did\n\nWe configured the CloudWatch integration in Elasticsearch to read Kubernetes audit logs from CloudWatch. 
Then, we created custom ingest pipelines to reroute the audit messages to the correct data stream and all the OOTB mappings and parsing that come with the Kubernetes Audit Logs integration.\xa0\\n\\nIn the next part, we’ll look at how to analyze the ingested Kubernetes Audit log data.\\n","code":"var Component=(()=>{var g=Object.create;var a=Object.defineProperty;var u=Object.getOwnPropertyDescriptor;var p=Object.getOwnPropertyNames;var w=Object.getPrototypeOf,m=Object.prototype.hasOwnProperty;var b=(n,e)=>()=>(e||n((e={exports:{}}).exports,e),e.exports),f=(n,e)=>{for(var i in e)a(n,i,{get:e[i],enumerable:!0})},r=(n,e,i,o)=>{if(e&&typeof e==\\"object\\"||typeof e==\\"function\\")for(let s of p(e))!m.call(n,s)&&s!==i&&a(n,s,{get:()=>e[s],enumerable:!(o=u(e,s))||o.enumerable});return n};var y=(n,e,i)=>(i=n!=null?g(w(n)):{},r(e||!n||!n.__esModule?a(i,\\"default\\",{value:n,enumerable:!0}):i,n)),v=n=>r(a({},\\"__esModule\\",{value:!0}),n);var d=b((E,l)=>{l.exports=_jsx_runtime});var W={};f(W,{default:()=>h,frontmatter:()=>k});var t=y(d()),k={title:\\"Bringing Your Cloud-Managed Kubernetes Audit Logs into Elasticsearch\\",slug:\\"bringing-your-cloud-managed-kubernetes-audit-logs-into-elasticsearch\\",date:\\"2024-08-19\\",description:\\"How to bring your Cloud-Managed Kubernetes Audit Logs into Elasticsearch\\",author:[{slug:\\"rema-s\\"},{slug:\\"stephen-brown\\"}],image:\\"bringing-your-cloud-managed-kubernetes-audit-logs-into-elasticsearch.jpg\\",tags:[{slug:\\"aws\\"},{slug:\\"log-analytics\\"},{slug:\\"kubernetes\\"}]};function c(n){let e={a:\\"a\\",code:\\"code\\",em:\\"em\\",h2:\\"h2\\",h3:\\"h3\\",img:\\"img\\",li:\\"li\\",p:\\"p\\",pre:\\"pre\\",strong:\\"strong\\",ul:\\"ul\\",...n.components};return(0,t.jsxs)(t.Fragment,{children:[(0,t.jsx)(e.h2,{id:\\"introduction\\",children:\\"Introduction:\\"}),`\\n`,(0,t.jsxs)(e.p,{children:[\\"Kubernetes audit logs are essential for ensuring the security, compliance, and transparency of Kubernetes clusters. However, with managed Kubernetes infrastructure, traditional audit file-based log shipping is often not supported, and audit logs are only available via the control plane API or the Cloud Provider logging facility. 
In this blog, we will show you how to ingest the audit logs from these other sources and still take advantage of the \\",(0,t.jsx)(e.a,{href:\\"https://www.elastic.co/docs/current/integrations/kubernetes/audit-logs\\",rel:\\"nofollow\\",children:\\"Elastic Kubernetes Audit Log Integration\\"}),\\".\\"]}),`\\n`,(0,t.jsx)(e.p,{children:\\"In this blog we will be focusing on AWS as our cloud provider and when ingesting logs from AWS you have several options:\\"}),`\\n`,(0,t.jsxs)(e.ul,{children:[`\\n`,(0,t.jsxs)(e.li,{children:[(0,t.jsx)(e.a,{href:\\"https://www.elastic.co/docs/current/integrations/aws_logs\\",rel:\\"nofollow\\",children:\\"AWS Custom Logs integration\\"}),\\" (which we will utilize in this blog)\\"]}),`\\n`,(0,t.jsxs)(e.li,{children:[(0,t.jsx)(e.a,{href:\\"https://www.elastic.co/observability-labs/blog/aws-kinesis-data-firehose-observability-analytics\\",rel:\\"nofollow\\",children:\\"AWS Firehose\\"}),\\" to send logs from Cloudwatch to Elastic\\"]}),`\\n`,(0,t.jsxs)(e.li,{children:[(0,t.jsx)(e.a,{href:\\"https://www.elastic.co/docs/current/integrations/aws\\",rel:\\"nofollow\\",children:\\"AWS General integration\\"}),\\" which supports many AWS sources\\"]}),`\\n`]}),`\\n`,(0,t.jsx)(e.p,{children:\\"In part 1 of this two-part series, we will focus on properly ingesting Kubernetes Audit, and part 2 will focus on investigation, analytics, and alerting.\\"}),`\\n`,(0,t.jsxs)(e.p,{children:[\\"Kubernetes auditing \\",(0,t.jsx)(e.a,{href:\\"https://kubernetes.io/docs/tasks/debug/debug-cluster/audit/\\",rel:\\"nofollow\\",children:\\"documentation\\"}),\\" describes the need for auditing in order to get answers to the questions below:\\"]}),`\\n`,(0,t.jsxs)(e.ul,{children:[`\\n`,(0,t.jsx)(e.li,{children:\\"What happened?\\"}),`\\n`,(0,t.jsx)(e.li,{children:\\"When did it happen?\\"}),`\\n`,(0,t.jsx)(e.li,{children:\\"Who initiated it?\\"}),`\\n`,(0,t.jsx)(e.li,{children:\\"What resource did it occur on?\\"}),`\\n`,(0,t.jsx)(e.li,{children:\\"Where was it observed?\\"}),`\\n`,(0,t.jsx)(e.li,{children:\\"From where was it initiated (Source IP)?\\"}),`\\n`,(0,t.jsx)(e.li,{children:\\"Where was it going (Destination IP)?\\"}),`\\n`]}),`\\n`,(0,t.jsx)(e.p,{children:\\"Answers to the above questions become important when an incident occurs and an investigation follows. Alternatively, it could just be a log retention use case for a regulated company trying to fulfill compliance requirements.\\\\xA0\\"}),`\\n`,(0,t.jsx)(e.p,{children:\\"We are giving special importance to audit logs in Kubernetes because audit logs are not enabled by default. Audit logs can take up a large amount of memory and storage. So, usually, it\\\\u2019s a balance between retaining/investigating audit logs against giving up resources budgeted otherwise for workloads to be hosted on the Kubernetes cluster. Another reason we\\\\u2019re talking about audit logs in Kubernetes is that, unlike usual container logs, after being turned on, these logs are orchestrated to write to the cloud provider\\\\u2019s logging service. This is true for most cloud providers because the Kubernetes control plane is managed by the cloud providers. It makes sense for cloud providers to use their built-in orchestration workflows involving the control plane for a managed service backed by their implementation of a logging framework.\\"}),`\\n`,(0,t.jsxs)(e.p,{children:[\\"Kubernetes audit logs can be quite verbose by default. 
Hence, it becomes important to selectively choose how much logging needs to be done so that all the audit requirements are met for the organization. This is done in the \\",(0,t.jsx)(e.a,{href:\\"https://kubernetes.io/docs/tasks/debug/debug-cluster/audit/#audit-policy\\",rel:\\"nofollow\\",children:\\"audit policy\\"}),\\" file. The audit policy file is submitted against the\\",(0,t.jsx)(e.code,{children:\\" kube-apiserver.\\"}),\\" It is not necessary that all flavors of cloud-provider-hosted Kubernetes clusters allow you to play with the \\",(0,t.jsx)(e.code,{children:\\"kube-apiserver\\"}),\\" directly. For example, AWS EKS allows for this \\",(0,t.jsx)(e.a,{href:\\"https://docs.aws.amazon.com/eks/latest/userguide/control-plane-logs.html\\",rel:\\"nofollow\\",children:\\"logging\\"}),\\" to be done only by the control plane.\\"]}),`\\n`,(0,t.jsx)(e.p,{children:(0,t.jsx)(e.strong,{children:\\"In this blog we will be using Elastic Kubernetes Service (Amazon EKS) on AWS with the Kubernetes Audit Logs that are automatically shipped to AWS CloudWatch.\\"})}),`\\n`,(0,t.jsx)(e.p,{children:\\"A sample audit log for a secret by the name \\\\u201Cempty-secret\\\\u201D created by an admin user on EKS\\\\xA0 is logged on AWS CloudWatch in the following format:\\\\xA0\\"}),`\\n`,(0,t.jsx)(e.p,{children:(0,t.jsx)(e.img,{src:\\"/assets/images/bringing-your-cloud-managed-kubernetes-audit-logs-into-elasticsearch/aws-clougwatch-logs.png\\",alt:\\"Alt text\\",width:\\"1999\\",height:\\"1165\\"})}),`\\n`,(0,t.jsx)(e.p,{children:\\"Once the audit logs show up on CloudWatch, it is time to consider how to transfer them to Elasticsearch. Elasticsearch is a great platform for creating dashboards that visualize different audit events recorded in a Kubernetes cluster. It is also a powerful tool for analyzing various audit events. For example, how many secret object creation attempts were made in an hour?\\\\xA0\\"}),`\\n`,(0,t.jsxs)(e.p,{children:[\\"Now that we established the Kubernetes audit logs are being logged in CloudWatch, let\\\\u2019s discuss how to get the logs ingested into Elasticsearch. Elasticsearch has an integration to consume logs written on CloudWatch. Just using this integration by default is going to get the JSON from CloudWatch as is i.e. the real audit log JSON is nested inside the wrapper CloudWatch JSON. When bringing logs to Elasticsearch, it is important that we use the \\",(0,t.jsx)(e.a,{href:\\"https://www.elastic.co/guide/en/ecs/current/index.html\\",rel:\\"nofollow\\",children:\\"Elastic Common Schema\\"}),\\"(ECS) to get the best search and analytics performance. This means that there needs to\\\\xA0 be an ingest pipeline that parses a standard Kubernetes audit JSON message and creates an ECS Compliant document in Elasticsearch. Let\\\\u2019s dive into how to achieve this.\\"]}),`\\n`,(0,t.jsxs)(e.p,{children:[\\"Elasticsearch has a Kubernetes integration using Elastic Agent to consume Kubernetes container logs from the console and audit logs written to a file path. For a cloud-provider use case, as described above, it may not be feasible to write audit logs to a path on the Kubernetes cluster. So, how do we leverage the\\",(0,t.jsx)(e.a,{href:\\"https://github.com/elastic/integrations/blob/main/packages/kubernetes/data_stream/audit_logs/fields/fields.yml\\",rel:\\"nofollow\\",children:\\" ECS designed for parsing the Kubernetes audit logs\\"}),\\" already implemented in the Kubernetes integration to work on the CloudWatch audit logs? That is the most exciting plumbing piece! 
Let\\u2019s see how to do it.\"]}),`\n`,(0,t.jsx)(e.h3,{id:\"what-were-going-to-do-is\",children:\"What we\\u2019re going to do is:\"}),`\n`,(0,t.jsxs)(e.ul,{children:[`\n`,(0,t.jsxs)(e.li,{children:[`\n`,(0,t.jsxs)(e.p,{children:[\"Read the Kubernetes audit logs from the cloud provider\\u2019s logging module, in our case AWS CloudWatch, since this is where the logs reside. We will use Elastic Agent and the \",(0,t.jsx)(e.a,{href:\"https://www.elastic.co/docs/current/integrations/aws_logs\",rel:\"nofollow\",children:\"Elasticsearch AWS Custom Logs integration\"}),\" to read the logs from CloudWatch. \",(0,t.jsx)(e.strong,{children:\"Note:\"}),\" please be aware that there are several Elastic AWS integrations; we are specifically using the AWS Custom Logs integration.\"]}),`\n`]}),`\n`,(0,t.jsxs)(e.li,{children:[`\n`,(0,t.jsx)(e.p,{children:\"Create two simple ingest pipelines (we do this to follow best practices of isolation and composability)\"}),`\n`]}),`\n`,(0,t.jsxs)(e.li,{children:[`\n`,(0,t.jsx)(e.p,{children:\"The first pipeline looks for Kubernetes audit JSON messages and then redirects them to the second pipeline\"}),`\n`]}),`\n`,(0,t.jsxs)(e.li,{children:[`\n`,(0,t.jsxs)(e.p,{children:[\"The second custom pipeline will associate the JSON \",(0,t.jsx)(e.code,{children:\"message\"}),\" field with the correct field expected by the Elasticsearch Kubernetes Audit managed pipeline (aka the Integration) and then \",(0,t.jsx)(e.a,{href:\"https://www.elastic.co/guide/en/elasticsearch/reference/current/reroute-processor.html\",rel:\"nofollow\",children:(0,t.jsx)(e.code,{children:\"reroute\"})}),\" the message to the correct data stream, \",(0,t.jsx)(e.code,{children:\"kubernetes.audit_logs-default\"}),\", which in turn applies all the proper mappings and ingest pipelines for the incoming message\"]}),`\n`]}),`\n`,(0,t.jsxs)(e.li,{children:[`\n`,(0,t.jsx)(e.p,{children:\"The overall flow will be:\"}),`\n`]}),`\n`]}),`\n`,(0,t.jsx)(e.p,{children:(0,t.jsx)(e.img,{src:\"/assets/images/bringing-your-cloud-managed-kubernetes-audit-logs-into-elasticsearch/overall-ingestion-flow.png\",alt:\"Alt text\",width:\"1742\",height:\"272\"})}),`\n`,(0,t.jsx)(e.h3,{id:\"1-create-an-aws-cloudwatch-integration\",children:\"1. Create an AWS CloudWatch integration:\"}),`\n`,(0,t.jsx)(e.p,{children:\"a. Populate the AWS access key and secret pair values\"}),`\n`,(0,t.jsx)(e.p,{children:(0,t.jsx)(e.img,{src:\"/assets/images/bringing-your-cloud-managed-kubernetes-audit-logs-into-elasticsearch/aws-custom-logs-integration-1.png\",alt:\"Alt text\",width:\"1732\",height:\"2000\"})}),`\n`,(0,t.jsx)(e.p,{children:\"b. In the logs section, populate the log ARN and Tags, enable Preserve original event if you want to, and then save this integration and exit from the page\"}),`\n`,(0,t.jsx)(e.p,{children:(0,t.jsx)(e.img,{src:\"/assets/images/bringing-your-cloud-managed-kubernetes-audit-logs-into-elasticsearch/aws-custom-logs-integration-2.png\",alt:\"Alt text\",width:\"893\",height:\"904\"})}),`\n`,(0,t.jsx)(e.h3,{id:\"2-next-we-will-configure-the-custom-ingest-pipeline\",children:\"2. Next, we will configure the custom ingest pipeline\"}),`\n`,(0,t.jsxs)(e.p,{children:[\"We are doing this because we want to override what the generic managed pipeline does. We will retrieve the custom component name by searching for the managed pipeline created as an asset when we install the AWS CloudWatch integration. 
In this case we will be adding the custom ingest pipeline \\",(0,t.jsx)(e.code,{children:\\"logs-aws_logs.generic@custom\\"})]}),`\\n`,(0,t.jsx)(e.p,{children:(0,t.jsx)(e.img,{src:\\"/assets/images/bringing-your-cloud-managed-kubernetes-audit-logs-into-elasticsearch/aws-logs-index-management.png\\",alt:\\"Alt text\\",width:\\"1999\\",height:\\"786\\"})}),`\\n`,(0,t.jsx)(e.p,{children:\\"From the Dev tools console, run below. Here, we are extracting the message field from the CloudWatch JSON and putting the value in a field called kubernetes.audit. Then, we are rerouting this message to the default Kubernetes audit dataset or ECS that comes with Kubernetes integration\\"}),`\\n`,(0,t.jsx)(e.pre,{children:(0,t.jsx)(e.code,{children:`PUT _ingest/pipeline/logs-aws_logs.generic@custom\\n{\\n\\\\xA0\\\\xA0\\\\xA0\\\\xA0\\"processors\\": [\\n\\\\xA0\\\\xA0\\\\xA0\\\\xA0\\\\xA0\\\\xA0{\\n\\\\xA0\\\\xA0\\\\xA0\\\\xA0\\\\xA0\\\\xA0\\\\xA0\\\\xA0\\"pipeline\\": {\\n\\\\xA0\\\\xA0\\\\xA0\\\\xA0\\\\xA0\\\\xA0\\\\xA0\\\\xA0\\\\xA0\\\\xA0\\"if\\": \\"ctx.message.contains(\'audit.k8s.io\')\\",\\n\\\\xA0\\\\xA0\\\\xA0\\\\xA0\\\\xA0\\\\xA0\\\\xA0\\\\xA0\\\\xA0\\\\xA0\\"name\\": \\"logs-aws-process-k8s-audit\\"\\n\\\\xA0\\\\xA0\\\\xA0\\\\xA0\\\\xA0\\\\xA0\\\\xA0\\\\xA0}\\n\\\\xA0\\\\xA0\\\\xA0\\\\xA0\\\\xA0\\\\xA0}\\n\\\\xA0\\\\xA0\\\\xA0\\\\xA0]\\n}\\n\\nPUT _ingest/pipeline/logs-aws-process-k8s-audit\\n{\\n\\\\xA0\\\\xA0\\"processors\\": [\\n\\\\xA0\\\\xA0\\\\xA0\\\\xA0{\\n\\\\xA0\\\\xA0\\\\xA0\\\\xA0\\\\xA0\\\\xA0\\"json\\": {\\n\\\\xA0\\\\xA0\\\\xA0\\\\xA0\\\\xA0\\\\xA0\\\\xA0\\\\xA0\\"field\\": \\"message\\",\\n\\\\xA0\\\\xA0\\\\xA0\\\\xA0\\\\xA0\\\\xA0\\\\xA0\\\\xA0\\"target_field\\": \\"kubernetes.audit\\"\\n\\\\xA0\\\\xA0\\\\xA0\\\\xA0\\\\xA0\\\\xA0}\\n\\\\xA0\\\\xA0\\\\xA0\\\\xA0},\\n\\\\xA0\\\\xA0\\\\xA0\\\\xA0{\\n\\\\xA0\\\\xA0\\\\xA0\\\\xA0\\\\xA0\\\\xA0\\"remove\\": {\\n\\\\xA0\\\\xA0\\\\xA0\\\\xA0\\\\xA0\\\\xA0\\\\xA0\\\\xA0\\"field\\": \\"message\\"\\n\\\\xA0\\\\xA0\\\\xA0\\\\xA0\\\\xA0\\\\xA0}\\n\\\\xA0\\\\xA0\\\\xA0\\\\xA0},\\n\\\\xA0\\\\xA0\\\\xA0\\\\xA0{\\n\\\\xA0\\\\xA0\\\\xA0\\\\xA0\\\\xA0\\\\xA0\\"reroute\\": {\\n\\\\xA0\\\\xA0\\\\xA0\\\\xA0\\\\xA0\\\\xA0\\\\xA0\\\\xA0\\"dataset\\": \\"kubernetes.audit_logs\\",\\n\\\\xA0\\\\xA0\\\\xA0\\\\xA0\\\\xA0\\\\xA0\\\\xA0\\\\xA0\\"namespace\\": \\"default\\"\\n\\\\xA0\\\\xA0\\\\xA0\\\\xA0\\\\xA0\\\\xA0}\\n\\\\xA0\\\\xA0\\\\xA0\\\\xA0}\\n\\\\xA0\\\\xA0]\\n}\\n`})}),`\\n`,(0,t.jsx)(e.p,{children:\\"Let\\\\u2019s understand this further:\\"}),`\\n`,(0,t.jsxs)(e.ul,{children:[`\\n`,(0,t.jsxs)(e.li,{children:[`\\n`,(0,t.jsxs)(e.p,{children:[\\"When we create a Kubernetes integration, we get a managed index template called \\",(0,t.jsx)(e.code,{children:\\"logs-kubernetes.audit_logs\\"}),\\" that writes to the pipeline called \\",(0,t.jsx)(e.code,{children:\\"logs-kubernetes.audit_logs-1.62.2\\"}),\\" by default\\"]}),`\\n`]}),`\\n`,(0,t.jsxs)(e.li,{children:[`\\n`,(0,t.jsxs)(e.p,{children:[\\"If we look into the pipeline\\",(0,t.jsx)(e.code,{children:\\" logs-kubernetes.audit_logs-1.62.2\\"}),\\", we see that all the processor logic is working against the field \\",(0,t.jsx)(e.code,{children:\\"kubernetes.audit\\"}),\\". This is the reason why our json processor in the above code snippet is creating a field called \\",(0,t.jsx)(e.code,{children:\\"kubernetes.audit \\"}),\\"before dropping the original \\",(0,t.jsx)(e.em,{children:\\"message\\"}),\\" field and rerouting. 
(0,t.jsx)(e.p,{children:(0,t.jsx)(e.img,{src:\"/assets/images/bringing-your-cloud-managed-kubernetes-audit-logs-into-elasticsearch/ingest-pipelines.png\",alt:\"Alt text\",width:\"1999\",height:\"1152\"})}),`\n`,(0,t.jsx)(e.h3,{id:\"3--now-lets-verify-that-the-logs-are-actually-flowing-through-and-the-audit-message-is-being-parsed\",children:\"3. Now let\\u2019s verify that the logs are actually flowing through and the audit message is being parsed\"}),`\n`,(0,t.jsxs)(e.p,{children:[\"a. We will use Elastic Agent, enrolled through Fleet with the integration policy we created in Step 1. There are a number of ways to \",(0,t.jsx)(e.a,{href:\"https://www.elastic.co/guide/en/fleet/current/install-fleet-managed-elastic-agent.html\",rel:\"nofollow\",children:\"deploy Elastic Agent\"}),\", and for this exercise we will deploy using Docker, which is quick and easy.\"]}),`\n`,(0,t.jsx)(e.pre,{children:(0,t.jsx)(e.code,{children:`% docker run --env FLEET_ENROLL=1 --env FLEET_URL=<> --env FLEET_ENROLLMENT_TOKEN=<> --rm docker.elastic.co/beats/elastic-agent:8.16.1\n`})}),`\n`,(0,t.jsx)(e.p,{children:\"b. Check the messages in Discover. In 8.15, there is also a new feature called Logs Explorer, which lets you view Kubernetes audit logs (and container logs) with a few clicks (see image below). Voila! We can see the Kubernetes audit messages parsed!\"}),`\n`,(0,t.jsx)(e.p,{children:(0,t.jsx)(e.img,{src:\"/assets/images/bringing-your-cloud-managed-kubernetes-audit-logs-into-elasticsearch/discover.jpg\",alt:\"Alt text\",width:\"4908\",height:\"2414\"})}),`\n`,
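If you prefer to verify from Dev Tools instead of Discover, a quick existence query against the rerouted data stream should return parsed documents (the field name here assumes the standard mappings shipped with the Kubernetes integration):

```
GET logs-kubernetes.audit_logs-default/_search
{
  "size": 1,
  "query": { "exists": { "field": "kubernetes.audit.verb" } }
}
```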
(0,t.jsx)(e.h3,{id:\"4-lets-do-a-quick-recap-of-what-we-did\",children:\"4. Let\'s do a quick recap of what we did\"}),`\n`,(0,t.jsx)(e.p,{children:\"We configured the CloudWatch integration in Elasticsearch to read Kubernetes audit logs from CloudWatch. Then, we created custom ingest pipelines to reroute the audit messages to the correct data stream and apply all the OOTB mappings and parsing that come with the Kubernetes Audit Logs integration.\"}),`\n`,(0,t.jsx)(e.p,{children:\"In the next part, we\\u2019ll look at how to analyze the ingested Kubernetes Audit log data.\"})]})}function h(n={}){let{wrapper:e}=n.components||{};return e?(0,t.jsx)(e,{...n,children:(0,t.jsx)(c,{...n})}):c(n)}return v(W);})();\n;return Component;"},"_id":"articles/bringing-your-cloud-managed-kubernetes-audit-logs-into-elasticsearch.mdx","_raw":{"sourceFilePath":"articles/bringing-your-cloud-managed-kubernetes-audit-logs-into-elasticsearch.mdx","sourceFileName":"bringing-your-cloud-managed-kubernetes-audit-logs-into-elasticsearch.mdx","sourceFileDir":"articles","contentType":"mdx","flattenedPath":"articles/bringing-your-cloud-managed-kubernetes-audit-logs-into-elasticsearch"},"type":"Article","imageUrl":"/assets/images/bringing-your-cloud-managed-kubernetes-audit-logs-into-elasticsearch/bringing-your-cloud-managed-kubernetes-audit-logs-into-elasticsearch.jpg","readingTime":"9 min read","url":"/bringing-your-cloud-managed-kubernetes-audit-logs-into-elasticsearch","headings":[{"level":2,"title":"Introduction:","href":"#introduction"},{"level":3,"title":"What we’re going to do is:","href":"#what-were-going-to-do-is"},{"level":3,"title":"1. Create an AWS CloudWatch integration:","href":"#1-create-an-aws-cloudwatch-integration"},{"level":3,"title":"2. Next, we will configure the custom ingest pipeline ","href":"#2-next-we-will-configure-the-custom-ingest-pipeline-"},{"level":3,"title":"3. Now let’s verify that the logs are actually flowing through and the audit message is being parsed","href":"#3--now-lets-verify-that-the-logs-are-actually-flowing-through-and-the-audit-message-is-being-parsed"},{"level":3,"title":"4. Let\'s do a quick recap of what we did","href":"#4-lets-do-a-quick-recap-of-what-we-did"}]},{"title":"Beyond the trace: Pinpointing performance culprits with continuous profiling and distributed tracing correlation","slug":"continuous-profiling-distributed-tracing-correlation","date":"2024-03-28","description":"Frustrated by slow traces but unsure where the code bottleneck lies? Elastic Universal Profiling correlates profiling stacktraces with OpenTelemetry (OTel) traces, helping you identify and pinpoint the exact lines of code causing performance issues.","image":"Under_highway_bridge.jpg","author":[{"slug":"joel-honer","type":"Author","_raw":{}},{"slug":"israel-ogbole","type":"Author","_raw":{}},{"slug":"jonas-kunz","type":"Author","_raw":{}}],"tags":[{"slug":"universal-profiling","type":"Tag","_raw":{}},{"slug":"apm","type":"Tag","_raw":{}},{"slug":"opentelemetry","type":"Tag","_raw":{}}],"body":{"raw":"\nObservability goes beyond monitoring; it\'s about truly understanding your system. To achieve this comprehensive view, practitioners need a unified observability solution that natively combines insights from metrics, logs, traces, and crucially, **continuous profiling**. While metrics, logs, and traces offer valuable insights, they can\'t answer the all-important \"why.\" Continuous profiling signals act as a magnifying glass, providing granular code visibility into the system\'s hidden complexities. They fill the gap left by other data sources, enabling you to answer critical questions –– why is this trace slow?
Where exactly in the code is the bottleneck residing?\\n\\nTraces provide the \\"what\\" and \\"where\\" — what happened and where in your system. Continuous profiling refines this understanding by pinpointing the \\"why\\" and validating your hypotheses about the \\"what.\\" Just like a full-body MRI scan, Elastic\'s whole-system continuous profiling (powered by eBPF) uncovers unknown-unknowns in your system. This includes not just your code, but also third-party libraries and kernel activity triggered by your application transactions. This comprehensive visibility improves your mean-time-to-detection (MTTD) and mean-time-to-recovery (MTTR) KPIs.\\n\\n_[Related article:_ [_Why metrics, logs, and traces aren’t enough_](https://www.elastic.co/blog/observability-profiling-metrics-logs-traces)_]_\\n\\n## Bridging the disconnect between continuous profiling and OTel traces\\n\\nHistorically, continuous profiling signals have been largely disconnected from OpenTelemetry (OTel) traces. Here\'s the exciting news: we\'re bridging this gap! We\'re introducing native correlation between continuous profiling signals and OTel traces, starting with Java.\\n\\nImagine this: You\'re troubleshooting a performance issue and identify a slow trace. Whole-system continuous profiling steps in, acting like an MRI scan for your entire codebase and system. It narrows down the culprit to the specific lines of code hogging CPU time within the context of your distributed trace. This empowers you to answer the \\"why\\" question with minimal effort and confidence, all within the same troubleshooting context.\\n\\nFurthermore, by correlating continuous profiling with distributed tracing, Elastic Observability customers can measure the cloud cost and CO2 impact of every code change at the service and transaction level.\\n\\nThis milestone is significant, especially considering the recent developments in the OTel community. With [OTel adopting profiling](https://www.cncf.io/blog/2024/03/19/opentelemetry-announces-support-for-profiling/) and Elastic [donating the industry’s most advanced eBPF-based continuous profiling agent to OTel](https://www.elastic.co/blog/elastic-donation-proposal-to-contribute-profiling-agent-to-opentelemetry), we\'re set for a game-changer in observability — empowering OTel end users with a correlated system visibility that goes from a trace span in the userspace down to the kernel.\\n\\nFurthermore, achieving this goal, especially with Java, presented significant challenges and demanded serious engineering R&D. This blog post will delve into these challenges, explore the approaches we considered in our proof-of-concepts, and explain how we arrived at a solution that can be easily extended to other OTel language agents. 
Most importantly, this solution correlates traces with profiling signals at the agent, not in the backend — to ensure optimal query performance and minimal reliance on vendor backend storage architectures.\n\n![Profiling flamegraph for a specific trace.id](/assets/images/continuous-profiling-distributed-tracing-correlation/trace.png)\n\n## Figuring out the active OTel trace and span\n\nThe primary technical challenge in this endeavor is essentially the following: whenever the profiler interrupts an OTel instrumented process to capture a stacktrace, we need to be able to efficiently determine the active span and trace ID (per-thread) and the service name (per-process).\n\nFor the purpose of this blog, we\'ll focus on the recently released [Elastic distribution of the OTel Java instrumentation](https://github.com/elastic/elastic-otel-java), but the approach that we ended up with generalizes to any language that can load and call into a native library. So, how do we get our hands on those IDs?\n\n![Profiling correlated with service.name, showing CO2 and cloud cost impact by line of code.](/assets/images/continuous-profiling-distributed-tracing-correlation/service-popout.png)\n\nThe OTel Java agent itself keeps track of the active span by storing a stack of spans in the [OpenTelemetryContext](https://opentelemetry.io/docs/concepts/context-propagation/#context), which itself is stored in a [ThreadLocal](https://docs.oracle.com/javase/8/docs/api/java/lang/ThreadLocal.html) variable. We originally considered reading these Java structures directly from BPF, but we eventually decided against that approach. There is no documented specification on how ThreadLocals are implemented, and reliably reading and following the JVM\'s internal data-structures would incur a high maintenance burden. Any minor update to the JVM could change details of the structure layouts. To add to this, we would also have to reverse engineer how each JVM version lays out Java class fields in memory, as well as how all the high-level Java types used in the context objects are actually implemented under the hood. This approach further wouldn\'t generalize to any non-JVM language and would need to be repeated for any language that we wish to support.\n\nAfter we had convinced ourselves that reading Java ThreadLocal directly is not the answer, we decided to look for more portable alternatives instead. The option that we ultimately settled on is to load and call into a C++ library that is responsible for making the required information available via a known and defined interface whenever the span changes.\n\nUnlike with Java\'s ThreadLocals, the details of how a native shared library should expose per-process and per-thread data are well-defined in the System V ABI specification and the architecture-specific ELF ABI documents.\n\n## Exposing per-process information\n\nExposing per-process data is easy: we simply declare a global variable . . .\n\n```java\nvoid* elastic_tracecorr_process_storage_v1 = nullptr;\n```\n\n. . . and expose it via ELF symbols. When the user initializes the OTel library to set the service name, we allocate a buffer and populate it with data in a [protocol that we defined for this purpose](https://github.com/elastic/apm/blob/149cd3e39a77a58002344270ed2ad35357bdd02d/specs/agents/universal-profiling-integration.md#process-storage-layout). Once the buffer is fully populated, we update the global pointer to point to the buffer.
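The ordering matters: the buffer must be fully written before the pointer becomes visible. Here is a minimal sketch of that publish step (illustrative only; the function name and buffer layout are invented, and the real layout follows the protocol linked above):

```cpp
#include <cstring>

// The exported global that the profiler locates via the library's ELF symbols.
void* elastic_tracecorr_process_storage_v1 = nullptr;

// Hypothetical publish step: fill the buffer completely, then make the
// pointer visible. (The real buffer is allocated and serialized per the
// protocol spec; a static char array keeps this sketch short.)
void publish_process_storage(const char* service_name) {
  static char buffer[4096];
  std::strncpy(buffer, service_name, sizeof(buffer) - 1);

  // Release store: a reader that observes the non-null pointer is guaranteed
  // to also observe the completed writes to `buffer`.
  __atomic_store_n(&elastic_tracecorr_process_storage_v1, buffer, __ATOMIC_RELEASE);
}
```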
\n\nOn the profiling agent side, we already have code in place that detects libraries and executables loaded into any process\'s address space. We normally use this mechanism to detect and analyze high-level language interpreters (e.g., libpython, libjvm) when they are loaded, but it also turned out to be a perfect fit to detect the OTel trace correlation library. When the library is detected in a process, we scan the exports, resolve the symbol, and read the per-process information directly from the instrumented process’ memory.\n\n## Exposing per-thread information\n\nWith the easy part out of the way, let\'s get to the nitty-gritty portion: exposing per-thread information via thread-local storage (TLS). So, what exactly is TLS, and how does it work? At the most basic level, the idea is to have **one instance of a variable for every thread**. Semantically you can think of it like having a global Map, although that is not how it is implemented.\n\nOn Linux, there are two major options for thread locals: TSD and TLS.\n\n## Thread-specific data (TSD)\n\nTSD is the older and probably more commonly known variant. It works by explicitly allocating a key via pthread_key_create — usually during process startup — and passing it to all threads that require access to the thread-local variable. The threads can then pass that key to the pthread_getspecific and pthread_setspecific functions to read and update the variable for the currently running thread.
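As a quick illustration of what the TSD API looks like in practice (a generic, self-contained sketch, not code from the profiler):

```cpp
#include <pthread.h>
#include <cstdio>

static pthread_key_t key;

static void* worker(void* arg) {
  // Each thread writes and reads its own slot behind the same key.
  pthread_setspecific(key, arg);
  std::printf("this thread sees: %s\n",
              static_cast<const char*>(pthread_getspecific(key)));
  return nullptr;
}

int main() {
  pthread_key_create(&key, nullptr);  // typically done once at startup
  pthread_t a, b;
  pthread_create(&a, nullptr, worker, const_cast<char*>("value-A"));
  pthread_create(&b, nullptr, worker, const_cast<char*>("value-B"));
  pthread_join(a, nullptr);
  pthread_join(b, nullptr);
  pthread_key_delete(key);
  return 0;
}
```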
\n\nTSD is simple, but for our purposes it has a range of drawbacks:\n\n- The pthread_key_t structure is opaque and doesn\'t have a defined layout. Similar to the Java ThreadLocals, the underlying data-structures aren\'t defined by the ABI documents and different libc implementations (glibc, musl) will handle them differently.\n\n- We cannot call a function like pthread_getspecific from BPF, so we\'d have to reverse engineer and reimplement the logic. Logic may change between libc versions, and we’d have to detect the version and support all variants that may come up in the wild.\n\n- TSD performance is not predictable and varies depending on how many thread local variables have been allocated in the process previously. This may not be a huge concern for Java specifically since spans are typically not swapped super rapidly, but it’d likely be quite noticeable for user-mode scheduling languages where the context might need to be swapped at every await point/coroutine yield.\n\nNone of this is strictly prohibitive, but a lot of this is annoying at the very least. Let’s see if we can do better!\n\n## Thread-local storage (TLS)\n\nStarting with C11 and C++11, both languages support thread local variables directly via the \\_Thread_local and thread_local storage specifiers, respectively. Declaring a variable as per-thread is now a matter of simply adding the keyword:\n\n```java\nthread_local void* elastic_tracecorr_tls_v1 = nullptr;\n```\n\nYou might assume that the compiler simply inserts calls to the corresponding pthread functions when variables declared this way are accessed, but this is not actually the case. The reality is surprisingly complicated, and it turns out that there are four different models of TLS that the compiler can choose to generate. For some of those models, there are further multiple dialects that can be used to implement them. The different models and dialects come with various portability versus performance trade-offs. If you are interested in the details, I suggest reading this [blog article](https://maskray.me/blog/2021-02-14-all-about-thread-local-storage) that does a great job of explaining them.\n\nThe TLS model and dialect are usually chosen by the compiler based on a somewhat opaque and complicated set of architecture-specific rules. Fortunately for us, both gcc and clang allow users to pick a particular one using the -ftls-model and -mtls-dialect arguments. The variant that we ended up picking for our purposes is -ftls-model=global-dynamic and -mtls-dialect=gnu2 (and desc on aarch64).
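For reference, pinning these options at build time looks something like the following (an illustrative x86-64 invocation; the library and file names are placeholders, not the project's actual build):

```bash
g++ -shared -fPIC -ftls-model=global-dynamic -mtls-dialect=gnu2 \
    -o libtracecorr.so tracecorr.cpp
```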
\n\nLet\'s take a look at the assembly that is being generated when accessing a thread_local variable under these settings. Our function:\n\n```java\nvoid setThreadProfilingCorrelationBuffer(JNIEnv* jniEnv, jobject bytebuffer) {\n  if (bytebuffer == nullptr) {\n    elastic_tracecorr_tls_v1 = nullptr;\n  } else {\n    elastic_tracecorr_tls_v1 = jniEnv->GetDirectBufferAddress(bytebuffer);\n  }\n}\n```\n\nis compiled to the following assembly code:\n\n![assembly](/assets/images/continuous-profiling-distributed-tracing-correlation/assembly.png)\n\nBoth possible branches assign a value to our thread-local variable. Let’s focus on the right branch corresponding to the nullptr case to get rid of the noise from the GetDirectBufferAddress function call:\n\n```java\nlea rax, elastic_tracecorr_tls_v1_tlsdesc ;; Load some pointer into rax.\ncall qword ptr [rax] ;; Read & call function pointer at rax.\nmov qword ptr fs:[rax], 0 ;; Assign 0 to the pointer returned by\n ;; the function that we just called.\n```\n\nThe fs: portion of the mov instruction is the actual magic bit that makes the memory read per-thread. We’ll get to that later; let’s first look at the mysterious elastic_tracecorr_tls_v1_tlsdesc variable that the compiler emitted here. It’s an instance of the tlsdesc structure that is located somewhere in the .got.plt ELF section. The structure looks like this:\n\n```java\nstruct tlsdesc {\n  // Function pointer used to retrieve the offset\n  uint64_t (*resolver)(tlsdesc*);\n\n  // TLS offset -- more on that later.\n  uint64_t tp_offset;\n};\n```\n\nThe resolver field is initialized with nullptr and tp_offset with a per-executable offset. The first thread-local variable in an executable will usually have offset 0, the next one sizeof(first_var), and so on. At first glance this may appear to be similar to how TSD works, with the call to pthread_getspecific to resolve the actual offset, but there is a crucial difference. When the library is loaded, the resolver field is filled in with the address of \\_\\_tls_get_addr by the loader (ld.so). \\_\\_tls_get_addr is a relatively heavy function that allocates a TLS offset that is globally unique between all shared libraries in the process. It then proceeds by updating the tlsdesc structure itself, inserting the global offset and replacing the resolver function with a trivial one:\n\n```java\nvoid* second_stage_resolver(tlsdesc* desc) {\n  return (void*)desc->tp_offset;\n}\n```\n\nIn essence, this means that the first access to a tlsdesc based thread-local variable is rather expensive, but all subsequent ones are cheap. We further know that by the time that our C++ library starts publishing per-thread data, it must have gone through the initial resolving process already. Consequently, all that we need to do is to read the final offset from the process\'s memory and memorize it. We also refresh the offset every now and then to ensure that we really have the final offset, combating the unlikely but possible race condition that we read the offset before it was initialized. We can detect this case by comparing the resolver address against the address of the \\_\\_tls_get_addr function exported by ld.so.\n\n## Determining the TLS offset from an external process\n\nWith that out of the way, the next question that arises is how to actually find the tlsdesc in memory so that we can read the offset. Intuitively one might expect that the dynamic symbol exported in the ELF file points to that descriptor, but that is not actually the case.\n\n```bash\n$ readelf --wide --dyn-syms elastic-jvmti-linux-x64.so | grep elastic_tracecorr_tls_v1\n328: 0000000000000000 \t8 TLS \tGLOBAL DEFAULT 19 elastic_tracecorr_tls_v1\n```\n\nThe dynamic symbol instead contains an offset relative to the start of the .tls ELF section and points to the initial value that libc initializes the TLS value with when it is allocated. So how does ld.so find the tlsdesc to fill in the initial resolver? In addition to the dynamic symbol, the compiler also emits a relocation record for our symbol, and that one actually points to the descriptor structure that we are looking for.\n\n```bash\n$ readelf --relocs --wide elastic-jvmti-linux-x64.so | grep R_X86_64_TLSDESC\n00000000000426e8 0000014800000024 R_X86_64_TLSDESC \t0000000000000000\nelastic_tracecorr_tls_v1 + 0\n```\n\nTo read the final TLS offset, we thus simply have to (see the sketch after this list):\n\n- Wait for the event notifying us about a new shared library being loaded into a process\n\n- Do some cheap heuristics to detect our C++ library, avoiding the more expensive analysis below from being executed for every unrelated library on the system\n\n- Analyze the library on disk and scan ELF relocations for our per-thread variable to extract the tlsdesc address\n\n- Rebase that address to match where our library was loaded in that particular process\n\n- Read the offset from tlsdesc+8
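Condensed into user-space code, the last two steps amount to something like this (a sketch under stated assumptions: `load_bias` and `tlsdesc_vaddr` stand in for values recovered from the mapping and relocation scan, and the actual profiler reads remote memory through its own machinery rather than this helper):

```cpp
#include <cstdint>
#include <sys/types.h>
#include <sys/uio.h>

// Hypothetical helper: read the resolved tp_offset out of a remote process.
// The tlsdesc lives at load_bias + tlsdesc_vaddr, and tp_offset sits 8 bytes
// into the descriptor.
static int read_tls_offset(pid_t pid, uintptr_t load_bias,
                           uintptr_t tlsdesc_vaddr, uint64_t* out) {
  iovec local{};
  local.iov_base = out;
  local.iov_len = sizeof(*out);

  iovec remote{};
  remote.iov_base = reinterpret_cast<void*>(load_bias + tlsdesc_vaddr + 8);
  remote.iov_len = sizeof(*out);

  return process_vm_readv(pid, &local, 1, &remote, 1, 0) == sizeof(*out) ? 0 : -1;
}
```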
\n\n## Determining the TLS base\n\nNow that we have the offset, how do we use that to actually read the data that the library puts there for us? This brings us back to the magic fs: portion of the mov instruction that we discussed earlier. In X86, most memory operands can optionally be supplied with a segment register that influences the address translation.\n\nSegments are an archaic construct from the early days of 16-bit X86 where they were used to extend the address space. Essentially the architecture provides a range of segment registers that can be configured with different base addresses, thus allowing more than 16-bits worth of memory to be accessed. In times of 64-bit processors, this is hardly a concern anymore. In fact, X86-64 aka AMD64 got rid of all but two of those segment registers: fs and gs.\n\nSo why keep two of them? It turns out that they are quite useful for the use-case of thread-local data. Since every thread can be configured to have its own base address in these segment registers, we can use it to point to a block of data for this specific thread. That is precisely what libc implementations on Linux are doing with the fs segment. The offset that we snatched from the process\'s memory earlier is used as an address with the fs segment register, and the CPU automatically adds it to the per-thread base address.\n\nTo retrieve the base address pointed to by the fs segment register in the kernel, we need to read its destination from the kernel’s task_struct for the thread that we happened to interrupt with our profiling timer event. Getting the task struct is easy because we are blessed with the bpf_get_current_task BPF helper function. BPF helpers are pretty much syscalls for BPF programs: we can just ask the Linux kernel to hand us the pointer.\n\nArmed with the task pointer, we now have to read the thread.fsbase (X86-64) or thread.uw.tp_value (aarch64) field to get our desired base address that the user-mode process accesses via fs. This is where things get complicated one last time, at least if we wish to support older kernels without [BTF support](https://www.kernel.org/doc/html/latest/bpf/btf.html) (we do!). The [task_struct is huge](https://github.com/torvalds/linux/blob/259f7d5e2baf87fcbb4fabc46526c9c47fed1914/include/linux/sched.h#L748) and there are hundreds of fields that can be present or not depending on how the kernel is configured. Being a core primitive of the scheduler, it is also constantly subject to changes between different kernel versions. On modern Linux distributions, the kernel is typically nice enough to tell us the offset via BTF. On older ones, the situation is more complicated. Since hardcoding the offset is clearly not an option if we want the code to be portable, we instead have to figure out the offset by ourselves.\n\nWe do this by consulting /proc/kallsyms, a file with mappings between kernel functions and their addresses, and then using BPF to dump the compiled code of a kernel function that rarely changes and uses the desired offset. We dynamically disassemble and analyze the function and extract the offset directly from the assembly. For X86-64 specifically, we dump the [aout_dump_debugregs](https://elixir.bootlin.com/linux/v5.9.16/source/arch/x86/kernel/hw_breakpoint.c#L452) function that accesses thread-\\>ptrace_bps, which has consistently been 16 bytes away from the fsbase field that we are interested in for all kernels that we have ever looked at.\n\n## Reading TLS data from kernel\n\nWith all the required offsets in hand, we can now finally do what we set out to do in the first place: use them to enrich our stack traces with the OTel trace and span IDs that our C++ library prepared for us!\n\n```java\nvoid maybe_add_otel_info(Trace* trace) {\n // Did user-mode insert a TLS offset for this process?
Read it.\\n TraceCorrProcInfo* proc = bpf_map_lookup_elem(&tracecorr_procs, &trace->pid);\\n\\n // No entry -> process doesn\'t have the C++ library loaded.\\n if (!proc) return;\\n\\n // Load the fsbase offset from our global configuration map.\\n u32 key = 0;\\n SystemConfig* syscfg = bpf_map_lookup_elem(&system_config, &key);\\n\\n // Read the fsbase offset from the kernel\'s task struct.\\n u8* fsbase;\\n u8* task = (u8*)bpf_get_current_task();\\n bpf_probe_read_kernel(&fsbase, sizeof(fsbase), task + syscfg->fsbase_offset);\\n\\n // Use the TLS offset to read the **pointer** to our TLS buffer.\\n void* corr_buf_ptr;\\n bpf_probe_read_user(\\n &corr_buf_ptr,\\n sizeof(corr_buf_ptr),\\n fsbase + proc->tls_offset\\n );\\n\\n // Read the information that our library prepared for us.\\n TraceCorrelationBuf corr_buf;\\n bpf_probe_read_user(&corr_buf, sizeof(corr_buf), corr_buf_ptr);\\n\\n // If the library reports that we are currently in a trace, store it into\\n // the stack trace that will be reported to our user-land process.\\n if (corr_buf.trace_present && corr_buf.valid) {\\n trace->otel_trace_id.as_int.hi = corr_buf.trace_id.as_int.hi;\\n trace->otel_trace_id.as_int.lo = corr_buf.trace_id.as_int.lo;\\n trace->otel_span_id.as_int = corr_buf.span_id.as_int;\\n }\\n}\\n```\\n\\n## Sending out the mappings\\n\\nFrom this point on, everything further is pretty simple. The C++ library sets up a unix datagram socket during startup and communicates the socket path to the profiler via the per-process data block. The stacktraces annotated with the OTel trace and span IDs are sent from BPF to our user-mode profiler process via perf event buffers, which in turn sends the mappings between OTel span and trace and stack trace hashes to the C++ library. Our extensions to the OTel instrumentation framework then read those mappings and insert the stack trace hashes into the OTel trace.\\n\\nThis approach has a few major upsides compared to the perhaps more obvious alternative of sending out the OTel span and trace ID with the profiler’s stacktrace records. We want the stacktrace associations to be stored in the trace indices to allow filtering and aggregating stacktraces by the plethora of fields available on OTel traces. If we were to send out the trace IDs via the profiler\'s gRPC connection instead, we’d have to search for and update the corresponding OTel trace records in the profiling collector to insert the stack trace hashes.\\n\\nThis is not trivial: stacktraces are sent out rather frequently (every 5 seconds, as of writing) and the corresponding OTel trace might not have been sent and stored by the time the corresponding stack traces arrive in our cluster. We’d have to build a kind of delay queue and periodically retry updating the OTel trace documents, introducing avoidable database work and complexity in the collectors. With the approach of sending stacktrace mappings to the OTel instrumented process instead, the need for server-side merging vanishes entirely.\\n\\n## Trace correlation in action\\n\\nWith all the hard work out of the way, let’s take a look at what trace correlation looks like in action!\\n\\n\\n\\n## Future work: Supporting other languages\\n\\nWe have demonstrated that trace correlation can work nicely for Java, but we have no intention of stopping there. The general approach that we discussed previously should work for any language that can efficiently load and call into our C++ library and doesn’t do user-mode scheduling with coroutines. 
The problem with user-mode scheduling is that the logical thread can change at any await/yield point, requiring us to update the trace IDs in TLS. Many such coroutine environments like Rust’s Tokio provide the ability to register a callback for whenever the active task is swapped, so they can be supported easily. Other languages, however, do not provide that option.\\n\\nOne prominent example in that category is Go: goroutines are built on user-mode scheduling, but to our knowledge there’s no way to instrument the scheduler. Such languages will need solutions that don’t go via the generic TLS path. For Go specifically, we have already built a prototype that uses pprof labels that are associated with a specific Goroutine, having Go’s scheduler update them for us automatically.\\n\\n## Getting started\\n\\nWe hope this blog post has given you an overview of correlating profiling signals to distributed tracing, and its benefits for end-users.\\n\\nTo get started, download the [Elastic distribution of the OTel agent](https://github.com/elastic/elastic-otel-java), which contains the new trace correlation library. Additionally, you will need the latest version of Universal Profiling agent, bundled with [Elastic Stack version 8.13](https://www.elastic.co/blog/whats-new-elastic-8-13-0).\\n\\n## Acknowledgment\\n\\nWe appreciate [Trask Stalnaker](https://github.com/trask), maintainer of the OTel Java agent, for his feedback on our approach and for reviewing the early draft of this blog post.\\n\\n_The release and timing of any features or functionality described in this post remain at Elastic\'s sole discretion. Any features or functionality not currently available may not be delivered on time or at all._\\n","code":"var Component=(()=>{var p=Object.create;var i=Object.defineProperty;var u=Object.getOwnPropertyDescriptor;var f=Object.getOwnPropertyNames;var g=Object.getPrototypeOf,m=Object.prototype.hasOwnProperty;var b=(a,e)=>()=>(e||a((e={exports:{}}).exports,e),e.exports),y=(a,e)=>{for(var n in e)i(a,n,{get:e[n],enumerable:!0})},s=(a,e,n,o)=>{if(e&&typeof e==\\"object\\"||typeof e==\\"function\\")for(let r of f(e))!m.call(a,r)&&r!==n&&i(a,r,{get:()=>e[r],enumerable:!(o=u(e,r))||o.enumerable});return a};var w=(a,e,n)=>(n=a!=null?p(g(a)):{},s(e||!a||!a.__esModule?i(n,\\"default\\",{value:a,enumerable:!0}):n,a)),v=a=>s(i({},\\"__esModule\\",{value:!0}),a);var c=b((L,l)=>{l.exports=_jsx_runtime});var k={};y(k,{default:()=>d,frontmatter:()=>_});var t=w(c()),_={title:\\"Beyond the trace: Pinpointing performance culprits with continuous profiling and distributed tracing correlation\\",slug:\\"continuous-profiling-distributed-tracing-correlation\\",date:\\"2024-03-28\\",description:\\"Frustrated by slow traces but unsure where the code bottleneck lies? Elastic Universal Profiling correlates profiling stacktraces with OpenTelemetry (OTel) traces, helping you identify and pinpoint the exact lines of code causing performance issues.\\",author:[{slug:\\"joel-honer\\"},{slug:\\"israel-ogbole\\"},{slug:\\"jonas-kunz\\"}],image:\\"Under_highway_bridge.jpg\\",tags:[{slug:\\"universal-profiling\\"},{slug:\\"apm\\"},{slug:\\"opentelemetry\\"}]};function h(a){let e={a:\\"a\\",code:\\"code\\",em:\\"em\\",h2:\\"h2\\",img:\\"img\\",li:\\"li\\",p:\\"p\\",pre:\\"pre\\",strong:\\"strong\\",ul:\\"ul\\",...a.components},{Video:n}=e;return n||T(\\"Video\\",!0),(0,t.jsxs)(t.Fragment,{children:[(0,t.jsxs)(e.p,{children:[\\"Observability goes beyond monitoring; it\'s about truly understanding your system. 
To achieve this comprehensive view, practitioners need a unified observability solution that natively combines insights from metrics, logs, traces, and crucially, \\",(0,t.jsx)(e.strong,{children:\\"continuous profiling\\"}),`. While metrics, logs, and traces offer valuable insights, they can\'t answer the all-important \\"why.\\" Continuous profiling signals act as a magnifying glass, providing granular code visibility into the system\'s hidden complexities. They fill the gap left by other data sources, enabling you to answer critical questions \\\\u2013\\\\u2013 why is this trace slow? Where exactly in the code is the bottleneck residing?`]}),`\\n`,(0,t.jsx)(e.p,{children:`Traces provide the \\"what\\" and \\"where\\" \\\\u2014 what happened and where in your system. Continuous profiling refines this understanding by pinpointing the \\"why\\" and validating your hypotheses about the \\"what.\\" Just like a full-body MRI scan, Elastic\'s whole-system continuous profiling (powered by eBPF) uncovers unknown-unknowns in your system. This includes not just your code, but also third-party libraries and kernel activity triggered by your application transactions. This comprehensive visibility improves your mean-time-to-detection (MTTD) and mean-time-to-recovery (MTTR) KPIs.`}),`\\n`,(0,t.jsxs)(e.p,{children:[(0,t.jsx)(e.em,{children:\\"[Related article:\\"}),\\" \\",(0,t.jsx)(e.a,{href:\\"https://www.elastic.co/blog/observability-profiling-metrics-logs-traces\\",rel:\\"nofollow\\",children:(0,t.jsx)(e.em,{children:\\"Why metrics, logs, and traces aren\\\\u2019t enough\\"})}),(0,t.jsx)(e.em,{children:\\"]\\"})]}),`\\n`,(0,t.jsx)(e.h2,{id:\\"bridging-the-disconnect-between-continuous-profiling-and-otel-traces\\",children:\\"Bridging the disconnect between continuous profiling and OTel traces\\"}),`\\n`,(0,t.jsx)(e.p,{children:\\"Historically, continuous profiling signals have been largely disconnected from OpenTelemetry (OTel) traces. Here\'s the exciting news: we\'re bridging this gap! We\'re introducing native correlation between continuous profiling signals and OTel traces, starting with Java.\\"}),`\\n`,(0,t.jsx)(e.p,{children:`Imagine this: You\'re troubleshooting a performance issue and identify a slow trace. Whole-system continuous profiling steps in, acting like an MRI scan for your entire codebase and system. It narrows down the culprit to the specific lines of code hogging CPU time within the context of your distributed trace. This empowers you to answer the \\"why\\" question with minimal effort and confidence, all within the same troubleshooting context.`}),`\\n`,(0,t.jsxs)(e.p,{children:[\\"Furthermore, by correlating continuous profiling with distributed tracing, Elastic Observability customers can measure the cloud cost and CO\\",(0,t.jsx)(\\"sub\\",{children:\\"2\\"}),\\" impact of every code change at the service and transaction level.\\"]}),`\\n`,(0,t.jsxs)(e.p,{children:[\\"This milestone is significant, especially considering the recent developments in the OTel community. 
With \\",(0,t.jsx)(e.a,{href:\\"https://www.cncf.io/blog/2024/03/19/opentelemetry-announces-support-for-profiling/\\",rel:\\"nofollow\\",children:\\"OTel adopting profiling\\"}),\\" and Elastic \\",(0,t.jsx)(e.a,{href:\\"https://www.elastic.co/blog/elastic-donation-proposal-to-contribute-profiling-agent-to-opentelemetry\\",rel:\\"nofollow\\",children:\\"donating the industry\\\\u2019s most advanced eBPF-based continuous profiling agent to OTel\\"}),\\", we\'re set for a game-changer in observability \\\\u2014 empowering OTel end users with a correlated system visibility that goes from a trace span in the userspace down to the kernel.\\"]}),`\\n`,(0,t.jsx)(e.p,{children:\\"Furthermore, achieving this goal, especially with Java, presented significant challenges and demanded serious engineering R&D. This blog post will delve into these challenges, explore the approaches we considered in our proof-of-concepts, and explain how we arrived at a solution that can be easily extended to other OTel language agents. Most importantly, this solution correlates traces with profiling signals at the agent, not in the backend \\\\u2014 to ensure optimal query performance and minimal reliance on vendor backend storage architectures.\\"}),`\\n`,(0,t.jsx)(e.p,{children:(0,t.jsx)(e.img,{src:\\"/assets/images/continuous-profiling-distributed-tracing-correlation/trace.png\\",alt:\\"Profiling flamegraph for a specific trace.id\\",width:\\"3090\\",height:\\"1760\\"})}),`\\n`,(0,t.jsx)(e.h2,{id:\\"figuring-out-the-active-otel-trace-and-span\\",children:\\"Figuring out the active OTel trace and span\\"}),`\\n`,(0,t.jsx)(e.p,{children:\\"The primary technical challenge in this endeavor is essentially the following: whenever the profiler interrupts an OTel instrumented process to capture a stacktrace, we need to be able to efficiently determine the active span and trace ID (per-thread) and the service name (per-process).\\"}),`\\n`,(0,t.jsxs)(e.p,{children:[\\"For the purpose of this blog, we\'ll focus on the recently released \\",(0,t.jsx)(e.a,{href:\\"https://github.com/elastic/elastic-otel-java\\",rel:\\"nofollow\\",children:\\"Elastic distribution of the OTel Java instrumentation\\"}),\\", but the approach that we ended up with generalizes to any language that can load and call into a native library. So, how do we get our hands on those IDs?\\"]}),`\\n`,(0,t.jsx)(e.p,{children:(0,t.jsx)(e.img,{src:\\"/assets/images/continuous-profiling-distributed-tracing-correlation/service-popout.png\\",alt:\\"Profiling correlated with service.name, showing CO2 and cloud cost impact by line of code.\\",width:\\"3192\\",height:\\"2068\\"})}),`\\n`,(0,t.jsxs)(e.p,{children:[\\"The OTel Java agent itself keeps track of the active span by storing a stack of spans in the \\",(0,t.jsx)(e.a,{href:\\"https://opentelemetry.io/docs/concepts/context-propagation/#context\\",rel:\\"nofollow\\",children:\\"OpenTelemetryContext\\"}),\\", which itself is stored in a \\",(0,t.jsx)(e.a,{href:\\"https://docs.oracle.com/javase/8/docs/api/java/lang/ThreadLocal.html\\",rel:\\"nofollow\\",children:\\"ThreadLocal\\"}),\\" variable. We originally considered reading these Java structures directly from BPF, but we eventually decided against that approach. There is no documented specification on how ThreadLocals are implemented, and reliably reading and following the JVM\'s internal data-structures would incur a high maintenance burden. Any minor update to the JVM could change details of the structure layouts. 
To add to this, we would also have to reverse engineer how each JVM version lays out Java class fields in memory, as well as how all the high-level Java types used in the context objects are actually implemented under the hood. This approach further wouldn\'t generalize to any non-JVM language and needs to be repeated for any language that we wish to support.\\"]}),`\\n`,(0,t.jsx)(e.p,{children:\\"After we had convinced ourselves that reading Java ThreadLocal directly is not the answer, we decided to look for more portable alternatives instead. The option that we ultimately settled with is to load and call into a C++ library that is responsible for making the required information available via a known and defined interface whenever the span changes.\\"}),`\\n`,(0,t.jsx)(e.p,{children:\\"Other than with Java\'s ThreadLocals, the details on how a native shared library should expose per-process and per-thread data are well-defined in the System V ABI specification and the architecture specific ELF ABI documents.\\"}),`\\n`,(0,t.jsx)(e.h2,{id:\\"exposing-per-process-information\\",children:\\"Exposing per-process information\\"}),`\\n`,(0,t.jsx)(e.p,{children:\\"Exposing per-process data is easy: we simply declare a global variable . . .\\"}),`\\n`,(0,t.jsx)(e.pre,{children:(0,t.jsx)(e.code,{className:\\"language-java\\",children:`void* elastic_tracecorr_process_storage_v1 = nullptr;\\n`})}),`\\n`,(0,t.jsxs)(e.p,{children:[\\". . . and expose it via ELF symbols. When the user initializes the OTel library to set the service name, we allocate a buffer and populate it with data in a \\",(0,t.jsx)(e.a,{href:\\"https://github.com/elastic/apm/blob/149cd3e39a77a58002344270ed2ad35357bdd02d/specs/agents/universal-profiling-integration.md#process-storage-layout\\",rel:\\"nofollow\\",children:\\"protocol that we defined for this purpose\\"}),\\". Once the buffer is fully populated, we update the global pointer to point to the buffer.\\"]}),`\\n`,(0,t.jsx)(e.p,{children:\\"On the profiling agent side, we already have code in place that detects libraries and executables loaded into any process\'s address space. We normally use this mechanism to detect and analyze high-level language interpreters (e.g., libpython, libjvm) when they are loaded, but it also turned out to be a perfect fit to detect the OTel trace correlation library. When the library is detected in a process, we scan the exports, resolve the symbol, and read the per-process information directly from the instrumented process\\\\u2019 memory.\\"}),`\\n`,(0,t.jsx)(e.h2,{id:\\"exposing-per-thread-information\\",children:\\"Exposing per-thread information\\"}),`\\n`,(0,t.jsxs)(e.p,{children:[\\"With the easy part out of the way, let\'s get to the nitty-gritty portion: exposing per-thread information via thread-local storage (TLS). So, what exactly is TLS, and how does it work? At the most basic level, the idea is to have \\",(0,t.jsx)(e.strong,{children:\\"one instance of a variable for every thread\\"}),\\". Semantically you can think of it like having a global Map, although that is not how it is implemented.\\"]}),`\\n`,(0,t.jsx)(e.p,{children:\\"On Linux, there are two major options for thread locals: TSD and TLS.\\"}),`\\n`,(0,t.jsx)(e.h2,{id:\\"thread-specific-data-tsd\\",children:\\"Thread-specific data (TSD)\\"}),`\\n`,(0,t.jsx)(e.p,{children:\\"TSD is the older and probably more commonly known variant. 
It works by explicitly allocating a key via pthread_key_create \\\\u2014 usually during process startup \\\\u2014 and passing it to all threads that require access to the thread-local variable. The threads can then pass that key to the pthread_getspecific and pthread_setspecific functions to read and update the variable for the currently running thread.\\"}),`\\n`,(0,t.jsx)(e.p,{children:\\"TSD is simple, but for our purposes it has a range of drawbacks:\\"}),`\\n`,(0,t.jsxs)(e.ul,{children:[`\\n`,(0,t.jsxs)(e.li,{children:[`\\n`,(0,t.jsx)(e.p,{children:\\"The pthread_key_t structure is opaque and doesn\'t have a defined layout. Similar to the Java ThreadLocals, the underlying data-structures aren\'t defined by the ABI documents and different libc implementations (glibc, musl) will handle them differently.\\"}),`\\n`]}),`\\n`,(0,t.jsxs)(e.li,{children:[`\\n`,(0,t.jsx)(e.p,{children:\\"We cannot call a function like pthread_getspecific from BPF, so we\'d have to reverse engineer and reimplement the logic. Logic may change between libc versions, and we\\\\u2019d have to detect the version and support all variants that may come up in the wild.\\"}),`\\n`]}),`\\n`,(0,t.jsxs)(e.li,{children:[`\\n`,(0,t.jsx)(e.p,{children:\\"TSD performance is not predictable and varies depending on how many thread local variables have been allocated in the process previously. This may not be a huge concern for Java specifically since spans are typically not swapped super rapidly, but it\\\\u2019d likely be quite noticeable for user-mode scheduling languages where the context might need to be swapped at every await point/coroutine yield.\\"}),`\\n`]}),`\\n`]}),`\\n`,(0,t.jsx)(e.p,{children:\\"None of this is strictly prohibitive, but a lot of this is annoying at the very least. Let\\\\u2019s see if we can do better!\\"}),`\\n`,(0,t.jsx)(e.h2,{id:\\"thread-local-storage-tls\\",children:\\"Thread-local storage (TLS)\\"}),`\\n`,(0,t.jsx)(e.p,{children:\\"Starting with C11 and C++11, both languages support thread local variables directly via the _Thread_local and thread_local storage specifiers, respectively. Declaring a variable as per-thread is now a matter of simply adding the keyword:\\"}),`\\n`,(0,t.jsx)(e.pre,{children:(0,t.jsx)(e.code,{className:\\"language-java\\",children:`thread_local void* elastic_tracecorr_tls_v1 = nullptr;\\n`})}),`\\n`,(0,t.jsxs)(e.p,{children:[\\"You might assume that the compiler simply inserts calls to the corresponding pthread function calls when variables declared with this are accessed, but this is not actually the case. The reality is surprisingly complicated, and it turns out that there are four different models of TLS that the compiler can choose to generate. For some of those models, there are further multiple dialects that can be used to implement them. The different models and dialects come with various portability versus performance trade-offs. If you are interested in the details, I suggest reading this \\",(0,t.jsx)(e.a,{href:\\"https://maskray.me/blog/2021-02-14-all-about-thread-local-storage\\",rel:\\"nofollow\\",children:\\"blog article\\"}),\\" that does a great job at explaining them.\\"]}),`\\n`,(0,t.jsx)(e.p,{children:\\"The TLS model and dialect are usually chosen by the compiler based on a somewhat opaque and complicated set of architecture-specific rules. Fortunately for us, both gcc and clang allow users to pick a particular one using the -ftls-model and -mtls-dialect arguments. 
The variant that we ended up picking for our purposes is -ftls-model=global-dynamic and -mtls-dialect=gnu2 (and desc on aarch64).\\"}),`\\n`,(0,t.jsx)(e.p,{children:\\"Let\'s take a look at the assembly that is being generated when accessing a thread_local variable under these settings. Our function:\\"}),`\\n`,(0,t.jsx)(e.pre,{children:(0,t.jsx)(e.code,{className:\\"language-java\\",children:`void setThreadProfilingCorrelationBuffer(JNIEnv* jniEnv, jobject bytebuffer) {\\n if (bytebuffer == nullptr) {\\n elastic_tracecorr_tls_v1 = nullptr;\\n } else {\\n elastic_tracecorr_tls_v1 = jniEnv->GetDirectBufferAddress(bytebuffer);\\n }\\n}\\n`})}),`\\n`,(0,t.jsx)(e.p,{children:\\"Is compiled to the following assembly code:\\"}),`\\n`,(0,t.jsx)(e.p,{children:(0,t.jsx)(e.img,{src:\\"/assets/images/continuous-profiling-distributed-tracing-correlation/assembly.png\\",alt:\\"assembly\\",width:\\"1600\\",height:\\"566\\"})}),`\\n`,(0,t.jsx)(e.p,{children:\\"Both possible branches assign a value to our thread-local variable. Let\\\\u2019s focus at the right branch corresponding to the nullptr case to get rid of the noise from the GetDirectBufferAddress function call:\\"}),`\\n`,(0,t.jsx)(e.pre,{children:(0,t.jsx)(e.code,{className:\\"language-java\\",children:`lea rax, elastic_tracecorr_tls_v1_tlsdesc ;; Load some pointer into rax.\\ncall qword ptr [rax] ;; Read & call function pointer at rax.\\nmov qword ptr fs:[rax], 0 ;; Assign 0 to the pointer returned by\\n ;; the function that we just called.\\n`})}),`\\n`,(0,t.jsx)(e.p,{children:\\"The fs: portion of the mov instruction is the actual magic bit that makes the memory read per-thread. We\\\\u2019ll get to that later; let\\\\u2019s first look at the mysterious elastic_tracecorr_tls_v1_tlsdesc variable that the compiler emitted here. It\\\\u2019s an instance of the tlsdesc structure that is located somewhere in the .got.plt ELF section. The structure looks like this:\\"}),`\\n`,(0,t.jsx)(e.pre,{children:(0,t.jsx)(e.code,{className:\\"language-java\\",children:`struct tlsdesc {\\n // Function pointer used to retrieve the offset\\n uint64_t (*resolver)(tlsdesc*);\\n\\n // TLS offset -- more on that later.\\n uint64_t tp_offset;\\n}\\n`})}),`\\n`,(0,t.jsx)(e.p,{children:\\"The resolver field is initialized with nullptr and tp_offset with a per-executable offset. The first thread-local variable in an executable will usually have offset 0, the next one sizeof(first_var), and so on. At first glance this may appear to be similar to how TSD works, with the call to pthread_getspecific to resolve the actual offset, but there is a crucial difference. When the library is loaded, the resolver field is filled in with the address of __tls_get_addr by the loader (ld.so). __tls_get_addr is a relatively heavy function that allocates a TLS offset that is globally unique between all shared libraries in the process. It then proceeds by updating the tlsdesc structure itself, inserting the global offset and replacing the resolver function with a trivial one:\\"}),`\\n`,(0,t.jsx)(e.pre,{children:(0,t.jsx)(e.code,{className:\\"language-java\\",children:`void* second_stage_resolver(tlsdesc* desc) {\\n return tlsdesc->tp_offset;\\n}\\n`})}),`\\n`,(0,t.jsx)(e.p,{children:\\"In essence, this means that the first access to a tlsdesc based thread-local variable is rather expensive, but all subsequent ones are cheap. We further know that by the time that our C++ library starts publishing per-thread data, it must have gone through the initial resolving process already. 
Consequently, all that we need to do is to read the final offset from the process\'s memory and memorize it. We also refresh the offset every now and then to ensure that we really have the final offset, combating the unlikely but possible race condition that we read the offset before it was initialized. We can detect this case by comparing the resolver address against the address of the __tls_get_addr function exported by ld.so.\\"}),`\\n`,(0,t.jsx)(e.h2,{id:\\"determining-the-tls-offset-from-an-external-process\\",children:\\"Determining the TLS offset from an external process\\"}),`\\n`,(0,t.jsx)(e.p,{children:\\"With that out of the way, the next question that arises is how to actually find the tlsdesc in memory so that we can read the offset. Intuitively one might expect that the dynamic symbol exported on the ELF file points to that descriptor, but that is not actually the case.\\"}),`\\n`,(0,t.jsx)(e.pre,{children:(0,t.jsx)(e.code,{className:\\"language-bash\\",children:`$ readelf --wide --dyn-syms elastic-jvmti-linux-x64.so | grep elastic_tracecorr_tls_v1\\n328: 0000000000000000 \\t8 TLS \\tGLOBAL DEFAULT 19 elastic_tracecorr_tls_v1\\n`})}),`\\n`,(0,t.jsx)(e.p,{children:\\"The dynamic symbol instead contains an offset relative to the start of the .tls ELF section and points to the initial value that libc initializes the TLS value with when it is allocated. So how does ld.so find the tlsdesc to fill in the initial resolver? In addition to the dynamic symbol, the compiler also emits a relocation record for our symbol, and that one actually points to the descriptor structure that we are looking for.\\"}),`\\n`,(0,t.jsx)(e.pre,{children:(0,t.jsx)(e.code,{className:\\"language-bash\\",children:`$ readelf --relocs --wide elastic-jvmti-linux-x64.so | grep R_X86_64_TLSDESC\\n00000000000426e8 0000014800000024 R_X86_64_TLSDESC \\t0000000000000000\\nelastic_tracecorr_tls_v1 + 0\\n`})}),`\\n`,(0,t.jsx)(e.p,{children:\\"To read the final TLS offset, we thus simply have to:\\"}),`\\n`,(0,t.jsxs)(e.ul,{children:[`\\n`,(0,t.jsxs)(e.li,{children:[`\\n`,(0,t.jsx)(e.p,{children:\\"Wait for the event notifying us about a new shared library being loaded into a process\\"}),`\\n`]}),`\\n`,(0,t.jsxs)(e.li,{children:[`\\n`,(0,t.jsx)(e.p,{children:\\"Do some cheap heuristics to detect our C++ library, avoiding the more expensive analysis below from being executed for every unrelated library on the system\\"}),`\\n`]}),`\\n`,(0,t.jsxs)(e.li,{children:[`\\n`,(0,t.jsx)(e.p,{children:\\"Analyze the library on disk and scan ELF relocations for our per-thread variable to extract the tlsdesc address\\"}),`\\n`]}),`\\n`,(0,t.jsxs)(e.li,{children:[`\\n`,(0,t.jsx)(e.p,{children:\\"Rebase that address to match where our library was loaded in that particular process\\"}),`\\n`]}),`\\n`,(0,t.jsxs)(e.li,{children:[`\\n`,(0,t.jsx)(e.p,{children:\\"Read the offset from tlsdesc+8\\"}),`\\n`]}),`\\n`]}),`\\n`,(0,t.jsx)(e.h2,{id:\\"determining-the-tls-base\\",children:\\"Determining the TLS base\\"}),`\\n`,(0,t.jsx)(e.p,{children:\\"Now that we have the offset, how do we use that to actually read the data that the library puts there for us? This brings us back to the magic fs: portion of the mov instruction that we discussed earlier. In X86, most memory operands can optionally be supplied with a segment register that influences the address translation.\\"}),`\\n`,(0,t.jsx)(e.p,{children:\\"Segments are an archaic construct from the early days of 16-bit X86 where they were used to extend the address space. 
Essentially the architecture provides a range of segment registers that can be configured with different base addresses, thus allowing more than 16-bits worth of memory to be accessed. In times of 64-bit processors, this is hardly a concern anymore. In fact, X86-64 aka AMD64 got rid of all but two of those segment registers: fs and gs.\\"}),`\\n`,(0,t.jsx)(e.p,{children:\\"So why keep two of them? It turns out that they are quite useful for the use-case of thread-local data. Since every thread can be configured to have its own base address in these segment registers, we can use it to point to a block of data for this specific thread. That is precisely what libc implementations on Linux are doing with the fs segment. The offset that we snatched from the processes memory earlier is used as an address with the fs segment register, and the CPU automatically adds it to the per-thread base address.\\"}),`\\n`,(0,t.jsx)(e.p,{children:\\"To retrieve the base address pointed to by the fs segment register in the kernel, we need to read its destination from the kernel\\\\u2019s task_struct for the thread that we happened to interrupt with our profiling timer event. Getting the task struct is easy because we are blessed with the bpf_get_current_task BPF helper functions. BPF helpers are pretty much syscalls for BPF programs: we can just ask the Linux kernel to hand us the pointer.\\"}),`\\n`,(0,t.jsxs)(e.p,{children:[\\"Armed with the task pointer, we now have to read the thread.fsbase (X86-64) or thread.uw.tp_value (aarch64) field to get our desired base address that the user-mode process accesses via fs. This is where things get complicated one last time, at least if we wish to support older kernels without \\",(0,t.jsx)(e.a,{href:\\"https://www.kernel.org/doc/html/latest/bpf/btf.html\\",rel:\\"nofollow\\",children:\\"BTF support\\"}),\\" (we do!). The \\",(0,t.jsx)(e.a,{href:\\"https://github.com/torvalds/linux/blob/259f7d5e2baf87fcbb4fabc46526c9c47fed1914/include/linux/sched.h#L748\\",rel:\\"nofollow\\",children:\\"task_struct is huge\\"}),\\" and there are hundreds of fields that can be present or not depending on how the kernel is configured. Being a core primitive of the scheduler, it is also constantly subject to changes between different kernel versions. On modern Linux distributions, the kernel is typically nice enough to tell us the offset via BTF. On older ones, the situation is more complicated. Since hardcoding the offset is clearly not an option if we hope the code to be portable, we instead have to figure out the offset by ourselves.\\"]}),`\\n`,(0,t.jsxs)(e.p,{children:[\\"We do this by consulting /proc/kallsyms, a file with mappings between kernel functions and their addresses, and then using BPF to dump the compiled code of a kernel function that rarely changes and uses the desired offset. We dynamically disassemble and analyze the function and extract the offset directly from the assembly. 
For X86-64 specifically, we dump the \\",(0,t.jsx)(e.a,{href:\\"https://elixir.bootlin.com/linux/v5.9.16/source/arch/x86/kernel/hw_breakpoint.c#L452\\",rel:\\"nofollow\\",children:\\"aout_dump_debugregs\\"}),\\" function that accesses thread->ptrace_bps, which has consistently been 16 bytes away from the fsbase field that we are interested in for all kernels that we have ever looked at.\\"]}),`\\n`,(0,t.jsx)(e.h2,{id:\\"reading-tls-data-from-kernel\\",children:\\"Reading TLS data from kernel\\"}),`\\n`,(0,t.jsx)(e.p,{children:\\"With all the required offsets at our hands, we can now finally do what we set out to do in the first place: use them to enrich our stack traces with the OTel trace and span IDs that our C++ library prepared for us!\\"}),`\\n`,(0,t.jsx)(e.pre,{children:(0,t.jsx)(e.code,{className:\\"language-java\\",children:`void maybe_add_otel_info(Trace* trace) {\\n // Did user-mode insert a TLS offset for this process? Read it.\\n TraceCorrProcInfo* proc = bpf_map_lookup_elem(&tracecorr_procs, &trace->pid);\\n\\n // No entry -> process doesn\'t have the C++ library loaded.\\n if (!proc) return;\\n\\n // Load the fsbase offset from our global configuration map.\\n u32 key = 0;\\n SystemConfig* syscfg = bpf_map_lookup_elem(&system_config, &key);\\n\\n // Read the fsbase offset from the kernel\'s task struct.\\n u8* fsbase;\\n u8* task = (u8*)bpf_get_current_task();\\n bpf_probe_read_kernel(&fsbase, sizeof(fsbase), task + syscfg->fsbase_offset);\\n\\n // Use the TLS offset to read the **pointer** to our TLS buffer.\\n void* corr_buf_ptr;\\n bpf_probe_read_user(\\n &corr_buf_ptr,\\n sizeof(corr_buf_ptr),\\n fsbase + proc->tls_offset\\n );\\n\\n // Read the information that our library prepared for us.\\n TraceCorrelationBuf corr_buf;\\n bpf_probe_read_user(&corr_buf, sizeof(corr_buf), corr_buf_ptr);\\n\\n // If the library reports that we are currently in a trace, store it into\\n // the stack trace that will be reported to our user-land process.\\n if (corr_buf.trace_present && corr_buf.valid) {\\n trace->otel_trace_id.as_int.hi = corr_buf.trace_id.as_int.hi;\\n trace->otel_trace_id.as_int.lo = corr_buf.trace_id.as_int.lo;\\n trace->otel_span_id.as_int = corr_buf.span_id.as_int;\\n }\\n}\\n`})}),`\\n`,(0,t.jsx)(e.h2,{id:\\"sending-out-the-mappings\\",children:\\"Sending out the mappings\\"}),`\\n`,(0,t.jsx)(e.p,{children:\\"From this point on, everything further is pretty simple. The C++ library sets up a unix datagram socket during startup and communicates the socket path to the profiler via the per-process data block. The stacktraces annotated with the OTel trace and span IDs are sent from BPF to our user-mode profiler process via perf event buffers, which in turn sends the mappings between OTel span and trace and stack trace hashes to the C++ library. Our extensions to the OTel instrumentation framework then read those mappings and insert the stack trace hashes into the OTel trace.\\"}),`\\n`,(0,t.jsx)(e.p,{children:\\"This approach has a few major upsides compared to the perhaps more obvious alternative of sending out the OTel span and trace ID with the profiler\\\\u2019s stacktrace records. We want the stacktrace associations to be stored in the trace indices to allow filtering and aggregating stacktraces by the plethora of fields available on OTel traces. 
If we were to send out the trace IDs via the profiler\'s gRPC connection instead, we\\u2019d have to search for and update the corresponding OTel trace records in the profiling collector to insert the stack trace hashes.\\"}),`\\n`,(0,t.jsx)(e.p,{children:\\"This is not trivial: stacktraces are sent out rather frequently (every 5 seconds, as of writing) and the corresponding OTel trace might not have been sent and stored by the time the corresponding stack traces arrive in our cluster. We\\\\u2019d have to build a kind of delay queue and periodically retry updating the OTel trace documents, introducing avoidable database work and complexity in the collectors. With the approach of sending stacktrace mappings to the OTel-instrumented process instead, the need for server-side merging vanishes entirely.\\"}),`\\n`,(0,t.jsx)(e.h2,{id:\\"trace-correlation-in-action\\",children:\\"Trace correlation in action\\"}),`\\n`,(0,t.jsx)(e.p,{children:\\"With all the hard work out of the way, let\\\\u2019s take a look at trace correlation in action!\\"}),`\\n`,(0,t.jsx)(n,{vidyardUuid:\\"JYTzQYeiJ6CK6K3hZ33sz5\\"}),`\\n`,(0,t.jsx)(e.h2,{id:\\"future-work-supporting-other-languages\\",children:\\"Future work: Supporting other languages\\"}),`\\n`,(0,t.jsx)(e.p,{children:\\"We have demonstrated that trace correlation can work nicely for Java, but we have no intention of stopping there. The general approach that we discussed previously should work for any language that can efficiently load and call into our C++ library and doesn\\\\u2019t do user-mode scheduling with coroutines. The problem with user-mode scheduling is that the logical thread can change at any await/yield point, requiring us to update the trace IDs in TLS. Many coroutine runtimes, such as Rust\\\\u2019s Tokio, provide the ability to register a callback whenever the active task is swapped, so they can be supported easily. Other languages, however, do not provide that option.\\"}),`\\n`,(0,t.jsx)(e.p,{children:\\"One prominent example in that category is Go: goroutines are built on user-mode scheduling, but to our knowledge there\\\\u2019s no way to instrument the scheduler. Such languages will need solutions that don\\\\u2019t go via the generic TLS path. For Go specifically, we have already built a prototype that uses pprof labels that are associated with a specific goroutine, letting Go\\\\u2019s scheduler update them for us automatically.\\"}),`\\n`,(0,t.jsx)(e.h2,{id:\\"getting-started\\",children:\\"Getting started\\"}),`\\n`,(0,t.jsx)(e.p,{children:\\"We hope this blog post has given you an overview of correlating profiling signals with distributed traces, and of the benefits for end users.\\"}),`\\n`,(0,t.jsxs)(e.p,{children:[\\"To get started, download the \\",(0,t.jsx)(e.a,{href:\\"https://github.com/elastic/elastic-otel-java\\",rel:\\"nofollow\\",children:\\"Elastic distribution of the OTel agent\\"}),\\", which contains the new trace correlation library.
Additionally, you will need the latest version of Universal Profiling agent, bundled with \\",(0,t.jsx)(e.a,{href:\\"https://www.elastic.co/blog/whats-new-elastic-8-13-0\\",rel:\\"nofollow\\",children:\\"Elastic Stack version 8.13\\"}),\\".\\"]}),`\\n`,(0,t.jsx)(e.h2,{id:\\"acknowledgment\\",children:\\"Acknowledgment\\"}),`\\n`,(0,t.jsxs)(e.p,{children:[\\"We appreciate \\",(0,t.jsx)(e.a,{href:\\"https://github.com/trask\\",rel:\\"nofollow\\",children:\\"Trask Stalnaker\\"}),\\", maintainer of the OTel Java agent, for his feedback on our approach and for reviewing the early draft of this blog post.\\"]}),`\\n`,(0,t.jsx)(e.p,{children:(0,t.jsx)(e.em,{children:\\"The release and timing of any features or functionality described in this post remain at Elastic\'s sole discretion. Any features or functionality not currently available may not be delivered on time or at all.\\"})})]})}function d(a={}){let{wrapper:e}=a.components||{};return e?(0,t.jsx)(e,{...a,children:(0,t.jsx)(h,{...a})}):h(a)}function T(a,e){throw new Error(\\"Expected \\"+(e?\\"component\\":\\"object\\")+\\" `\\"+a+\\"` to be defined: you likely forgot to import, pass, or provide it.\\")}return v(k);})();\\n;return Component;"},"_id":"articles/continuous-profiling-distributed-tracing-correlation.mdx","_raw":{"sourceFilePath":"articles/continuous-profiling-distributed-tracing-correlation.mdx","sourceFileName":"continuous-profiling-distributed-tracing-correlation.mdx","sourceFileDir":"articles","contentType":"mdx","flattenedPath":"articles/continuous-profiling-distributed-tracing-correlation"},"type":"Article","imageUrl":"/assets/images/continuous-profiling-distributed-tracing-correlation/Under_highway_bridge.jpg","readingTime":"20 min read","url":"/continuous-profiling-distributed-tracing-correlation","headings":[{"level":2,"title":"Bridging the disconnect between continuous profiling and OTel traces","href":"#bridging-the-disconnect-between-continuous-profiling-and-otel-traces"},{"level":2,"title":"Figuring out the active OTel trace and span","href":"#figuring-out-the-active-otel-trace-and-span"},{"level":2,"title":"Exposing per-process information","href":"#exposing-per-process-information"},{"level":2,"title":"Exposing per-thread information","href":"#exposing-per-thread-information"},{"level":2,"title":"Thread-specific data (TSD)","href":"#thread-specific-data-tsd"},{"level":2,"title":"Thread-local storage (TLS)","href":"#thread-local-storage-tls"},{"level":2,"title":"Determining the TLS offset from an external process","href":"#determining-the-tls-offset-from-an-external-process"},{"level":2,"title":"Determining the TLS base","href":"#determining-the-tls-base"},{"level":2,"title":"Reading TLS data from kernel","href":"#reading-tls-data-from-kernel"},{"level":2,"title":"Sending out the mappings","href":"#sending-out-the-mappings"},{"level":2,"title":"Trace correlation in action","href":"#trace-correlation-in-action"},{"level":2,"title":"Future work: Supporting other languages","href":"#future-work-supporting-other-languages"},{"level":2,"title":"Getting started","href":"#getting-started"},{"level":2,"title":"Acknowledgment","href":"#acknowledgment"}]},{"title":"Continuous profiling: The key to more efficient and cost-effective applications","slug":"continuous-profiling-efficient-cost-effective-applications","date":"2023-10-27","description":"In this post, we discuss why computational efficiency is important and how Elastic Universal Profiling enables your business to use continuous profiling in production environments to 
make the software that runs your business as efficient as possible.","image":"the-end-of-databases-A_(1).jpg","author":[{"slug":"john-knoepfle","type":"Author","_raw":{}}],"tags":[{"slug":"universal-profiling","type":"Tag","_raw":{}},{"slug":"cloud-monitoring","type":"Tag","_raw":{}},{"slug":"apm","type":"Tag","_raw":{}}],"body":{"raw":"Recently, Elastic Universal ProfilingTM became [generally available](https://www.elastic.co/blog/continuous-profiling-is-generally-available). It is the part of our Observability solution that allows users to do _whole system, continuous profiling_ in production environments. If you\'re not familiar with continuous profiling, you are probably wondering what Universal Profiling is and why you should care. That\'s what we will address in this post.\\n\\n\\n\\n## Efficiency is important (again)\\nBefore we jump into continuous profiling, let\'s start with the \\"Why should I care?\\" question. To do that, I\'d like to talk a bit about efficiency and some large-scale trends happening in our industry that are making efficiency, specifically computational efficiency, important again. I say again because in the past, when memory and storage on a computer were very limited and you had to worry about every byte of code, efficiency was an important aspect of developing software.\\n\\n\\n\\n### The end of Moore’s Law\\nFirst, the [Moore\'s Law](https://en.wikipedia.org/wiki/Moore\'s_law) era is drawing to a close. This was inevitable simply due to the physical limits of how small you can make a transistor and the connections between them. For a long time, software developers had the luxury of not worrying about complexity and efficiency because the next generation of hardware would mitigate any negative cost or performance impact.\\n\\n_If you can\'t rely on an endless progression of ever faster hardware, you should be interested in computational efficiency._\\n\\n\\n\\n### The move to Software-as-a-Service\\nAnother trend to consider is the shift from software vendors that sold customers software to run themselves to Software-as-a-Service businesses. A traditional software vendor didn\'t have to worry too much about the efficiency of their code. That issue largely fell to the customer to address; a new software version might dictate a hardware refresh to the latest and most performant machines. For a SaaS business, inefficient software usually degrades the customer’s experience and it certainly impacts the bottom line.\\n\\n_If you are a SaaS business in a competitive environment, you should be interested in computational efficiency._\\n\\n\\n\\n### Cloud migration\\nNext is the ongoing [migration](https://www.elastic.co/observability/cloud-migration) to cloud computing. One of the benefits of cloud computing is the ease of scaling, both hardware and software. In the cloud, we are not constrained by the limits of our data centers or the next hardware purchase. Instead, we simply spin up more cloud instances to mitigate performance problems. In addition to infrastructure scalability, microservices architectures, containerization, and the rise of Kubernetes and similar orchestration tools mean that scaling services is simpler than ever. It\'s not uncommon to have thousands of instances of a service running in a cloud environment. 
This ease of scaling accounts for another trend, namely that many businesses are dealing with skyrocketing cloud computing costs.\\n\\n_If you are a business with ever-increasing cloud costs, you should be interested in computational efficiency._\\n\\n\\n\\n### Our changing climate\\nLastly, if none of those reasons pique your interest, let\'s consider a global problem that all of us should have in mind — namely, climate change. There are many things that need to be addressed to tackle climate change, but with our dependence on software in every part of our society, computational efficiency is certainly something we should be thinking about.\\n\\nThomas Dullien, distinguished engineer at Elastic and one of the founders of Optymize, points out that if you can save 20% on 800 servers, and assume 300W power consumption for each server, that code change is worth 160 metric tons of CO2 saved per year. That may seem like a drop in the bucket, but if all businesses focus more on computational efficiency, it will make an impact. Also, let\'s not forget the financial benefits: those 160 metric tons of CO2 savings also represent a significant annual cost savings.\\n\\n_If you live on planet Earth, you should be interested in computational efficiency._\\n\\n\\n\\n## Performance engineering\\nWhose job is it to worry about computational efficiency? Application developers usually pay at least some attention to efficiency as they develop their code. Profiling is a common approach for a developer to understand the performance of their code, and there is an entire portfolio of profiling tools available. Frequently, however, schedule pressures trump time spent on performance analysis and computational efficiency. In addition, performance problems may not become apparent until an application is running at scale in production and interacting (and competing) with everything else in that environment. Many profiling tools are not well suited to use in a production environment because they require code instrumentation and recompilation and add significant overhead.\\n\\nWhen inefficient code makes it into production and begins to cause performance problems, the next line of defense is the Operations or SRE team. Their mission is to keep everything humming, and performance problems will certainly draw attention. Observability tools such as APM can shed light on these types of issues and lead the team to a specific application or service, but these tools have limited visibility into the full system. Third-party libraries and operating system kernel functions remain hidden without a profiling solution in the production environment.\\n\\nSo, what can these teams do when there is a need to investigate a performance problem in production? That\'s where continuous profiling comes into the picture.\\n\\n\\n\\n## Continuous profiling\\nContinuous profiling is not a new idea. Google published a [paper about it](https://research.google/pubs/pub36575/) in 2010 and began implementing continuous profiling in its environments around that time. Facebook and Netflix followed suit not long afterward.\\n\\nTypically, continuous profiling tools have been the domain of dedicated performance engineering or operating system engineering teams, which are usually only found at extremely large-scale enterprises like the ones mentioned above. The key idea is to run profiling on every server, all of the time. 
That way, when your observability tools point you to a specific part of an application, but you need a more detailed view into exactly where that application is consuming CPU resources, the profiling data will be there, ready to use.\\n\\nAnother benefit of continuous profiling is that it provides a view of CPU-intensive software across your entire environment — whether that is a very CPU-intensive function or the aggregate of a relatively small function that is run thousands of times a second in your environment.\\n\\nWhile profiling tools are not new, most of them have significant gaps. Let\'s look at a couple of the most significant ones.\\n\\n- **Limited visibility.** Modern distributed applications are composed of a complex mix of building blocks, including custom software functions, third-party software libraries, networking software, operating system services, and more and more often, orchestration software such as [Kubernetes](https://kubernetes.io/). To fully understand what is happening in an application, you need visibility into each piece. However, even if a developer has the ability to profile their own code, everything else remains invisible. To make matters worse, most profiling tools require instrumenting the code, which adds overhead, and therefore even your developers’ code is not profiled in production.\\n- **Missing symbols in production.** All of these code building blocks typically have descriptive names (some more intuitive than others) so that developers can understand and make sense of them. In a running program, these descriptive names are usually referred to as **symbols**. For a human being to make sense of the execution of a running application, these names are very important. Unfortunately, almost always, any software running in production has these human-readable symbols stripped away for space efficiency since they are not needed by the CPU executing the software. Without all of the symbols, it is much more difficult to understand the full picture of what\'s happening in the application. To illustrate this, think of the last time you were in an SMS chat on your mobile device and you only had some of the people in the chat group in your address book while the rest simply appeared as phone numbers — this makes it very hard to tell who is saying what.\\n\\n\\n## Elastic Universal Profiling: Continuous profiling for all\\nOur goal is to allow any business, large or small, to make computational efficiency a core consideration for all of the software that they run. Universal Profiling imposes very low overhead on your servers so it can be used in production, and it provides visibility into everything running on every machine. It opens up the possibility of seeing the financial unit cost and CO2 impact of every line of code running on every system in your business. How do we do that?\\n\\n\\n\\n### Whole-system visibility — SIMPLE\\nUniversal Profiling is based on [eBPF](https://www.elastic.co/blog/ebpf-observability-security-workload-profiling), which means that it imposes very low overhead (our goal is less than 1% CPU and less than 250MB of RAM) on your servers because it doesn\'t require code instrumentation. That low overhead means it can be run continuously, on every server, even in production.\\n\\neBPF also lets us deploy a single profiler agent on a host and peek inside the operating system to see every line of code executing on the CPU. 
That means we have visibility into all of those application building blocks described above — the operating system itself as well as [containerization and orchestration frameworks](https://en.wikipedia.org/wiki/Containerization_(computing)) without complex configuration.\\n\\n\\n\\n### All the symbols\\nA key part of Universal Profiling is our hosted symbolization service. This means that symbols are not required on your servers, which not only eliminates the need for recompiling software with symbols but also helps to reduce overhead by allowing the Universal Profiling agent to send very sparse data back to the Elasticsearch platform where it is enriched with all of the missing symbols. Since we maintain a repository of symbols for the most popular third-party software libraries and the Linux operating system, the Universal Profiling UI can show you all the symbols.\\n\\n\\n\\n### Your favorite language, and then some\\nUniversal Profiling is multilanguage. We support all of today’s popular programming languages, including Python, Go, Java (and any other JVM-based languages), Ruby, NodeJS, PHP, Perl, and of course, C and C++, which is critical since these languages still underlie so many third-party libraries used by the other languages. In addition, we support profiling [native code](https://en.wikipedia.org/wiki/Machine_code), a.k.a. machine language.\\n\\nSpeaking of native code, all profiling tools are tied to a specific type of CPU. Most tools today only support the Intel x86 CPU architecture. Universal Profiling supports both x86 and ARM-based processors. With the expanding use of ARM-based servers, especially in cloud environments, Universal Profiling future-proofs your continuous profiling.\\n\\n\\n\\n![A flamegraph showing traces across Python, Native, Kernel, and Java code](/assets/images/continuous-profiling-efficient-cost-effective-applications/elastic-blog-1-universal-profiling.png)\\n\\nMany businesses today employ polyglot programming — that is, they use multiple languages to build an application — and Universal Profiling is the only profiler available that can build a holistic view across all of these languages. This will help you look for hotspots in the environment, leading you to \\"unknown unknowns\\" that warrant deeper performance analysis. That might be a simple interest rate calculation that should be efficient and lightweight but, surprisingly, isn\'t. Or perhaps it is a service that is reused much more frequently than originally expected, resulting in thousands of instances running across your environment every second, making it a prime target for efficiency improvement.\\n\\n\\n\\n### Visualize your impact\\nElastic Universal Profiling has an intuitive UI that immediately shows you the impact of any given function, including the time it spends executing on the CPU and how much that costs both in dollars and in carbon emissions.\\n\\n\\n\\n![Annualized dollar cost and CO2 emissions for any function](/assets/images/continuous-profiling-efficient-cost-effective-applications/elastic-blog-2-universal-profiling-flamegraph.png)\\n\\nFinally, with the level of software complexity in most production environments, there\'s a good chance that making a code change will have unanticipated effects across the environment. That code change may be due to a new feature being rolled out or a change to improve efficiency. 
In either case, a differential view, before and after the change, will help you understand the impact.\\n\\n\\n\\n![Performance, CO2, and cost improvements of a more efficient hashing function](/assets/images/continuous-profiling-efficient-cost-effective-applications/elastic-blog-3.png)\\n\\n## Let\'s recap\\nComputational efficiency is an important topic, both from the perspective of the ultra-competitive business climate we all work in and from living through the challenges of our planet\'s changing climate. Improving efficiency can be a challenging endeavor, but we can\'t even begin to attempt to make improvements without knowing where to focus our efforts. Elastic Universal Profiling is here to provide every business with visibility into computational efficiency.\\n\\nHow will you use Elastic Universal Profiling in your business?\\n\\n- If you are an application developer or part of the site reliability team, Universal Profiling will provide you with unprecedented visibility into your applications that will not only help you troubleshoot performance problems in production, but also understand the impact of new features and deliver an optimal user experience.\\n- If you are involved in cloud and infrastructure financial management and capacity planning, Universal Profiling will provide you with unprecedented visibility into the unit cost of every line of code that your business runs.\\n- If you are involved in your business’s [ESG](https://www.elastic.co/blog/sustainability-elastic-6-months-reflection) initiative, Universal Profiling will provide you with unprecedented visibility into your CO2 emissions and open up new avenues for reducing your carbon footprint.\\n\\nThese are just a few examples. For more ideas, read how [AppOmni benefits from Elastic Universal Profiling](https://www.elastic.co/customers/appomni).\\n\\nYou can [get started](https://www.elastic.co/guide/en/observability/current/profiling-get-started.html) with Elastic Universal Profiling right now!\\n\\n\\n\\n_The release and timing of any features or functionality described in this post remain at Elastic\'s sole discretion. 
Any features or functionality not currently available may not be delivered on time or at all._\\n\\n\\n\\n","code":"var Component=(()=>{var u=Object.create;var a=Object.defineProperty;var p=Object.getOwnPropertyDescriptor;var f=Object.getOwnPropertyNames;var m=Object.getPrototypeOf,g=Object.prototype.hasOwnProperty;var y=(n,e)=>()=>(e||n((e={exports:{}}).exports,e),e.exports),v=(n,e)=>{for(var o in e)a(n,o,{get:e[o],enumerable:!0})},s=(n,e,o,r)=>{if(e&&typeof e==\\"object\\"||typeof e==\\"function\\")for(let t of f(e))!g.call(n,t)&&t!==o&&a(n,t,{get:()=>e[t],enumerable:!(r=p(e,t))||r.enumerable});return n};var w=(n,e,o)=>(o=n!=null?u(m(n)):{},s(e||!n||!n.__esModule?a(o,\\"default\\",{value:n,enumerable:!0}):o,n)),b=n=>s(a({},\\"__esModule\\",{value:!0}),n);var c=y((U,l)=>{l.exports=_jsx_runtime});var P={};v(P,{default:()=>d,frontmatter:()=>k});var i=w(c()),k={title:\\"Continuous profiling: The key to more efficient and cost-effective applications\\",slug:\\"continuous-profiling-efficient-cost-effective-applications\\",date:\\"2023-10-27\\",description:\\"In this post, we discuss why computational efficiency is important and how Elastic Universal Profiling enables your business to use continuous profiling in production environments to make the software that runs your business as efficient as possible.\\",author:[{slug:\\"john-knoepfle\\"}],image:\\"the-end-of-databases-A_(1).jpg\\",tags:[{slug:\\"universal-profiling\\"},{slug:\\"cloud-monitoring\\"},{slug:\\"apm\\"}]};function h(n){let e={a:\\"a\\",em:\\"em\\",h2:\\"h2\\",h3:\\"h3\\",img:\\"img\\",li:\\"li\\",p:\\"p\\",strong:\\"strong\\",ul:\\"ul\\",...n.components};return(0,i.jsxs)(i.Fragment,{children:[(0,i.jsxs)(e.p,{children:[\\"Recently, Elastic Universal Profiling\\",(0,i.jsx)(\\"sup\\",{children:\\"TM\\"}),\\" became \\",(0,i.jsx)(e.a,{href:\\"https://www.elastic.co/blog/continuous-profiling-is-generally-available\\",rel:\\"nofollow\\",children:\\"generally available\\"}),\\". It is the part of our Observability solution that allows users to do \\",(0,i.jsx)(e.em,{children:\\"whole system, continuous profiling\\"}),\\" in production environments. If you\'re not familiar with continuous profiling, you are probably wondering what Universal Profiling is and why you should care. That\'s what we will address in this post.\\"]}),`\\n`,(0,i.jsx)(e.h2,{id:\\"efficiency-is-important-again\\",children:\\"Efficiency is important (again)\\"}),`\\n`,(0,i.jsx)(e.p,{children:`Before we jump into continuous profiling, let\'s start with the \\"Why should I care?\\" question. To do that, I\'d like to talk a bit about efficiency and some large-scale trends happening in our industry that are making efficiency, specifically computational efficiency, important again. I say again because in the past, when memory and storage on a computer was very limited and you had to worry about every byte of code, efficiency was an important aspect of developing software.`}),`\\n`,(0,i.jsx)(e.h3,{id:\\"the-end-of-moores-law\\",children:\\"The end of Moore\\\\u2019s Law\\"}),`\\n`,(0,i.jsxs)(e.p,{children:[\\"First, the \\",(0,i.jsx)(e.a,{href:\\"https://en.wikipedia.org/wiki/Moore\'s_law\\",rel:\\"nofollow\\",children:\\"Moore\'s Law\\"}),\\" era is drawing to a close. This was inevitable simply due to physical limits of how small you can make a transistor and the connections between them. 
For a long time, software developers had the luxury of not worrying about complexity and efficiency because the next generation of hardware would mitigate any negative cost or performance impact.\\"]}),`\\n`,(0,i.jsx)(e.p,{children:(0,i.jsx)(e.em,{children:\\"If you can\'t rely on an endless progression of ever faster hardware, you should be interested in computational efficiency.\\"})}),`\\n`,(0,i.jsx)(e.h3,{id:\\"the-move-to-software-as-a-service\\",children:\\"The move to Software-as-a-Service\\"}),`\\n`,(0,i.jsx)(e.p,{children:\\"Another trend to consider is the shift from software vendors that sold customers software to run themselves to Software-as-a-Service businesses. A traditional software vendor didn\'t have to worry too much about the efficiency of their code. That issue largely fell to the customer to address; a new software version might dictate a hardware refresh to the latest and most performant. For a SaaS business, inefficient software usually degrades the customer\\\\u2019s experience and it certainly impacts the bottom line.\\"}),`\\n`,(0,i.jsx)(e.p,{children:(0,i.jsx)(e.em,{children:\\"If you are a SaaS business in a competitive environment, you should be interested in computational efficiency.\\"})}),`\\n`,(0,i.jsx)(e.h3,{id:\\"cloud-migration\\",children:\\"Cloud migration\\"}),`\\n`,(0,i.jsxs)(e.p,{children:[\\"Next is the ongoing \\",(0,i.jsx)(e.a,{href:\\"https://www.elastic.co/observability/cloud-migration\\",rel:\\"nofollow\\",children:\\"cloud migration\\"}),\\" to cloud computing. One of the benefits of cloud computing is the ease of scaling, both hardware and software. In the cloud, we are not constrained by the limits of our data centers or the next hardware purchase. Instead we simply spin up more cloud instances to mitigate performance problems. In addition to infrastructure scalability, microservices architectures, containerization, and the rise of Kubernetes and similar orchestration tools means that scaling services is simpler than ever. It\'s not uncommon to have thousands of instances of a service running in a cloud environment. This ease of scaling accounts for another trend, namely that many businesses are dealing with skyrocketing cloud computing costs.\\"]}),`\\n`,(0,i.jsx)(e.p,{children:(0,i.jsx)(e.em,{children:\\"If you are a business with ever increasing cloud costs, you should be interested in computational efficiency.\\"})}),`\\n`,(0,i.jsx)(e.h3,{id:\\"our-changing-climate\\",children:\\"Our changing climate\\"}),`\\n`,(0,i.jsx)(e.p,{children:\\"Lastly, if none of those reasons pique your interest, let\'s consider a global problem that all of us should have in mind \\\\u2014 namely, climate change. There are many things that need to be addressed to tackle climate change, but with our dependence on software in every part of our society, computational efficiency is certainly something we should be thinking about.\\"}),`\\n`,(0,i.jsxs)(e.p,{children:[\\"Thomas Dullien, distinguished engineer at Elastic and one of the founders of Optymize points out that if you can save 20% on 800 servers, and assume 300W power consumption for each server, that code change is worth 160 metric tons of CO\\",(0,i.jsx)(\\"sub\\",{children:\\"2\\"}),\\" saved per year. That may seem like a drop in the bucket but if all businesses focus more on computational efficiency, it will make an impact. 
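As a back-of-the-envelope check of that figure (assuming a grid carbon intensity of roughly 0.38 kg of CO2 per kWh, an assumption of ours that the original estimate does not spell out): 20% of 800 servers drawing 300 W each is 48 kW of savings; 48 kW over the 8,760 hours in a year is about 420,000 kWh; and 420,000 kWh at 0.38 kg per kWh comes to roughly 160 metric tons of CO2 per year.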
Also, let\'s not forget the financial benefits: those 160 metric tons of CO\\",(0,i.jsx)(\\"sub\\",{children:\\"2\\"}),\\" savings also represent a significant annual cost savings.\\"]}),`\\n`,(0,i.jsx)(e.p,{children:(0,i.jsx)(e.em,{children:\\"If you live on planet Earth, you should be interested in computational efficiency.\\"})}),`\\n`,(0,i.jsx)(e.h2,{id:\\"performance-engineering\\",children:\\"Performance engineering\\"}),`\\n`,(0,i.jsx)(e.p,{children:\\"Whose job is it to worry about computational efficiency? Application developers usually pay at least some attention to efficiency as they develop their code. Profiling is a common approach for a developer to understand the performance of their code, and there is an entire portfolio of profiling tools available. Frequently, however, schedule pressures trump time spent on performance analysis and computational efficiency. In addition, performance problems may not become apparent until an application is running at scale in production and interacting (and competing) with everything else in that environment. Many profiling tools are not well suited to use in a production environment because they require code instrumentation and recompilation and add significant overhead.\\"}),`\\n`,(0,i.jsx)(e.p,{children:\\"When inefficient code makes it into production and begins to cause performance problems, the next line of defense is the Operations or SRE team. Their mission is to keep everything humming, and performance problems will certainly draw attention. Observability tools such as APM can shed light on these types of issues and lead the team to a specific application or service, but these tools have limited visibility into the full system. Third-party libraries and operating system kernel functions remain hidden without a profiling solution in the production environment.\\"}),`\\n`,(0,i.jsx)(e.p,{children:\\"So, what can these teams do when there is a need to investigate a performance problem in production? That\'s where continuous profiling comes into the picture.\\"}),`\\n`,(0,i.jsx)(e.h2,{id:\\"continuous-profiling\\",children:\\"Continuous profiling\\"}),`\\n`,(0,i.jsxs)(e.p,{children:[\\"Continuous profiling is not a new idea. Google published a \\",(0,i.jsx)(e.a,{href:\\"https://research.google/pubs/pub36575/\\",rel:\\"nofollow\\",children:\\"paper about it\\"}),\\" in 2010 and began implementing continuous profiling in its environments around that time. Facebook and Netflix followed suit not long afterward.\\"]}),`\\n`,(0,i.jsx)(e.p,{children:\\"Typically, continuous profiling tools have been the domain of dedicated performance engineering or operating system engineering teams, which are usually only found at extremely large-scale enterprises like the ones mentioned above. The key idea is to run profiling on every server, all of the time. That way, when your observability tools point you to a specific part of an application, but you need a more detailed view into exactly where that application is consuming CPU resources, the profiling data will be there, ready to use.\\"}),`\\n`,(0,i.jsx)(e.p,{children:\\"Another benefit of continuous profiling is that it provides a view of CPU-intensive software across your entire environment \\\\u2014 whether that is a very CPU-intensive function or the aggregate of a relatively small function that is run thousands of times a second in your environment.\\"}),`\\n`,(0,i.jsx)(e.p,{children:\\"While profiling tools are not new, most of them have significant gaps. 
Let\'s look at a couple of the most significant ones.\\"}),`\\n`,(0,i.jsxs)(e.ul,{children:[`\\n`,(0,i.jsxs)(e.li,{children:[(0,i.jsx)(e.strong,{children:\\"Limited visibility.\\"}),\\" Modern distributed applications are composed of a complex mix of building blocks, including custom software functions, third-party software libraries, networking software, operating system services, and more and more often, orchestration software such as \\",(0,i.jsx)(e.a,{href:\\"https://kubernetes.io/\\",rel:\\"nofollow\\",children:\\"Kubernetes\\"}),\\". To fully understand what is happening in an application, you need visibility into each piece. However, even if a developer has the ability to profile their own code, everything else remains invisible. To make matters worse, most profiling tools require instrumenting the code, which adds overhead and therefore even your developers\\\\u2019 code is not profiled in production.\\"]}),`\\n`,(0,i.jsxs)(e.li,{children:[(0,i.jsx)(e.strong,{children:\\"Missing symbols in production.\\"}),\\" All of these pieces of code building blocks typically have descriptive names (some more intuitive than others) so that developers can understand and make sense of them. In a running program, these descriptive names are usually referred to as \\",(0,i.jsx)(e.strong,{children:\\"symbols\\"}),\\". For a human being to make sense of the execution of a running application, these names are very important. Unfortunately, almost always, any software running in production has these human readable symbols stripped away for space efficiency since they are not needed by the CPU executing the software. Without all of the symbols, it makes it much more difficult to understand the full picture of what\'s happening in the application. To illustrate this, think of the last time you were in an SMS chat on your mobile device and you only had some of the people in the chat group in your address book while the rest simply appeared as phone numbers \\\\u2014 this makes it very hard to tell who is saying what.\\"]}),`\\n`]}),`\\n`,(0,i.jsx)(e.h2,{id:\\"elastic-universal-profiling-continuous-profiling-for-all\\",children:\\"Elastic Universal Profiling: Continuous profiling for all\\"}),`\\n`,(0,i.jsxs)(e.p,{children:[\\"Our goal is to allow any business, large or small, to make computational efficiency a core consideration for all of the software that they run. Universal Profiling imposes very low overhead on your servers so it can be used in production and it provides visibility to everything running on every machine. It opens up the possibility of seeing the financial unit cost and CO\\",(0,i.jsx)(\\"sub\\",{children:\\"2\\"}),\\" impact of every line of code running on every system in your business. How do we do that?\\"]}),`\\n`,(0,i.jsx)(e.h3,{id:\\"whole-system-visibility--simple\\",children:\\"Whole-system visibility \\\\u2014 SIMPLE\\"}),`\\n`,(0,i.jsxs)(e.p,{children:[\\"Universal Profiling is based on \\",(0,i.jsx)(e.a,{href:\\"https://www.elastic.co/blog/ebpf-observability-security-workload-profiling\\",rel:\\"nofollow\\",children:\\"eBPF\\"}),\\", which means that it imposes very low overhead (our goal is less than 1% CPU and less than 250MB of RAM) on your servers because it doesn\'t require code instrumentation. That low overhead means it can be run continuously, on every server, even in production.\\"]}),`\\n`,(0,i.jsxs)(e.p,{children:[\\"eBPF also lets us deploy a single profiler agent on a host and peek inside the operating system to see every line of code executing on the CPU. 
That means we have visibility into all of those application building blocks described above \\\\u2014 the operating system itself as well as \\",(0,i.jsx)(e.a,{href:\\"https://en.wikipedia.org/wiki/Containerization_(computing)\\",rel:\\"nofollow\\",children:\\"containerization and orchestration frameworks\\"}),\\" without complex configuration.\\"]}),`\\n`,(0,i.jsx)(e.h3,{id:\\"all-the-symbols\\",children:\\"All the symbols\\"}),`\\n`,(0,i.jsx)(e.p,{children:\\"A key part of Universal Profiling is our hosted symbolization service. This means that symbols are not required on your servers, which not only eliminates the need for recompiling software with symbols but also helps to reduce overhead by allowing the Universal Profiling agent to send very sparse data back to the Elasticsearch platform where it is enriched with all of the missing symbols. Since we maintain a repository of symbols for the most popular third-party software libraries and the Linux operating system, the Universal Profiling UI can show you all the symbols.\\"}),`\\n`,(0,i.jsx)(e.h3,{id:\\"your-favorite-language-and-then-some\\",children:\\"Your favorite language, and then some\\"}),`\\n`,(0,i.jsxs)(e.p,{children:[\\"Universal Profiling is multilanguage. We support all of today\\\\u2019s popular programming languages, including Python, Go, Java (and any other JVM-based languages), Ruby, NodeJS, PHP, Perl, and of course, C and C++, which is critical since these languages still underlie so many third-party libraries used by the other languages. In addition, we support profiling \\",(0,i.jsx)(e.a,{href:\\"https://en.wikipedia.org/wiki/Machine_code\\",rel:\\"nofollow\\",children:\\"native code\\"}),\\", a.k.a. machine language.\\"]}),`\\n`,(0,i.jsx)(e.p,{children:\\"Speaking of native code, all profiling tools are tied to a specific type of CPU. Most tools today only support the Intel x86 CPU architecture. Universal Profiling supports both x86 and ARM-based processors. With the expanding use of ARM-based servers, especially in cloud environments, Universal Profiling future-proofs your continuous profiling.\\"}),`\\n`,(0,i.jsx)(e.p,{children:(0,i.jsx)(e.img,{src:\\"/assets/images/continuous-profiling-efficient-cost-effective-applications/elastic-blog-1-universal-profiling.png\\",alt:\\"A flamegraph showing traces across Python, Native, Kernel, and Java code\\",width:\\"1513\\",height:\\"1239\\"})}),`\\n`,(0,i.jsx)(e.p,{children:`Many businesses today employ polyglot programming \\\\u2014 that is, they use multiple languages to build an application \\\\u2014 and Universal Profiling is the only profiler available that can build a holistic view across all of these languages. This will help you look for hotspots in the environment, leading you to \\"unknown unknowns\\" that warrant deeper performance analysis. That might be a simple interest rate calculation that should be efficient and lightweight but, surprisingly, isn\'t. 
Or perhaps it is a service that is reused much more frequently than originally expected, resulting in thousands of instances running across your environment every second, making it a prime target for efficiency improvement.`}),`\\n`,(0,i.jsx)(e.h3,{id:\\"visualize-your-impact\\",children:\\"Visualize your impact\\"}),`\\n`,(0,i.jsx)(e.p,{children:\\"Elastic Universal Profiling has an intuitive UI that immediately shows you the impact of any given function, including the time it spends executing on the CPU and how much that costs both in dollars and in carbon emissions.\\"}),`\\n`,(0,i.jsx)(e.p,{children:(0,i.jsx)(e.img,{src:\\"/assets/images/continuous-profiling-efficient-cost-effective-applications/elastic-blog-2-universal-profiling-flamegraph.png\\",alt:\\"Annualized dollar cost and CO2 emissions for any function\\",width:\\"669\\",height:\\"791\\"})}),`\\n`,(0,i.jsx)(e.p,{children:\\"Finally, with the level of software complexity in most production environments, there\'s a good chance that making a code change will have unanticipated effects across the environment. That code change may be due to a new feature being rolled out or a change to improve efficiency. In either case, a differential view, before and after the change, will help you understand the impact.\\"}),`\\n`,(0,i.jsx)(e.p,{children:(0,i.jsx)(e.img,{src:\\"/assets/images/continuous-profiling-efficient-cost-effective-applications/elastic-blog-3.png\\",alt:\\"Performance, CO2, and cost improvements of a more efficient hashing function\\",width:\\"1931\\",height:\\"1111\\"})}),`\\n`,(0,i.jsx)(e.h2,{id:\\"lets-recap\\",children:\\"Let\'s recap\\"}),`\\n`,(0,i.jsx)(e.p,{children:\\"Computational efficiency is an important topic, both from the perspective of the ultra-competitive business climate we all work in and from living through the challenges of our planet\'s changing climate. Improving efficiency can be a challenging endeavor, but we can\'t even begin to attempt to make improvements without knowing where to focus our efforts. Elastic Universal Profiling is here to provide every business with visibility into computational efficiency.\\"}),`\\n`,(0,i.jsx)(e.p,{children:\\"How will you use Elastic Universal Profiling in your business?\\"}),`\\n`,(0,i.jsxs)(e.ul,{children:[`\\n`,(0,i.jsx)(e.li,{children:\\"If you are an application developer or part of the site reliability team, Universal Profiling will provide you with unprecedented visibility into your applications that will not only help you troubleshoot performance problems in production, but also understand the impact of new features and deliver an optimal user experience.\\"}),`\\n`,(0,i.jsx)(e.li,{children:\\"If you are involved in cloud and infrastructure financial management and capacity planning, Universal Profiling will provide you with unprecedented visibility into the unit cost of every line of code that your business runs.\\"}),`\\n`,(0,i.jsxs)(e.li,{children:[\\"If you are involved in your business\\\\u2019s \\",(0,i.jsx)(e.a,{href:\\"https://www.elastic.co/blog/sustainability-elastic-6-months-reflection\\",rel:\\"nofollow\\",children:\\"ESG\\"}),\\" initiative, Universal Profiling will provide you with unprecedented visibility into your CO\\",(0,i.jsx)(\\"sub\\",{children:\\"2\\"}),\\" emissions and open up new avenues for reducing your carbon footprint.\\"]}),`\\n`]}),`\\n`,(0,i.jsxs)(e.p,{children:[\\"These are just a few examples. 
For more ideas, read how \\",(0,i.jsx)(e.a,{href:\\"https://www.elastic.co/customers/appomni\\",rel:\\"nofollow\\",children:\\"AppOmni benefits from Elastic Universal Profiling\\"}),\\".\\"]}),`\\n`,(0,i.jsxs)(e.p,{children:[\\"You can \\",(0,i.jsx)(e.a,{href:\\"https://www.elastic.co/guide/en/observability/current/profiling-get-started.html\\",rel:\\"nofollow\\",children:\\"get started\\"}),\\" with Elastic Universal Profiling right now!\\"]}),`\\n`,(0,i.jsx)(e.p,{children:(0,i.jsx)(e.em,{children:\\"The release and timing of any features or functionality described in this post remain at Elastic\'s sole discretion. Any features or functionality not currently available may not be delivered on time or at all.\\"})})]})}function d(n={}){let{wrapper:e}=n.components||{};return e?(0,i.jsx)(e,{...n,children:(0,i.jsx)(h,{...n})}):h(n)}return b(P);})();\\n;return Component;"},"_id":"articles/continuous-profiling-efficient-cost-effective-applications.mdx","_raw":{"sourceFilePath":"articles/continuous-profiling-efficient-cost-effective-applications.mdx","sourceFileName":"continuous-profiling-efficient-cost-effective-applications.mdx","sourceFileDir":"articles","contentType":"mdx","flattenedPath":"articles/continuous-profiling-efficient-cost-effective-applications"},"type":"Article","imageUrl":"/assets/images/continuous-profiling-efficient-cost-effective-applications/the-end-of-databases-A_(1).jpg","readingTime":"12 min read","url":"/continuous-profiling-efficient-cost-effective-applications","headings":[{"level":2,"title":"Efficiency is important (again)","href":"#efficiency-is-important-again"},{"level":3,"title":"The end of Moore’s Law","href":"#the-end-of-moores-law"},{"level":3,"title":"The move to Software-as-a-Service","href":"#the-move-to-software-as-a-service"},{"level":3,"title":"Cloud migration","href":"#cloud-migration"},{"level":3,"title":"Our changing climate","href":"#our-changing-climate"},{"level":2,"title":"Performance engineering","href":"#performance-engineering"},{"level":2,"title":"Continuous profiling","href":"#continuous-profiling"},{"level":2,"title":"Elastic Universal Profiling: Continuous profiling for all","href":"#elastic-universal-profiling-continuous-profiling-for-all"},{"level":3,"title":"Whole-system visibility — SIMPLE","href":"#whole-system-visibility--simple"},{"level":3,"title":"All the symbols","href":"#all-the-symbols"},{"level":3,"title":"Your favorite language, and then some","href":"#your-favorite-language-and-then-some"},{"level":3,"title":"Visualize your impact","href":"#visualize-your-impact"},{"level":2,"title":"Let\'s recap","href":"#lets-recap"}]},{"title":"How to capture custom metrics without app code changes using the Java Agent Plugin","slug":"custom-metrics-app-code-java-agent-plugin","date":"2023-07-10","description":"When the application you\'re monitoring doesn\'t emit the custom metrics you\'d like, and you can\'t directly change the app code, you can use a Java Agent Plugin to automatically instrument the application and emit the custom metrics you desire.","image":"capture-custom-metrics-blog-720x420.jpeg","author":[{"slug":"jack-shirazi","type":"Author","_raw":{}}],"tags":[{"slug":"opentelemetry","type":"Tag","_raw":{}},{"slug":"java","type":"Tag","_raw":{}},{"slug":"apm","type":"Tag","_raw":{}}],"body":{"raw":"\\nThe Elastic APM Java Agent automatically tracks [many metrics](https://www.elastic.co/guide/en/apm/agent/java/current/metrics.html), including those that are generated through [Micrometer](https://micrometer.io/) or the 
[OpenTelemetry Metrics API](https://opentelemetry.io/docs/specs/otel/metrics/api/). So if your application (or the libraries it includes) already exposes metrics from one of those APIs, installing the Elastic APM Java Agent is the only step required to capture them. You\'ll be able to visualize and configure thresholds, alerts, and anomaly detection — and anything else you want to use them for!\\n\\nThe next simplest option is to generate custom metrics directly from your code (e.g., by adding code using the [OpenTelemetry Metrics API](https://opentelemetry.io/docs/specs/otel/metrics/api/) directly into the application). The major downside of that approach is that it requires modifying the application, so if you can\'t or don\'t want to do that, you can easily produce the desired custom metrics by adding instrumentation to the Elastic APM Java Agent via a plugin.\\n\\nThis article deals with the situation where the application you are monitoring doesn\'t emit the custom metrics you\'d like it to, and you can\'t directly change the code or config to make it do so. Instead, you can use a plugin to automatically instrument the application via the Elastic APM Java Agent, which will then make the application emit the custom metrics you desire.\\n\\n![Using Elastic Kibana Lens to analyze APM telemetry on various measures](/assets/images/custom-metrics-app-code-java-agent-plugin/elastic-blog-1-kibana-lens.png)\\n\\n## Plugin basics\\n\\nThe basics of the Elastic APM Java Agent, and how to easily plugin instrumentation, are detailed in the article \\"[Create your own instrumentation with the Java Agent Plugin](https://www.elastic.co/blog/create-your-own-instrumentation-with-the-java-agent-plugin).\\" Generating metrics from a plugin is just another type of instrumentation, and the referenced article provides detailed step-by-step instructions with a worked example of how to create a plugin with custom instrumentation.\\n\\nFor this article, I assume you understand how to create a plugin with custom instrumentation based on that previous article, as well as the example application (a simple webserver [ExampleBasicHttpServer](https://github.com/elastic/apm-agent-java-plugin-example/blob/main/application/src/main/java/co/elastic/apm/example/webserver/ExampleBasicHttpServer.java)) from our [plugin example repo](https://github.com/elastic/apm-agent-java-plugin-example).\\n\\n## The custom metric\\n\\nFor our example application, which is an HTTP server ([ExampleBasicHttpServer](https://github.com/elastic/apm-agent-java-plugin-example/blob/main/application/src/main/java/co/elastic/apm/example/webserver/ExampleBasicHttpServer.java)) we\'d like to add a custom metric \'page_views\' which increments each time the [ExampleBasicHttpServer](https://github.com/elastic/apm-agent-java-plugin-example/blob/main/application/src/main/java/co/elastic/apm/example/webserver/ExampleBasicHttpServer.java) application handles any request. 
That means the instrumentation we\'ll add will be triggered by the same ExampleBasicHttpServer.handleRequest() method used in \\"[Create your own instrumentation with the Java Agent Plugin](https://www.elastic.co/blog/create-your-own-instrumentation-with-the-java-agent-plugin).\\"\\n\\n![A 15-minute line visualization of the page_views metric using Elastic APM](/assets/images/custom-metrics-app-code-java-agent-plugin/elastic-blog-2-15m-vis.png)\\n\\n## Using the Plugin/OpenTelemetry API\\n\\nEssentially, the only difference from that article is that for metrics, we\'ll use the [OpenTelemetry _metrics_ API](https://opentelemetry.io/docs/specs/otel/metrics/api/) instead of the [OpenTelemetry _tracing_ API](https://opentelemetry.io/docs/instrumentation/java/manual/).\\n\\nIn particular, for the metrics, the advice method for handleRequest() is the following code:\\n\\n```java\\nif (pageViewCounter == null) {\\n pageViewCounter = GlobalOpenTelemetry\\n .getMeter(\\"ExampleHttpServer\\")\\n .counterBuilder(\\"page_views\\")\\n .setDescription(\\"Page view count\\")\\n .build();\\n}\\npageViewCounter.add(1);\\n```\\n\\nThat is, lazily create the meter when it\'s first needed, and then on each invocation of the ExampleBasicHttpServer.handleRequest() method, increment the page view counter.\\n\\nEverything else — setting up instrumentation, finding the method to instrument, building the plugin — is the same as in the article \\"[Create your own instrumentation with the Java Agent Plugin](https://www.elastic.co/blog/create-your-own-instrumentation-with-the-java-agent-plugin).\\" The full metrics example is implemented in the [plugin example repo](https://github.com/elastic/apm-agent-java-plugin-example), and the actual full metrics instrumentation implementation is [ExampleMetricsInstrumentation](https://github.com/elastic/apm-agent-java-plugin-example/blob/main/plugin/src/main/java/co/elastic/apm/example/webserver/plugin/ExampleMetricsInstrumentation.java).\\n\\n![A 15-minute bar chart visualization of the page_views metric using Elastic APM](/assets/images/custom-metrics-app-code-java-agent-plugin/elastic-blog-3-bar-chart.png)\\n\\n## Try it out!\\n\\nThat\'s it! To run the agent with the plugin, just build the jar and place it in the directory specified by the plugins_dir configuration option, as described in \\"[Create your own instrumentation with the Java Agent Plugin](https://www.elastic.co/blog/create-your-own-instrumentation-with-the-java-agent-plugin).\\" The [plugin example repo](https://github.com/elastic/apm-agent-java-plugin-example) provides a full tested implementation — just clone it and run mvn install to see it working.
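One optional refinement worth knowing about: the OpenTelemetry metrics API also accepts attributes at record time, so a counter like page_views can be broken down by dimensions such as the request path. Below is a hypothetical variant of the advice code; the http.path attribute key and the class name are our own choices for illustration, not part of the example repo:

```java
import io.opentelemetry.api.GlobalOpenTelemetry;
import io.opentelemetry.api.common.AttributeKey;
import io.opentelemetry.api.common.Attributes;
import io.opentelemetry.api.metrics.LongCounter;

public class PageViewRecorder {
    private static final AttributeKey<String> PATH_KEY = AttributeKey.stringKey("http.path");

    // Same meter and counter as in the advice code above.
    private static final LongCounter PAGE_VIEWS = GlobalOpenTelemetry
            .getMeter("ExampleHttpServer")
            .counterBuilder("page_views")
            .setDescription("Page view count")
            .build();

    static void recordPageView(String path) {
        // Attach the request path so the metric can be filtered and
        // aggregated per path in Kibana.
        PAGE_VIEWS.add(1, Attributes.of(PATH_KEY, path));
    }
}
```

The best place to get started with Elastic APM is in the cloud. 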
Begin your [free trial of Elastic Cloud](https://cloud.elastic.co/registration?elektra=en-observability-application-performance-monitoring-page) today!\\n\\n> - The [Elastic APM Java Agent docs](https://www.elastic.co/guide/en/apm/agent/java/current/index.html)\\n> - The [Elastic APM Java Agent repo](https://github.com/elastic/apm-agent-java/)\\n> - The [plugin example](https://github.com/elastic/apm-agent-java-plugin-example) repo\\n> - The previous [Create your own instrumentation with the Java Agent Plugin](https://www.elastic.co/blog/create-your-own-instrumentation-with-the-java-agent-plugin) article\\n> - The associated [Regression testing your Java Agent Plugin](https://www.elastic.co/blog/regression-testing-your-java-agent-plugin) article\\n> - The [OpenTelemetry metrics API](https://opentelemetry.io/docs/specs/otel/metrics/api/)\\n> - The [OpenTelemetry tracing API](https://opentelemetry.io/docs/instrumentation/java/manual/)\\n> - [Micrometer](https://micrometer.io/)\\n\\n_The release and timing of any features or functionality described in this post remain at Elastic\'s sole discretion. Any features or functionality not currently available may not be delivered on time or at all._\\n","code":"var Component=(()=>{var m=Object.create;var l=Object.defineProperty;var u=Object.getOwnPropertyDescriptor;var d=Object.getOwnPropertyNames;var g=Object.getPrototypeOf,w=Object.prototype.hasOwnProperty;var f=(i,e)=>()=>(e||i((e={exports:{}}).exports,e),e.exports),v=(i,e)=>{for(var a in e)l(i,a,{get:e[a],enumerable:!0})},r=(i,e,a,o)=>{if(e&&typeof e==\\"object\\"||typeof e==\\"function\\")for(let n of d(e))!w.call(i,n)&&n!==a&&l(i,n,{get:()=>e[n],enumerable:!(o=u(e,n))||o.enumerable});return i};var y=(i,e,a)=>(a=i!=null?m(g(i)):{},r(e||!i||!i.__esModule?l(a,\\"default\\",{value:i,enumerable:!0}):a,i)),b=i=>r(l({},\\"__esModule\\",{value:!0}),i);var c=f((T,s)=>{s.exports=_jsx_runtime});var x={};v(x,{default:()=>p,frontmatter:()=>j});var t=y(c()),j={title:\\"How to capture custom metrics without app code changes using the Java Agent Plugin\\",slug:\\"custom-metrics-app-code-java-agent-plugin\\",date:\\"2023-07-10\\",description:\\"When the application you\'re monitoring doesn\'t emit the custom metrics you\'d like, and you can\'t directly change the app code, you can use a Java Agent Plugin to automatically instrument the application and emit the custom metrics you desire.\\",author:[{slug:\\"jack-shirazi\\"}],image:\\"capture-custom-metrics-blog-720x420.jpeg\\",tags:[{slug:\\"opentelemetry\\"},{slug:\\"java\\"},{slug:\\"apm\\"}]};function h(i){let e={a:\\"a\\",blockquote:\\"blockquote\\",code:\\"code\\",em:\\"em\\",h2:\\"h2\\",img:\\"img\\",li:\\"li\\",p:\\"p\\",pre:\\"pre\\",ul:\\"ul\\",...i.components};return(0,t.jsxs)(t.Fragment,{children:[(0,t.jsxs)(e.p,{children:[\\"The Elastic APM Java Agent automatically tracks \\",(0,t.jsx)(e.a,{href:\\"https://www.elastic.co/guide/en/apm/agent/java/current/metrics.html\\",rel:\\"nofollow\\",children:\\"many metrics\\"}),\\", including those that are generated through \\",(0,t.jsx)(e.a,{href:\\"https://micrometer.io/\\",rel:\\"nofollow\\",children:\\"Micrometer\\"}),\\" or the \\",(0,t.jsx)(e.a,{href:\\"https://opentelemetry.io/docs/specs/otel/metrics/api/\\",rel:\\"nofollow\\",children:\\"OpenTelemetry Metrics API\\"}),\\". So if your application (or the libraries it includes) already exposes metrics from one of those APIs, installing the Elastic APM Java Agent is the only step required to capture them. 
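For instance, if your code (or a library you use) already registers a Micrometer counter along the following lines, the agent picks it up with no further changes. This is a hypothetical snippet of ours, with illustrative class and metric names:

```java
import io.micrometer.core.instrument.Counter;
import io.micrometer.core.instrument.Metrics;

public class PageViewMetrics {
    // Registered in Micrometer's global registry, which the agent reads from.
    private static final Counter PAGE_VIEWS = Metrics.counter("page_views");

    public static void handleRequest() {
        PAGE_VIEWS.increment(); // one page view per handled request
    }
}
```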
You\'ll be able to visualize and configure thresholds, alerts, and anomaly detection \\\\u2014 and anything else you want to use them for!\\"]}),`\\n`,(0,t.jsxs)(e.p,{children:[\\"The next simplest option is to generate custom metrics directly from your code (e.g., by adding code using the \\",(0,t.jsx)(e.a,{href:\\"https://opentelemetry.io/docs/specs/otel/metrics/api/\\",rel:\\"nofollow\\",children:\\"OpenTelemetry Metrics API\\"}),\\" directly into the application). The major downside of that approach is that it requires modifying the application, so if you can\'t or don\'t want to do that, you can easily produce the desired custom metrics by adding instrumentation to the Elastic APM Java Agent via a plugin.\\"]}),`\\n`,(0,t.jsx)(e.p,{children:\\"This article deals with the situation where the application you are monitoring doesn\'t emit the custom metrics you\'d like it to, and you can\'t directly change the code or config to make it do so. Instead, you can use a plugin to automatically instrument the application via the Elastic APM Java Agent, which will then make the application emit the custom metrics you desire.\\"}),`\\n`,(0,t.jsx)(e.p,{children:(0,t.jsx)(e.img,{src:\\"/assets/images/custom-metrics-app-code-java-agent-plugin/elastic-blog-1-kibana-lens.png\\",alt:\\"Using Elastic Kibana Lens to analyze APM telemetry on various measures\\",width:\\"1999\\",height:\\"1034\\"})}),`\\n`,(0,t.jsx)(e.h2,{id:\\"plugin-basics\\",children:\\"Plugin basics\\"}),`\\n`,(0,t.jsxs)(e.p,{children:[\'The basics of the Elastic APM Java Agent, and how to easily plugin instrumentation, are detailed in the article \\"\',(0,t.jsx)(e.a,{href:\\"https://www.elastic.co/blog/create-your-own-instrumentation-with-the-java-agent-plugin\\",rel:\\"nofollow\\",children:\\"Create your own instrumentation with the Java Agent Plugin\\"}),\'.\\" Generating metrics from a plugin is just another type of instrumentation, and the referenced article provides detailed step-by-step instructions with a worked example of how to create a plugin with custom instrumentation.\']}),`\\n`,(0,t.jsxs)(e.p,{children:[\\"For this article, I assume you understand how to create a plugin with custom instrumentation based on that previous article, as well as the example application (a simple webserver \\",(0,t.jsx)(e.a,{href:\\"https://github.com/elastic/apm-agent-java-plugin-example/blob/main/application/src/main/java/co/elastic/apm/example/webserver/ExampleBasicHttpServer.java\\",rel:\\"nofollow\\",children:\\"ExampleBasicHttpServer\\"}),\\") from our \\",(0,t.jsx)(e.a,{href:\\"https://github.com/elastic/apm-agent-java-plugin-example\\",rel:\\"nofollow\\",children:\\"plugin example repo\\"}),\\".\\"]}),`\\n`,(0,t.jsx)(e.h2,{id:\\"the-custom-metric\\",children:\\"The custom metric\\"}),`\\n`,(0,t.jsxs)(e.p,{children:[\\"For our example application, which is an HTTP server (\\",(0,t.jsx)(e.a,{href:\\"https://github.com/elastic/apm-agent-java-plugin-example/blob/main/application/src/main/java/co/elastic/apm/example/webserver/ExampleBasicHttpServer.java\\",rel:\\"nofollow\\",children:\\"ExampleBasicHttpServer\\"}),\\") we\'d like to add a custom metric \'page_views\' which increments each time the \\",(0,t.jsx)(e.a,{href:\\"https://github.com/elastic/apm-agent-java-plugin-example/blob/main/application/src/main/java/co/elastic/apm/example/webserver/ExampleBasicHttpServer.java\\",rel:\\"nofollow\\",children:\\"ExampleBasicHttpServer\\"}),` application handles any request. 
That means the instrumentation we\'ll add will be triggered by the same ExampleBasicHttpServer.handleRequest() method used in \\"`,(0,t.jsx)(e.a,{href:\\"https://www.elastic.co/blog/create-your-own-instrumentation-with-the-java-agent-plugin\\",rel:\\"nofollow\\",children:\\"Create your own instrumentation with the Java Agent Plugin\\"}),\'.\\"\']}),`\\n`,(0,t.jsx)(e.p,{children:(0,t.jsx)(e.img,{src:\\"/assets/images/custom-metrics-app-code-java-agent-plugin/elastic-blog-2-15m-vis.png\\",alt:\\"A 15-minute line visualization of the page_views metric using Elastic APM\\",width:\\"1999\\",height:\\"1034\\"})}),`\\n`,(0,t.jsx)(e.h2,{id:\\"using-the-pluginopentelemetry-api\\",children:\\"Using the Plugin/OpenTelemetry API\\"}),`\\n`,(0,t.jsxs)(e.p,{children:[\\"Essentially the only difference to that article is that for metrics, we\'ll use the \\",(0,t.jsxs)(e.a,{href:\\"https://opentelemetry.io/docs/specs/otel/metrics/api/\\",rel:\\"nofollow\\",children:[\\"OpenTelemetry \\",(0,t.jsx)(e.em,{children:\\"metrics\\"}),\\" API\\"]}),\\" instead of the \\",(0,t.jsxs)(e.a,{href:\\"https://opentelemetry.io/docs/instrumentation/java/manual/\\",rel:\\"nofollow\\",children:[\\"OpenTelemetry \\",(0,t.jsx)(e.em,{children:\\"tracing\\"}),\\" API\\"]}),\\".\\"]}),`\\n`,(0,t.jsx)(e.p,{children:\\"In particular for the metrics, the advice method for the handleRequest() method is the following code:\\"}),`\\n`,(0,t.jsx)(e.pre,{children:(0,t.jsx)(e.code,{className:\\"language-java\\",children:`if (pageViewCounter == null) {\\n pageViewCounter = GlobalOpenTelemetry\\n .getMeter(\\"ExampleHttpServer\\")\\n .counterBuilder(\\"page_views\\")\\n .setDescription(\\"Page view count\\")\\n .build();\\n}\\npageViewCounter.add(1);\\n`})}),`\\n`,(0,t.jsx)(e.p,{children:\\"That is, lazily create the meter when it\'s first needed, and then on each invocation of the ExampleBasicHttpServer.handleRequest() method, increment the page view counter.\\"}),`\\n`,(0,t.jsx)(e.p,{children:\'Everything else \\\\u2014 setting up instrumentation, finding the method to instrument, building the plugin \\\\u2014 is the same as in the article \\"\'}),`\\n`,(0,t.jsxs)(e.p,{children:[(0,t.jsx)(e.a,{href:\\"https://www.elastic.co/blog/create-your-own-instrumentation-with-the-java-agent-plugin\\",rel:\\"nofollow\\",children:\\"Create your own instrumentation with the Java Agent Plugin\\"}),\'.\\" The full metrics example is implemented in the \',(0,t.jsx)(e.a,{href:\\"https://github.com/elastic/apm-agent-java-plugin-example\\",rel:\\"nofollow\\",children:\\"plugin example repo\\"}),\\", and the actual full metrics instrumentation implementation is \\",(0,t.jsx)(e.a,{href:\\"https://github.com/elastic/apm-agent-java-plugin-example/blob/main/plugin/src/main/java/co/elastic/apm/example/webserver/plugin/ExampleMetricsInstrumentation.java\\",rel:\\"nofollow\\",children:\\"ExampleMetricsInstrumentation\\"}),\\".\\"]}),`\\n`,(0,t.jsx)(e.p,{children:(0,t.jsx)(e.img,{src:\\"/assets/images/custom-metrics-app-code-java-agent-plugin/elastic-blog-3-bar-chart.png\\",alt:\\"A 15-minute bar chart visualization of the page_views metric using Elastic APM\\",width:\\"1999\\",height:\\"1033\\"})}),`\\n`,(0,t.jsx)(e.h2,{id:\\"try-it-out\\",children:\\"Try it out!\\"}),`\\n`,(0,t.jsxs)(e.p,{children:[`That\'s it! 
To run the agent with the plugin, just build and include the jar as described in \\"`,(0,t.jsx)(e.a,{href:\\"https://www.elastic.co/blog/create-your-own-instrumentation-with-the-java-agent-plugin\\",rel:\\"nofollow\\",children:\\"Create your own instrumentation with the Java Agent Plugin\\"}),\',\\" in the directory specified by the plugins_dir configuration option. The \',(0,t.jsx)(e.a,{href:\\"https://github.com/elastic/apm-agent-java-plugin-example\\",rel:\\"nofollow\\",children:\\"plugin example repo\\"}),\\" provides a full tested implementation \\\\u2014 just clone it and mvn install to see it working.\\"]}),`\\n`,(0,t.jsxs)(e.p,{children:[\\"The best place to get started with Elastic APM is in the cloud. Begin your \\",(0,t.jsx)(e.a,{href:\\"https://cloud.elastic.co/registration?elektra=en-observability-application-performance-monitoring-page\\",rel:\\"nofollow\\",children:\\"free trial of Elastic Cloud\\"}),\\" today!\\"]}),`\\n`,(0,t.jsxs)(e.blockquote,{children:[`\\n`,(0,t.jsxs)(e.ul,{children:[`\\n`,(0,t.jsxs)(e.li,{children:[\\"The \\",(0,t.jsx)(e.a,{href:\\"https://www.elastic.co/guide/en/apm/agent/java/current/index.html\\",rel:\\"nofollow\\",children:\\"Elastic APM Java Agent docs\\"})]}),`\\n`,(0,t.jsxs)(e.li,{children:[\\"The \\",(0,t.jsx)(e.a,{href:\\"https://github.com/elastic/apm-agent-java/\\",rel:\\"nofollow\\",children:\\"Elastic APM Java Agent repo\\"})]}),`\\n`,(0,t.jsxs)(e.li,{children:[\\"The \\",(0,t.jsx)(e.a,{href:\\"https://github.com/elastic/apm-agent-java-plugin-example\\",rel:\\"nofollow\\",children:\\"plugin example\\"}),\\" repo\\"]}),`\\n`,(0,t.jsxs)(e.li,{children:[\\"The previous \\",(0,t.jsx)(e.a,{href:\\"https://www.elastic.co/blog/create-your-own-instrumentation-with-the-java-agent-plugin\\",rel:\\"nofollow\\",children:\\"Create your own instrumentation with the Java Agent Plugin\\"}),\\" article\\"]}),`\\n`,(0,t.jsxs)(e.li,{children:[\\"The associated \\",(0,t.jsx)(e.a,{href:\\"https://www.elastic.co/blog/regression-testing-your-java-agent-plugin\\",rel:\\"nofollow\\",children:\\"Regression testing your Java Agent Plugin\\"}),\\" article\\"]}),`\\n`,(0,t.jsxs)(e.li,{children:[\\"The \\",(0,t.jsx)(e.a,{href:\\"https://opentelemetry.io/docs/specs/otel/metrics/api/\\",rel:\\"nofollow\\",children:\\"OpenTelemetry metrics API\\"})]}),`\\n`,(0,t.jsxs)(e.li,{children:[\\"The \\",(0,t.jsx)(e.a,{href:\\"https://opentelemetry.io/docs/instrumentation/java/manual/\\",rel:\\"nofollow\\",children:\\"OpenTelemetry tracing API\\"})]}),`\\n`,(0,t.jsx)(e.li,{children:(0,t.jsx)(e.a,{href:\\"https://micrometer.io/\\",rel:\\"nofollow\\",children:\\"Micrometer\\"})}),`\\n`]}),`\\n`]}),`\\n`,(0,t.jsx)(e.p,{children:(0,t.jsx)(e.em,{children:\\"The release and timing of any features or functionality described in this post remain at Elastic\'s sole discretion. 
Any features or functionality not currently available may not be delivered on time or at all.\\"})})]})}function p(i={}){let{wrapper:e}=i.components||{};return e?(0,t.jsx)(e,{...i,children:(0,t.jsx)(h,{...i})}):h(i)}return b(x);})();\\n;return Component;"},"_id":"articles/custom-metrics-app-code-java-agent-plugin.mdx","_raw":{"sourceFilePath":"articles/custom-metrics-app-code-java-agent-plugin.mdx","sourceFileName":"custom-metrics-app-code-java-agent-plugin.mdx","sourceFileDir":"articles","contentType":"mdx","flattenedPath":"articles/custom-metrics-app-code-java-agent-plugin"},"type":"Article","imageUrl":"/assets/images/custom-metrics-app-code-java-agent-plugin/capture-custom-metrics-blog-720x420.jpeg","readingTime":"4 min read","url":"/custom-metrics-app-code-java-agent-plugin","headings":[{"level":2,"title":"Plugin basics","href":"#plugin-basics"},{"level":2,"title":"The custom metric","href":"#the-custom-metric"},{"level":2,"title":"Using the Plugin/OpenTelemetry API","href":"#using-the-pluginopentelemetry-api"},{"level":2,"title":"Try it out!","href":"#try-it-out"}]},{"title":"Customize your data ingestion with Elastic input packages","slug":"customize-data-ingestion-input-packages","date":"2023-09-26","description":"In this post, learn about input packages and how they can provide a flexible solution to advanced users for customizing their ingestion experience in Elastic.","image":"customize-observability-input-720x420.jpg","author":[{"slug":"ishleen-kaur","type":"Author","_raw":{}}],"tags":[{"slug":"log-analytics","type":"Tag","_raw":{}}],"body":{"raw":"\\nElastic\xae has enabled the collection, transformation, and analysis of data flowing between the external data sources and Elastic Observability Solution through [integrations](https://www.elastic.co/integrations/). Integration packages achieve this by encapsulating several components, including [agent configuration](https://www.elastic.co/guide/en/fleet/current/create-standalone-agent-policy.html), inputs for data collection, and assets like [ingest pipelines](https://www.elastic.co/guide/en/elasticsearch/reference/current/ingest.html), [data streams](https://www.elastic.co/guide/en/elasticsearch/reference/current/data-streams.html), [index templates](https://www.elastic.co/guide/en/elasticsearch/reference/current/index-templates.html), and [visualizations](https://www.elastic.co/guide/en/kibana/current/dashboard.html). The breadth of these assets supported in the Elastic Stack increases day by day.\\n\\nThis blog dives into how input packages provide an extremely generic and flexible solution to the advanced users for customizing their ingestion experience in Elastic.\\n\\n## What are input packages?\\n\\nAn [Elastic Package](https://github.com/elastic/elastic-package) is an artifact that contains a collection of assets that extend the Elastic Stack, providing new capabilities to accomplish a specific task like integration with an external data source. The first use of Elastic packages is [integration packages](https://github.com/elastic/integrations), which provide an end-to-end experience — from configuring Elastic Agent, to collecting signals from the data source, to ingesting them correctly and using the data once ingested.\\n\\nHowever, advanced users may need to customize data collection, either because an integration does not exist for a specific data source, or even if it does, they want to collect additional signals or in a different way. 
Input packages are another type of [Elastic package](https://github.com/elastic/elastic-package) that provides the capability to configure Elastic Agent to use the provided inputs in a custom way.\n\n## Let’s look at an example\n\nSay hello to Julia, who works as an engineer at the Ascio Innovation firm. She is currently working with an Oracle Weblogic server and wants to get a set of metrics for monitoring it. She goes ahead and installs the Elastic [Oracle Weblogic Integration](https://docs.elastic.co/integrations/oracle_weblogic), which uses Jolokia in the backend to fetch the metrics.\n\nNow her team wants to take its monitoring further and has the following requirements:\n\n1. We should be able to extract metrics other than the default ones, which the default Oracle Weblogic Integration does not support.\n\n2. We want to have our own bespoke pipelines, visualizations, and experience.\n\n3. We should be able to identify the metrics coming in from two different instances of Weblogic Server by having the data mapped to separate [indices](https://www.elastic.co/blog/what-is-an-elasticsearch-index).\n\nAll of the above requirements can be met by using the [Jolokia input package](https://docs.elastic.co/integrations/jolokia) to get a customized experience. Let\'s see how.\n\nTo fulfill the _first requirement_, Julia can add the configuration of the Jolokia input package as below: the hostname, the JMX Mappings for the fields she wants to fetch from the JVM application, and the [data set](https://www.elastic.co/guide/en/ecs/master/ecs-data_stream.html#field-data-stream-dataset) name to which the response fields will be mapped.\n\n![Configuration Parameters for Jolokia Input package](/assets/images/customize-data-ingestion-input-packages/elastic-blog-1-config-parameters.png)\n\n![Metrics getting mapped to the index created by the ‘jolokia_first_dataset’](/assets/images/customize-data-ingestion-input-packages/elastic-blog-2-expanded-doc.png)\n\n
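To get a feel for the data such a JMX Mapping pulls in, you can query a Jolokia agent directly. The sketch below is purely illustrative (the endpoint URL is a placeholder, and it assumes the requests library is available); it reads the JVM heap-usage MBean, the same kind of attribute a JMX Mapping targets:\n\n```python\nimport json\n\nimport requests\n\n# Hypothetical Jolokia agent endpoint on the Weblogic host (8778 is Jolokia\'s default port).\nJOLOKIA_URL = \"http://weblogic-host:8778/jolokia\"\n\n# A Jolokia \"read\" request for a single MBean attribute.\npayload = {\"type\": \"read\", \"mbean\": \"java.lang:type=Memory\", \"attribute\": \"HeapMemoryUsage\"}\n\nresponse = requests.post(JOLOKIA_URL, json=payload)\nprint(json.dumps(response.json(), indent=2))  # value.used, value.max, and so on\n```\n\n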
Julia can customize her data by writing her own [ingest pipelines](https://www.elastic.co/guide/en/elasticsearch/reference/current/ingest.html) and providing her customized [mappings](https://www.elastic.co/guide/en/elasticsearch/reference/current/mapping.html). She can then also build her own bespoke dashboards, meeting her _second requirement._\n\n![Customization of Ingest Pipelines and Mappings](/assets/images/customize-data-ingestion-input-packages/elastic-blog-3-ingest-pipelines.png)\n\nLet’s say Julia now wants to use another instance of Oracle Weblogic and get a different set of metrics.\n\nThis can be achieved by adding another instance of the Jolokia input package and specifying a new [data set](https://www.elastic.co/guide/en/ecs/master/ecs-data_stream.html#field-data-stream-dataset) name as shown in the screenshot below. The resultant metrics will be mapped to a different [index](https://www.elastic.co/guide/en/elasticsearch/reference/current/docs-index_.html)/data set, fulfilling her _third requirement._ This will help Julia differentiate the metrics coming in from the two instances of Oracle Weblogic.\n\n![jolokia metrics](/assets/images/customize-data-ingestion-input-packages/elastic-blog-4-jolokia.png)\n\nThe resultant metrics of the query will be indexed to the new data set, jolokia_second_dataset in the example below.\n\n![dataset](/assets/images/customize-data-ingestion-input-packages/elastic-blog-5-dataset.png)\n\nAs we can see above, the Jolokia input package provides the flexibility to get new metrics by specifying different JMX Mappings, which are not supported in the default Oracle Weblogic integration (where the user gets metrics from a predetermined set of JMX Mappings).\n\nThe Jolokia input package can also be used to monitor any Java-based application that exposes its metrics through JMX, so a single input package can be used to collect metrics from multiple Java applications and services.\n\n## Elastic input packages\n\nElastic has supported input packages since the 8.8.0 release. Some of the input packages are now available in beta and will mature gradually:\n\n1. [SQL input package](https://docs.elastic.co/integrations/sql): The SQL input package allows you to execute queries against any SQL database and store the results in Elasticsearch\xae.\n\n2. [Prometheus input package](https://docs.elastic.co/integrations/prometheus_input): This input package can collect metrics from [Prometheus Exporters (Collectors)](https://prometheus.io/docs/instrumenting/exporters/). It can be used by any service exporting its metrics to a Prometheus endpoint.\n\n3. [Jolokia input package](https://docs.elastic.co/integrations/jolokia): This input package collects metrics from [Jolokia agents](https://jolokia.org/agent.html) running on a target JMX server or dedicated proxy server. It can be used to monitor any Java-based application that exposes its metrics through JMX.\n\n4. [Statsd input package](https://docs.elastic.co/integrations/statsd_input): The statsd input package spawns a UDP server and listens for metrics in StatsD-compatible format. This input can be used to collect metrics from services that send data over the StatsD protocol.\n\n5. [GCP Metrics input package](https://docs.elastic.co/integrations/gcp_metrics): The GCP Metrics input package can collect custom metrics for any GCP service.\n\n## Try it out!\n\nNow that you know more about input packages, try building your own customized integration for your service through input packages, and get started with an [Elastic Cloud](https://cloud.elastic.co/registration?fromURI=/home) free trial.\n\nWe would love to hear about your experience with input packages on the Elastic [Discuss](https://discuss.elastic.co/) forum or in [the Elastic Integrations repository](https://github.com/elastic/integrations).\n\n_The release and timing of any features or functionality described in this post remain at Elastic\'s sole discretion. 
Any features or functionality not currently available may not be delivered on time or at all._\\n","code":"var Component=(()=>{var p=Object.create;var o=Object.defineProperty;var g=Object.getOwnPropertyDescriptor;var u=Object.getOwnPropertyNames;var f=Object.getPrototypeOf,m=Object.prototype.hasOwnProperty;var w=(i,e)=>()=>(e||i((e={exports:{}}).exports,e),e.exports),b=(i,e)=>{for(var a in e)o(i,a,{get:e[a],enumerable:!0})},r=(i,e,a,s)=>{if(e&&typeof e==\\"object\\"||typeof e==\\"function\\")for(let n of u(e))!m.call(i,n)&&n!==a&&o(i,n,{get:()=>e[n],enumerable:!(s=g(e,n))||s.enumerable});return i};var k=(i,e,a)=>(a=i!=null?p(f(i)):{},r(e||!i||!i.__esModule?o(a,\\"default\\",{value:i,enumerable:!0}):a,i)),y=i=>r(o({},\\"__esModule\\",{value:!0}),i);var c=w((E,l)=>{l.exports=_jsx_runtime});var x={};b(x,{default:()=>d,frontmatter:()=>v});var t=k(c()),v={title:\\"Customize your data ingestion with Elastic input packages\\",slug:\\"customize-data-ingestion-input-packages\\",date:\\"2023-09-26\\",description:\\"In this post, learn about input packages and how they can provide a flexible solution to advanced users for customizing their ingestion experience in Elastic.\\",author:[{slug:\\"ishleen-kaur\\"}],image:\\"customize-observability-input-720x420.jpg\\",tags:[{slug:\\"log-analytics\\"}]};function h(i){let e={a:\\"a\\",em:\\"em\\",h2:\\"h2\\",img:\\"img\\",li:\\"li\\",ol:\\"ol\\",p:\\"p\\",...i.components};return(0,t.jsxs)(t.Fragment,{children:[(0,t.jsxs)(e.p,{children:[\\"Elastic\\",(0,t.jsx)(\\"sup\\",{children:\\"\\\\xAE\\"}),\\" has enabled the collection, transformation, and analysis of data flowing between the external data sources and Elastic Observability Solution through \\",(0,t.jsx)(e.a,{href:\\"https://www.elastic.co/integrations/\\",rel:\\"nofollow\\",children:\\"integrations\\"}),\\". Integration packages achieve this by encapsulating several components, including \\",(0,t.jsx)(e.a,{href:\\"https://www.elastic.co/guide/en/fleet/current/create-standalone-agent-policy.html\\",rel:\\"nofollow\\",children:\\"agent configuration\\"}),\\", inputs for data collection, and assets like \\",(0,t.jsx)(e.a,{href:\\"https://www.elastic.co/guide/en/elasticsearch/reference/current/ingest.html\\",rel:\\"nofollow\\",children:\\"ingest pipelines\\"}),\\", \\",(0,t.jsx)(e.a,{href:\\"https://www.elastic.co/guide/en/elasticsearch/reference/current/data-streams.html\\",rel:\\"nofollow\\",children:\\"data streams\\"}),\\", \\",(0,t.jsx)(e.a,{href:\\"https://www.elastic.co/guide/en/elasticsearch/reference/current/index-templates.html\\",rel:\\"nofollow\\",children:\\"index templates\\"}),\\", and \\",(0,t.jsx)(e.a,{href:\\"https://www.elastic.co/guide/en/kibana/current/dashboard.html\\",rel:\\"nofollow\\",children:\\"visualizations\\"}),\\". The breadth of these assets supported in the Elastic Stack increases day by day.\\"]}),`\\n`,(0,t.jsx)(e.p,{children:\\"This blog dives into how input packages provide an extremely generic and flexible solution to the advanced users for customizing their ingestion experience in Elastic.\\"}),`\\n`,(0,t.jsx)(e.h2,{id:\\"what-are-input-packages\\",children:\\"What are input packages?\\"}),`\\n`,(0,t.jsxs)(e.p,{children:[\\"An \\",(0,t.jsx)(e.a,{href:\\"https://github.com/elastic/elastic-package\\",rel:\\"nofollow\\",children:\\"Elastic Package\\"}),\\" is an artifact that contains a collection of assets that extend the Elastic Stack, providing new capabilities to accomplish a specific task like integration with an external data source. 
The first use of Elastic packages is \\",(0,t.jsx)(e.a,{href:\\"https://github.com/elastic/integrations\\",rel:\\"nofollow\\",children:\\"integration packages\\"}),\\", which provide an end-to-end experience \\\\u2014 from configuring Elastic Agent, to collecting signals from the data source, to ingesting them correctly and using the data once ingested.\\"]}),`\\n`,(0,t.jsxs)(e.p,{children:[\\"However, advanced users may need to customize data collection, either because an integration does not exist for a specific data source, or even if it does, they want to collect additional signals or in a different way. Input packages are another type of \\",(0,t.jsx)(e.a,{href:\\"https://github.com/elastic/elastic-package\\",rel:\\"nofollow\\",children:\\"Elastic package\\"}),\\" that provides the capability to configure Elastic Agent to use the provided inputs in a custom way.\\"]}),`\\n`,(0,t.jsx)(e.h2,{id:\\"lets-look-at-an-example\\",children:\\"Let\\\\u2019s look at an example\\"}),`\\n`,(0,t.jsxs)(e.p,{children:[\\"Say hello to Julia, who works as an engineer at Ascio Innovation firm. She is currently working with Oracle Weblogic server and wants to get a set of metrics for monitoring it. She goes ahead and installs Elastic \\",(0,t.jsx)(e.a,{href:\\"https://docs.elastic.co/integrations/oracle_weblogic\\",rel:\\"nofollow\\",children:\\"Oracle Weblogic Integration\\"}),\\", which uses Jolokia in the backend to fetch the metrics.\\"]}),`\\n`,(0,t.jsx)(e.p,{children:\\"Now, her team wants to advance in the monitoring and has the following requirements:\\"}),`\\n`,(0,t.jsxs)(e.ol,{children:[`\\n`,(0,t.jsxs)(e.li,{children:[`\\n`,(0,t.jsx)(e.p,{children:\\"We should be able to extract metrics other than the default ones, which are not supported by the default Oracle Weblogic Integration.\\"}),`\\n`]}),`\\n`,(0,t.jsxs)(e.li,{children:[`\\n`,(0,t.jsx)(e.p,{children:\\"We want to have our own bespoke pipelines, visualizations, and experience.\\"}),`\\n`]}),`\\n`,(0,t.jsxs)(e.li,{children:[`\\n`,(0,t.jsxs)(e.p,{children:[\\"We should be able to identify the metrics coming in from two different instances of Weblogic Servers by having data mapped to separate \\",(0,t.jsx)(e.a,{href:\\"https://www.elastic.co/blog/what-is-an-elasticsearch-index\\",rel:\\"nofollow\\",children:\\"indices\\"}),\\".\\"]}),`\\n`]}),`\\n`]}),`\\n`,(0,t.jsxs)(e.p,{children:[\\"All the above requirements can be met by using the \\",(0,t.jsx)(e.a,{href:\\"https://docs.elastic.co/integrations/jolokia\\",rel:\\"nofollow\\",children:\\"Jolokia input package\\"}),\\" to get a customized experience. 
Let\'s see how.\\"]}),`\\n`,(0,t.jsxs)(e.p,{children:[\\"Julia can add the configuration of Jolokia input package as below, fulfilling the \\",(0,t.jsx)(e.em,{children:\\"first requirement.\\"})]}),`\\n`,(0,t.jsxs)(e.p,{children:[\\"hostname, JMX Mappings for the fields you want to fetch for the JVM application, and the \\",(0,t.jsx)(e.a,{href:\\"https://www.elastic.co/guide/en/ecs/master/ecs-data_stream.html#field-data-stream-dataset\\",rel:\\"nofollow\\",children:\\"data set\\"}),\\" name to which the response fields would get mapped.\\"]}),`\\n`,(0,t.jsx)(e.p,{children:(0,t.jsx)(e.img,{src:\\"/assets/images/customize-data-ingestion-input-packages/elastic-blog-1-config-parameters.png\\",alt:\\"Configuration Parameters for Jolokia Input package\\",width:\\"1546\\",height:\\"1566\\"})}),`\\n`,(0,t.jsx)(e.p,{children:(0,t.jsx)(e.img,{src:\\"/assets/images/customize-data-ingestion-input-packages/elastic-blog-2-expanded-doc.png\\",alt:\\"Metrics getting mapped to the index created by the \\\\u2018jolokia_first_dataset\\\\u2019\\",width:\\"1999\\",height:\\"1171\\"})}),`\\n`,(0,t.jsxs)(e.p,{children:[\\"Julia can customize her data by writing her own \\",(0,t.jsx)(e.a,{href:\\"https://www.elastic.co/guide/en/elasticsearch/reference/current/ingest.html\\",rel:\\"nofollow\\",children:\\"ingest pipelines\\"}),\\" and providing her customized \\",(0,t.jsx)(e.a,{href:\\"https://www.elastic.co/guide/en/elasticsearch/reference/current/mapping.html\\",rel:\\"nofollow\\",children:\\"mappings\\"}),\\". Also, she can then build her own bespoke dashboards, hence meeting her \\",(0,t.jsx)(e.em,{children:\\"second requirement.\\"})]}),`\\n`,(0,t.jsx)(e.p,{children:(0,t.jsx)(e.img,{src:\\"/assets/images/customize-data-ingestion-input-packages/elastic-blog-3-ingest-pipelines.png\\",alt:\\"Customization of Ingest Pipelines and Mappings\\",width:\\"868\\",height:\\"834\\"})}),`\\n`,(0,t.jsx)(e.p,{children:\\"Let\\\\u2019s say now Julia wants to use another instance of Oracle Weblogic and get a different set of metrics.\\"}),`\\n`,(0,t.jsxs)(e.p,{children:[\\"This can be achieved by adding another instance of Jolokia input package and specifying a new \\",(0,t.jsx)(e.a,{href:\\"https://www.elastic.co/guide/en/ecs/master/ecs-data_stream.html#field-data-stream-dataset\\",rel:\\"nofollow\\",children:\\"data set\\"}),\\" name as shown in the screenshot below. 
The resultant metrics will be mapped to a different \\",(0,t.jsx)(e.a,{href:\\"https://www.elastic.co/guide/en/elasticsearch/reference/current/docs-index_.html\\",rel:\\"nofollow\\",children:\\"index\\"}),\\"/data set hence fulfilling her \\",(0,t.jsx)(e.em,{children:\\"third requirement.\\"}),\\" This will help Julia to differentiate metrics coming in from two different instances of Oracle Weblogic.\\"]}),`\\n`,(0,t.jsx)(e.p,{children:(0,t.jsx)(e.img,{src:\\"/assets/images/customize-data-ingestion-input-packages/elastic-blog-4-jolokia.png\\",alt:\\"jolokia metrics\\",width:\\"1646\\",height:\\"1568\\"})}),`\\n`,(0,t.jsx)(e.p,{children:\\"The resultant metrics of the query will be indexed to the new data set, jolokia_second_dataset in the below example.\\"}),`\\n`,(0,t.jsx)(e.p,{children:(0,t.jsx)(e.img,{src:\\"/assets/images/customize-data-ingestion-input-packages/elastic-blog-5-dataset.png\\",alt:\\"dataset\\",width:\\"1999\\",height:\\"924\\"})}),`\\n`,(0,t.jsx)(e.p,{children:\\"As we can see above, the Jolokia input package provides the flexibility to get new metrics by specifying different JMX Mappings, which are not supported in the default Oracle Weblogic integration (the user gets metrics from a predetermined set of JMX Mappings).\\"}),`\\n`,(0,t.jsx)(e.p,{children:\\"The Jolokia Input package also can be used for monitoring any Java-based application, which pushes its metrics through JMX. So a single input package can be used to collect metrics from multiple Java applications/services.\\"}),`\\n`,(0,t.jsx)(e.h2,{id:\\"elastic-input-packages\\",children:\\"Elastic input packages\\"}),`\\n`,(0,t.jsx)(e.p,{children:\\"Elastic has started supporting input packages from the 8.8.0 release. Some of the input packages are now available in beta and will mature gradually:\\"}),`\\n`,(0,t.jsxs)(e.ol,{children:[`\\n`,(0,t.jsxs)(e.li,{children:[`\\n`,(0,t.jsxs)(e.p,{children:[(0,t.jsx)(e.a,{href:\\"https://docs.elastic.co/integrations/sql\\",rel:\\"nofollow\\",children:\\"SQL input package\\"}),\\": The SQL input package allows you to execute queries against any SQL database and store the results in Elasticsearch\\",(0,t.jsx)(\\"sup\\",{children:\\"\\\\xAE\\"}),\\".\\"]}),`\\n`]}),`\\n`,(0,t.jsxs)(e.li,{children:[`\\n`,(0,t.jsxs)(e.p,{children:[(0,t.jsx)(e.a,{href:\\"https://docs.elastic.co/integrations/prometheus_input\\",rel:\\"nofollow\\",children:\\"Prometheus input package\\"}),\\": This input package can collect metrics from \\",(0,t.jsx)(e.a,{href:\\"https://prometheus.io/docs/instrumenting/exporters/\\",rel:\\"nofollow\\",children:\\"Prometheus Exporters (Collectors)\\"}),\\".It can be used by any service exporting its metrics to a Prometheus endpoint.\\"]}),`\\n`]}),`\\n`,(0,t.jsxs)(e.li,{children:[`\\n`,(0,t.jsxs)(e.p,{children:[(0,t.jsx)(e.a,{href:\\"https://docs.elastic.co/integrations/jolokia\\",rel:\\"nofollow\\",children:\\"Jolokia input package\\"}),\\": This input package collects metrics from \\",(0,t.jsx)(e.a,{href:\\"https://jolokia.org/agent.html\\",rel:\\"nofollow\\",children:\\"Jolokia agents\\"}),\\" running on a target JMX server or dedicated proxy server. It can be used for monitoring any Java-based application, which pushes its metrics through JMX.\\"]}),`\\n`]}),`\\n`,(0,t.jsxs)(e.li,{children:[`\\n`,(0,t.jsxs)(e.p,{children:[(0,t.jsx)(e.a,{href:\\"https://docs.elastic.co/integrations/statsd_input\\",rel:\\"nofollow\\",children:\\"Statsd input package\\"}),\\": The statsd input package spawns a UDP server and listens for metrics in StatsD compatible format. 
This input can be used to collect metrics from services that send data over the StatsD protocol.\\"]}),`\\n`]}),`\\n`,(0,t.jsxs)(e.li,{children:[`\\n`,(0,t.jsxs)(e.p,{children:[(0,t.jsx)(e.a,{href:\\"https://docs.elastic.co/integrations/gcp_metrics\\",rel:\\"nofollow\\",children:\\"GCP Metrics input package\\"}),\\": The GCP Metrics input package can collect custom metrics for any GCP service.\\"]}),`\\n`]}),`\\n`]}),`\\n`,(0,t.jsx)(e.h2,{id:\\"try-it-out\\",children:\\"Try it out!\\"}),`\\n`,(0,t.jsxs)(e.p,{children:[\\"Now that you know more about input packages, try building your own customized integration for your service through input packages, and get started with an \\",(0,t.jsx)(e.a,{href:\\"https://cloud.elastic.co/registration?fromURI=/home\\",rel:\\"nofollow\\",children:\\"Elastic Cloud\\"}),\\" free trial.\\"]}),`\\n`,(0,t.jsxs)(e.p,{children:[\\"We would love to hear from you about your experience with input packages on the Elastic \\",(0,t.jsx)(e.a,{href:\\"https://discuss.elastic.co/\\",rel:\\"nofollow\\",children:\\"Discuss\\"}),\\" forum or in \\",(0,t.jsx)(e.a,{href:\\"https://github.com/elastic/integrations\\",rel:\\"nofollow\\",children:\\"the Elastic Integrations repository\\"}),\\".\\"]}),`\\n`,(0,t.jsx)(e.p,{children:(0,t.jsx)(e.em,{children:\\"The release and timing of any features or functionality described in this post remain at Elastic\'s sole discretion. Any features or functionality not currently available may not be delivered on time or at all.\\"})})]})}function d(i={}){let{wrapper:e}=i.components||{};return e?(0,t.jsx)(e,{...i,children:(0,t.jsx)(h,{...i})}):h(i)}return y(x);})();\\n;return Component;"},"_id":"articles/customize-data-ingestion-elastic-input-packages.mdx","_raw":{"sourceFilePath":"articles/customize-data-ingestion-elastic-input-packages.mdx","sourceFileName":"customize-data-ingestion-elastic-input-packages.mdx","sourceFileDir":"articles","contentType":"mdx","flattenedPath":"articles/customize-data-ingestion-elastic-input-packages"},"type":"Article","imageUrl":"/assets/images/customize-data-ingestion-input-packages/customize-observability-input-720x420.jpg","readingTime":"5 min read","url":"/customize-data-ingestion-input-packages","headings":[{"level":2,"title":"What are input packages?","href":"#what-are-input-packages"},{"level":2,"title":"Let’s look at an example","href":"#lets-look-at-an-example"},{"level":2,"title":"Elastic input packages","href":"#elastic-input-packages"},{"level":2,"title":"Try it out!","href":"#try-it-out"}]},{"title":"How to deploy a Hello World web app with Elastic Observability on AWS App Runner","slug":"deploy-app-observability-aws-app-runner","date":"2023-10-02","description":"Follow the step-by-step process of instrumenting Elastic Observability for a Hello World web app running on AWS App Runner.","image":"library-branding-elastic-observability-white-1680x980.png","author":[{"slug":"jonathan-simon","type":"Author","_raw":{}}],"tags":[{"slug":"cloud-monitoring","type":"Tag","_raw":{}},{"slug":"aws","type":"Tag","_raw":{}},{"slug":"aws-app-runner","type":"Tag","_raw":{}},{"slug":"apm","type":"Tag","_raw":{}}],"body":{"raw":"\\nElastic Observability is the premiere tool to provide visibility into web apps running in your environment. AWS App Runner is the serverless platform of choice to run your web apps that need to scale up and down massively to meet demand or minimize costs. 
Elastic Observability combined with AWS App Runner is the perfect solution for developers to deploy [web apps that are auto-scaled with fully observable operations](https://www.elastic.co/blog/observability-powerful-flexible-efficient), in a way that’s straightforward to implement and manage.\n\nThis blog post will show you how to deploy a simple Hello World web app to App Runner and then walk you through the steps to instrument the Hello World web app so its operations can be observed with Elastic Cloud.\n\n## Elastic Observability setup\n\nWe’ll start by setting up an Elastic Cloud deployment, which is where observability will take place for the web app we’ll be deploying.\n\nFrom the [Elastic Cloud console](https://cloud.elastic.co), select **Create deployment**.\n\n![1 create deployment](/assets/images/deploy-app-observability-aws-app-runner/elastic-blog-1-create-deployment.png)\n\nEnter a deployment name and click **Create deployment**. It takes a few minutes for your deployment to be created. While waiting, you are prompted to save the admin credentials for your deployment, which provide you with superuser access to your Elastic\xae deployment. Keep these credentials safe, as they are shown only once.\n\nElastic Observability requires an APM Server URL and an APM Secret token for an app to send observability data to Elastic Cloud. Once the deployment is created, we’ll copy the Elastic Observability server URL and secret token and store them somewhere safe for adding to our web app code in a later step.\n\nTo copy the APM Server URL and the APM Secret Token, go to [Elastic Cloud](https://cloud.elastic.co/home). Then go to the [Deployments](https://cloud.elastic.co/deployments) page, which lists all of the deployments you have created. Select the deployment you want to use, which will open the deployment details page. In the Kibana\xae row of links, click on **Open** to open **Kibana** for your deployment.\n\n![2 my deployment](/assets/images/deploy-app-observability-aws-app-runner/elastic-blog-2-my-deployment.png)\n\nSelect **Integrations** from the top-level menu. Then click the **APM** tile.\n\n![3 apm](/assets/images/deploy-app-observability-aws-app-runner/elastic-blog-3-apm.png)\n\nOn the APM Agents page, copy the secretToken and the serverUrl values and save them for use in a later step.\n\n![4 apm agents](/assets/images/deploy-app-observability-aws-app-runner/elastic-blog-4-apm-agents.png)\n\nNow that we’ve completed the Elastic Cloud setup, the next step is to set up our AWS project for deploying apps to App Runner.\n\n## AWS App Runner setup\n\nTo start using AWS App Runner, you need an AWS account. If you’re a brand new user, go to [aws.amazon.com](https://aws.amazon.com) to sign up for a new account.\n\n![5 start building on aws today](/assets/images/deploy-app-observability-aws-app-runner/elastic-blog-5-start-building.png)\n\n## Set up AWS CloudShell\n\nWe’ll create the Python Hello World app image and push it to AWS ECR using AWS CloudShell.\n\nWe’re going to use Docker to build the sample app image. Perform the following five steps to set up Docker within CloudShell.\n\n1. 
Open [AWS CloudShell](https://console.aws.amazon.com/cloudshell/).\n\n![6 welcome to aws cloudshell](/assets/images/deploy-app-observability-aws-app-runner/elastic-blog-6-welcome-to-aws-cloudshell.png)\n\n![7 aws cloudshell](/assets/images/deploy-app-observability-aws-app-runner/elastic-blog-7-aws-cloudshell.png)\n\n2. Run the following two commands to install Docker in CloudShell:\n\n```bash\nsudo yum update -y\nsudo amazon-linux-extras install docker\n```\n\n3. Start Docker by running the command:\n\n```bash\nsudo dockerd\n```\n\n4. With Docker running, open a new tab in CloudShell by clicking the **Actions** dropdown menu and selecting **New tab**.\n\n![8 aws cloudshell with code](/assets/images/deploy-app-observability-aws-app-runner/elastic-blog-8-aws-cloudshell-with-code.png)\n\n5. Run the following command to authenticate Docker within CloudShell. Replace YOUR_AWS_ACCOUNT_ID with your AWS Account ID in the Docker command below, and then run it in CloudShell.\n\n```bash\naws ecr get-login-password --region us-east-2 | sudo docker login --username AWS --password-stdin YOUR_AWS_ACCOUNT_ID.dkr.ecr.us-east-2.amazonaws.com\n```\n\n## Build the Hello World web app image and push it to AWS ECR\n\nWe’ll be using [AWS ECR](https://aws.amazon.com/ecr/), Amazon’s fully managed container registry for storing and deploying application images. To build and push the Hello World app image to AWS ECR, we’ll perform the following six steps in [AWS CloudShell](https://console.aws.amazon.com/cloudshell/):\n\n1. Run the command below in CloudShell to create a repository in AWS ECR.\n\n```bash\naws ecr create-repository \\\n --repository-name elastic-helloworld/web \\\n --image-scanning-configuration scanOnPush=true \\\n --region us-east-2\n```\n\n“**elastic-helloworld**” will be the application\'s name, and “**web**” will be the service name.\n\n2. In the newly created tab within CloudShell, clone the [Python Hello World sample app](https://github.com/elastic/observability-examples/tree/main/aws/app-runner/helloworld) repo from GitHub by entering the following command.\n\n```bash\ngit clone https://github.com/elastic/observability-examples\n```\n\n3. Change directory to the location of the Hello World web app code by running the following command:\n\n```bash\ncd observability-examples/aws/app-runner/helloworld\n```\n\n4. Build the Hello World sample app from the application’s directory by running the following Docker command in CloudShell.\n\n```bash\nsudo docker build -t elastic-helloworld/web .\n```\n\n5. Tag the application image. Replace YOUR_AWS_ACCOUNT_ID with your AWS Account ID in the Docker command below, and then run it in CloudShell.\n\n```bash\nsudo docker tag elastic-helloworld/web:latest YOUR_AWS_ACCOUNT_ID.dkr.ecr.us-east-2.amazonaws.com/elastic-helloworld/web:latest\n```\n\n6. Push the application image to ECR. Replace YOUR_AWS_ACCOUNT_ID with your AWS Account ID in the command below, and then run it in CloudShell.\n\n```bash\nsudo docker push YOUR_AWS_ACCOUNT_ID.dkr.ecr.us-east-2.amazonaws.com/elastic-helloworld/web:latest\n```\n\n
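For reference, the Hello World app we just containerized is a small Flask server. A minimal sketch of its shape is below (see the [sample app repo](https://github.com/elastic/observability-examples/tree/main/aws/app-runner/helloworld) for the actual file):\n\n```python\n# Minimal sketch of the pre-instrumentation app; the sample repo has the real helloworld.py.\nfrom flask import Flask\n\napp = Flask(__name__)\n\n\n@app.route(\"/\")\ndef helloworld():\n    return \"Hello World!\"\n```\n\n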
## Deploy a Hello World web app to AWS App Runner\n\nWe’ll deploy the Python Hello World app to App Runner using the AWS App Runner console.\n\n1. Open the [App Runner console](https://console.aws.amazon.com/apprunner/) and click the **Create an App Runner service** button.\n\n![9 aws app runner](/assets/images/deploy-app-observability-aws-app-runner/elastic-blog-9-aws-app-runner.png)\n\n2. On the Source and deployment page, set the following deployment details:\n\n- In the Source section, for Repository type, choose **Container registry**.\n- For Provider, choose **Amazon ECR**.\n- For Container image URI, choose **Browse** to select the Hello World application image that we previously pushed to AWS ECR.\n - In the Select Amazon ECR container image dialog box, for Image repository, select the “**elastic-helloworld/web**” repository.\n - For Image tag, select “**latest**” and then choose **Continue**.\n- In the Deployment settings section, choose **Automatic**.\n- For ECR access role, choose **Create new service role**.\n- Click **Next**.\n\n![10 source and deployment](/assets/images/deploy-app-observability-aws-app-runner/elastic-blog-10-source-and-deployment.png)\n\n3. On the Configure service page, in the Service settings section, enter the service name “**helloworld-app**.” Leave all the other settings as they are and click **Next**.\n\n![11 configure service](/assets/images/deploy-app-observability-aws-app-runner/elastic-blog-11-configure-service.png)\n\n4. On the Review and create page, click **Create & deploy**.\n\n![12 review and create](/assets/images/deploy-app-observability-aws-app-runner/elastic-blog-12-review-and-create.png)\n\nAfter a few minutes, the Hello World app will be deployed to App Runner.\n\n![13 hello world app green text](/assets/images/deploy-app-observability-aws-app-runner/elastic-blog-13-helloworld-app.png)\n\n5. Click the **Default domain** URL to view the Hello World app running in App Runner.\n\n![14 hello world](/assets/images/deploy-app-observability-aws-app-runner/elastic-blog-14-hello-world.png)\n\n## Instrument the Hello World web app with Elastic Observability\n\nWith a web app successfully running in App Runner, we’re now ready to add the minimal code necessary to start monitoring the app. To enable observability for the Hello World app in Elastic Cloud, we’ll perform the following five steps in [AWS CloudShell](https://console.aws.amazon.com/cloudshell):\n\n1. Edit the Dockerfile to add the following Elastic OpenTelemetry environment variables, along with the commands to install and run the Elastic APM agent. Use the “nano” text editor by typing “nano Dockerfile”. Be sure to replace the APM_SERVER_URL and APM_SECRET_TOKEN placeholders with the APM Server URL and the APM Secret Token values that you copied and saved in an earlier step. The updated Dockerfile should look something like this:\n\n```dockerfile\nFROM python:3.9-slim as base\n\n# get packages\nCOPY requirements.txt .\nRUN pip install -r requirements.txt\n\nWORKDIR /app\n\n# install opentelemetry packages\nRUN pip install opentelemetry-distro opentelemetry-exporter-otlp\nRUN opentelemetry-bootstrap -a install\n\nENV OTEL_EXPORTER_OTLP_ENDPOINT=\'APM_SERVER_URL\'\nENV OTEL_EXPORTER_OTLP_HEADERS=\'Authorization=Bearer%20APM_SECRET_TOKEN\'\nENV OTEL_LOG_LEVEL=info\nENV OTEL_METRICS_EXPORTER=otlp\nENV OTEL_RESOURCE_ATTRIBUTES=service.version=1.0,deployment.environment=production\nENV OTEL_SERVICE_NAME=helloworld\nENV OTEL_TRACES_EXPORTER=otlp\n\nCOPY . .\nENV FLASK_APP=helloworld\nENV FLASK_RUN_HOST=0.0.0.0\nENV FLASK_RUN_PORT=8080\nEXPOSE 8080\nENTRYPOINT [ \"opentelemetry-instrument\", \"flask\", \"run\" ]\n```\n\nNote: You can close the nano text editor and save the file by typing “Ctrl + x”. Press the “y” key and then the “Enter” key to save the changes.\n\n
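Those OTEL_* variables are all the configuration the agent needs: opentelemetry-instrument wraps the flask run command, auto-instruments Flask, and exports telemetry over OTLP to the endpoint and authorization header supplied above. Conceptually, the bootstrap it performs is similar to the following sketch (you don\'t write this yourself; it\'s shown only to demystify the environment variables):\n\n```python\n# Rough sketch of what opentelemetry-instrument wires up from the OTEL_* env vars.\n# Illustrative only; the real bootstrap lives in the opentelemetry-distro package.\nfrom opentelemetry import trace\nfrom opentelemetry.sdk.trace import TracerProvider\nfrom opentelemetry.sdk.trace.export import BatchSpanProcessor\nfrom opentelemetry.exporter.otlp.proto.grpc.trace_exporter import OTLPSpanExporter\n\n# OTLPSpanExporter() reads OTEL_EXPORTER_OTLP_ENDPOINT and OTEL_EXPORTER_OTLP_HEADERS\n# from the environment, so spans flow to the APM Server configured in the Dockerfile.\nprovider = TracerProvider()\nprovider.add_span_processor(BatchSpanProcessor(OTLPSpanExporter()))\ntrace.set_tracer_provider(provider)\n```\n\n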
2. Edit the helloworld.py file to add observability traces. In CloudShell, type “nano helloworld.py” to edit the file.\n\n- After the import statements at the top of the file, add the code required to initialize the Elastic OpenTelemetry APM agent:\n\n```python\nfrom opentelemetry import trace\n\ntracer = trace.get_tracer(\"hello-world\")\n```\n\n- Replace the “Hello World!” output code…\n\n```python\nreturn \"Hello World!\"\n```\n\n- …with the Hello Elastic Observability code block, which returns a small block of HTML:\n\n```python\nreturn \'\'\'\n<h1>Hello Elastic Observability - AWS App Runner - Python</h1>\n\'\'\'\n```\n\n- Then add a “hi” trace before the Hello Elastic Observability code block, along with an additional “@app.after_request” method placed afterward to implement a “bye” trace.\n\n```python\n@app.route(\"/\")\ndef helloworld():\n    with tracer.start_as_current_span(\"hi\"):\n        logging.info(\"hello\")\n        return \'\'\'\n<h1>Hello Elastic Observability - AWS App Runner - Python</h1>\n\'\'\'\n\n@app.after_request\ndef after_request(response):\n    with tracer.start_as_current_span(\"bye\"):\n        logging.info(\"goodbye\")\n        return response\n```\n\nThe completed helloworld.py file should look something like this:\n\n```python\nimport logging\n\nfrom flask import Flask\nfrom opentelemetry import trace\n\ntracer = trace.get_tracer(\"hello-world\")\n\napp = Flask(__name__)\n\n@app.route(\"/\")\ndef helloworld():\n    with tracer.start_as_current_span(\"hi\"):\n        logging.info(\"hello\")\n        return \'\'\'\n<h1>Hello Elastic Observability - AWS App Runner - Python</h1>\n\'\'\'\n\n@app.after_request\ndef after_request(response):\n    with tracer.start_as_current_span(\"bye\"):\n        logging.info(\"goodbye\")\n        return response\n```\n\nNote: You can close the nano text editor and save the file by typing “Ctrl + x”. Press the “y” key and then the “Enter” key to save the changes.\n\n3. Rebuild the updated Hello World sample app using Docker from within the application’s directory by running the following command in CloudShell.\n\n```bash\nsudo docker build -t elastic-helloworld/web .\n```\n\n4. Tag the application image using Docker. Replace YOUR_AWS_ACCOUNT_ID with your AWS Account ID in the Docker command below and then run it in CloudShell.\n\n```bash\nsudo docker tag elastic-helloworld/web:latest YOUR_AWS_ACCOUNT_ID.dkr.ecr.us-east-2.amazonaws.com/elastic-helloworld/web:latest\n```\n\n5. Push the updated application image to ECR. Replace YOUR_AWS_ACCOUNT_ID with your AWS Account ID in the Docker command below and then run it in CloudShell.\n\n```bash\nsudo docker push YOUR_AWS_ACCOUNT_ID.dkr.ecr.us-east-2.amazonaws.com/elastic-helloworld/web:latest\n```\n\nPushing the image to ECR will automatically deploy the new version of the Hello World app.\n\n![15 green banner successful deployment](/assets/images/deploy-app-observability-aws-app-runner/elastic-blog-15-green-banner-successfully.png)\n\nOpen the [App Runner](https://console.aws.amazon.com/apprunner) console. After a few minutes, the updated Hello World app will be deployed to App Runner. Click the **Default domain** URL to view it running.\n\n![16 elastic](/assets/images/deploy-app-observability-aws-app-runner/elastic-blog-16-elastic-logo-text-top.png)\n\n## Observe the Hello World web app\n\nNow that we’ve instrumented the web app to send observability data to Elastic Observability, we can use Elastic Cloud to monitor the web app’s operations.\n\n
\\n \\t\'\'\'\\n\\n@app.after_request\\ndef after_request(response):\\n with tracer.start_as_current_span(\\"bye\\"):\\n \\t logging.info(\\"goodbye\\")\\n \\t return response\\n```\\n\\nNote: You can close the nano text editor and save the file by typing “Ctrl + x”. Press the “y” key and then the “Enter” key to save the changes.\\n\\n1. Rebuild the updated Hello World sample app using Docker from within the application’s directory. Run the following command in CloudShell.\\n\\n```bash\\nsudo docker build -t elastic-helloworld/web .\\n```\\n\\n4. Tag the application image using Docker. Replace \\\\ with your AWS Account ID in the Docker command below and then run it in CloudShell.\\n\\n```bash\\nsudo docker tag elastic-helloworld/web:latest .dkr.ecr.us-east-2.amazonaws.com/elastic-helloworld/web:latest\\n```\\n\\n5. Push the updated application image to ECR. Replace \\\\ with your AWS Account ID in the Docker command below and then run it in CloudShell.\\n\\n```bash\\nsudo docker push .dkr.ecr.us-east-2.amazonaws.com/elastic-helloworld/web:latest\\n```\\n\\nPushing the image to ECR will automatically deploy the new version of the Hello World app.\\n\\n![15 green banner successful deployment](/assets/images/deploy-app-observability-aws-app-runner/elastic-blog-15-green-banner-successfully.png)\\n\\nOpen the [App Runner](https://console.aws.amazon.com/apprunner) console. After a few minutes, the Hello World app will be deployed to App Runner. Click the **Default domain** URL to view the updated Hello World app running in App Runner.\\n\\n![16 elastic](/assets/images/deploy-app-observability-aws-app-runner/elastic-blog-16-elastic-logo-text-top.png)\\n\\n## Observe the Hello World web app\\n\\nNow that we’ve instrumented the web app to send observability data to Elastic Observability, we can now use Elastic Cloud to monitor the web app’s operations.\\n\\n1. In Elastic Cloud, select the Observability **Services** menu item.\\n\\n2. Click the **helloworld** service.\\n\\n3. Click the **Transactions** tab.\\n\\n4. Scroll down and click the **“/”** transaction.\\n\\n5. Scroll down to the Trace Sample section to see the **“/,” “hi,”** and **“bye”** trace samples.\\n\\n![17 trace sample](/assets/images/deploy-app-observability-aws-app-runner/elastic-blog-17-trace-sample.png)\\n\\n## Observability made to scale\\n\\nYou’ve seen the complete process of deploying a web app to AWS App Runner that is instrumented with Elastic Observability. The end result is a web app that will scale up and down with usage, combined with the observability tools to monitor the web app as it serves one user or millions of users.\\n\\nNow that you’ve seen how to deploy a serverless web app instrumented with observability, visit [Elastic Observability](https://www.elastic.co/observability) to learn more about how to implement a complete observability solution for your apps. Or visit [Getting started with Elastic on AWS](https://www.elastic.co/getting-started/aws) for more examples of how you can drive the data insights you need by combining AWS’s cloud computing services with Elastic’s search-powered platform.\\n\\n_The release and timing of any features or functionality described in this post remain at Elastic\'s sole discretion. 
Any features or functionality not currently available may not be delivered on time or at all._\\n","code":"var Component=(()=>{var p=Object.create;var i=Object.defineProperty;var u=Object.getOwnPropertyDescriptor;var g=Object.getOwnPropertyNames;var m=Object.getPrototypeOf,w=Object.prototype.hasOwnProperty;var b=(l,e)=>()=>(e||l((e={exports:{}}).exports,e),e.exports),y=(l,e)=>{for(var t in e)i(l,t,{get:e[t],enumerable:!0})},r=(l,e,t,a)=>{if(e&&typeof e==\\"object\\"||typeof e==\\"function\\")for(let o of g(e))!w.call(l,o)&&o!==t&&i(l,o,{get:()=>e[o],enumerable:!(a=u(e,o))||a.enumerable});return l};var f=(l,e,t)=>(t=l!=null?p(m(l)):{},r(e||!l||!l.__esModule?i(t,\\"default\\",{value:l,enumerable:!0}):t,l)),v=l=>r(i({},\\"__esModule\\",{value:!0}),l);var c=b((S,s)=>{s.exports=_jsx_runtime});var A={};y(A,{default:()=>d,frontmatter:()=>E});var n=f(c()),E={title:\\"How to deploy a Hello World web app with Elastic Observability on AWS App Runner\\",slug:\\"deploy-app-observability-aws-app-runner\\",date:\\"2023-10-02\\",description:\\"Follow the step-by-step process of instrumenting Elastic Observability for a Hello World web app running on AWS App Runner.\\",author:[{slug:\\"jonathan-simon\\"}],image:\\"library-branding-elastic-observability-white-1680x980.png\\",tags:[{slug:\\"cloud-monitoring\\"},{slug:\\"aws\\"},{slug:\\"aws-app-runner\\"},{slug:\\"apm\\"}]};function h(l){let e={a:\\"a\\",code:\\"code\\",em:\\"em\\",h2:\\"h2\\",img:\\"img\\",li:\\"li\\",ol:\\"ol\\",p:\\"p\\",pre:\\"pre\\",strong:\\"strong\\",ul:\\"ul\\",...l.components};return(0,n.jsxs)(n.Fragment,{children:[(0,n.jsxs)(e.p,{children:[\\"Elastic Observability is the premiere tool to provide visibility into web apps running in your environment. AWS App Runner is the serverless platform of choice to run your web apps that need to scale up and down massively to meet demand or minimize costs. Elastic Observability combined with AWS App Runner is the perfect solution for developers to deploy \\",(0,n.jsx)(e.a,{href:\\"https://www.elastic.co/blog/observability-powerful-flexible-efficient\\",rel:\\"nofollow\\",children:\\"web apps that are auto-scaled with fully observable operations\\"}),\\", in a way that\\\\u2019s straightforward to implement and manage.\\"]}),`\\n`,(0,n.jsx)(e.p,{children:\\"This blog post will show you how to deploy a simple Hello World web app to App Runner and then walk you through the steps to instrument the Hello World web app to enable observation of the application\\\\u2019s operations with Elastic Cloud.\\"}),`\\n`,(0,n.jsx)(e.h2,{id:\\"elastic-observability-setup\\",children:\\"Elastic Observability setup\\"}),`\\n`,(0,n.jsx)(e.p,{children:\\"We\\\\u2019ll start with setting up an Elastic Cloud deployment, which is where observability will take place for the web app we\\\\u2019ll be deploying.\\"}),`\\n`,(0,n.jsxs)(e.p,{children:[\\"From the \\",(0,n.jsx)(e.a,{href:\\"https://cloud.elastic.co\\",rel:\\"nofollow\\",children:\\"Elastic Cloud console\\"}),\\", select \\",(0,n.jsx)(e.strong,{children:\\"Create deployment\\"}),\\".\\"]}),`\\n`,(0,n.jsx)(e.p,{children:(0,n.jsx)(e.img,{src:\\"/assets/images/deploy-app-observability-aws-app-runner/elastic-blog-1-create-deployment.png\\",alt:\\"1 create deployment\\",width:\\"1527\\",height:\\"1350\\"})}),`\\n`,(0,n.jsxs)(e.p,{children:[\\"Enter a deployment name and click \\",(0,n.jsx)(e.strong,{children:\\"Create deployment\\"}),\\". It takes a few minutes for your deployment to be created. 
While waiting, you are prompted to save the admin credentials for your deployment, which provides you with superuser access to your Elastic\\\\xAE deployment. Keep these credentials safe as they are shown only once.\\"]}),`\\n`,(0,n.jsx)(e.p,{children:\\"Elastic Observability requires an APM Server URL and an APM Secret token for an app to send observability data to Elastic Cloud. Once the deployment is created, we\\\\u2019ll copy the Elastic Observability server URL and secret token and store them somewhere safely for adding to our web app code in a later step.\\"}),`\\n`,(0,n.jsxs)(e.p,{children:[\\"To copy the APM Server URL and the APM Secret Token, go to \\",(0,n.jsx)(e.a,{href:\\"https://cloud.elastic.co/home\\",rel:\\"nofollow\\",children:\\"Elastic Cloud\\"}),\\". Then go to the\\",(0,n.jsx)(e.a,{href:\\"https://cloud.elastic.co/deployments\\",rel:\\"nofollow\\",children:\\"Deployments\\"}),\\" page, which lists all of the deployments you have created. Select the deployment you want to use, which will open the deployment details page. In the Kibana\\\\xAE row of links, click on \\",(0,n.jsx)(e.strong,{children:\\"Open\\"}),\\" to open \\",(0,n.jsx)(e.strong,{children:\\"Kibana\\"}),\\" for your deployment.\\"]}),`\\n`,(0,n.jsx)(e.p,{children:(0,n.jsx)(e.img,{src:\\"/assets/images/deploy-app-observability-aws-app-runner/elastic-blog-2-my-deployment.png\\",alt:\\"2 my deployment\\",width:\\"1416\\",height:\\"1095\\"})}),`\\n`,(0,n.jsxs)(e.p,{children:[\\"Select \\",(0,n.jsx)(e.strong,{children:\\"Integrations\\"}),\\" from the top-level menu. Then click the \\",(0,n.jsx)(e.strong,{children:\\"APM\\"}),\\" tile.\\"]}),`\\n`,(0,n.jsx)(e.p,{children:(0,n.jsx)(e.img,{src:\\"/assets/images/deploy-app-observability-aws-app-runner/elastic-blog-3-apm.png\\",alt:\\"3 apm\\",width:\\"1377\\",height:\\"1206\\"})}),`\\n`,(0,n.jsx)(e.p,{children:\\"On the APM Agents page, copy the secretToken and the serverUrl values and save them for use in a later step.\\"}),`\\n`,(0,n.jsx)(e.p,{children:(0,n.jsx)(e.img,{src:\\"/assets/images/deploy-app-observability-aws-app-runner/elastic-blog-4-apm-agents.png\\",alt:\\"4 apm agents\\",width:\\"1999\\",height:\\"1415\\"})}),`\\n`,(0,n.jsx)(e.p,{children:\\"Now that we\\\\u2019ve completed the Elastic Cloud setup, the next step is to set up our AWS project for deploying apps to App Runner.\\"}),`\\n`,(0,n.jsx)(e.h2,{id:\\"aws-app-runner-setup\\",children:\\"AWS App Runner setup\\"}),`\\n`,(0,n.jsxs)(e.p,{children:[\\"To start using AWS App Runner, you need an AWS account. If you\\\\u2019re a brand new user, go to \\",(0,n.jsx)(e.a,{href:\\"https://aws.amazon.com\\",rel:\\"nofollow\\",children:\\"aws.amazon.com\\"}),\\" to sign up for a new account.\\"]}),`\\n`,(0,n.jsx)(e.p,{children:(0,n.jsx)(e.img,{src:\\"/assets/images/deploy-app-observability-aws-app-runner/elastic-blog-5-start-building.png\\",alt:\\"5 start building on aws today\\",width:\\"1999\\",height:\\"628\\"})}),`\\n`,(0,n.jsx)(e.h2,{id:\\"set-up-aws-cloudshell\\",children:\\"Set up AWS CloudShell\\"}),`\\n`,(0,n.jsx)(e.p,{children:\\"We\\\\u2019ll perform the process of creating a Python Hello World App image and pushing it to the AWS ECR using AWS CloudShell.\\"}),`\\n`,(0,n.jsx)(e.p,{children:\\"We\\\\u2019re going to use Docker to build the sample app image. 
Perform the following five steps to set up Docker within CloudShell.\\"}),`\\n`,(0,n.jsxs)(e.ol,{children:[`\\n`,(0,n.jsxs)(e.li,{children:[\\"Open \\",(0,n.jsx)(e.a,{href:\\"https://console.aws.amazon.com/cloudshell/\\",rel:\\"nofollow\\",children:\\"AWS CloudShell\\"}),\\".\\"]}),`\\n`]}),`\\n`,(0,n.jsx)(e.p,{children:(0,n.jsx)(e.img,{src:\\"/assets/images/deploy-app-observability-aws-app-runner/elastic-blog-6-welcome-to-aws-cloudshell.png\\",alt:\\"6 welcome to aws cloudshell\\",width:\\"1999\\",height:\\"1133\\"})}),`\\n`,(0,n.jsx)(e.p,{children:(0,n.jsx)(e.img,{src:\\"/assets/images/deploy-app-observability-aws-app-runner/elastic-blog-7-aws-cloudshell.png\\",alt:\\"7 aws cloudshell\\",width:\\"1560\\",height:\\"417\\"})}),`\\n`,(0,n.jsxs)(e.ol,{start:\\"2\\",children:[`\\n`,(0,n.jsx)(e.li,{children:\\"Run the following two commands to install Docker in CloudShell:\\"}),`\\n`]}),`\\n`,(0,n.jsx)(e.pre,{children:(0,n.jsx)(e.code,{className:\\"language-bash\\",children:`sudo yum update -y\\nsudo amazon-linux-extras install docker\\n`})}),`\\n`,(0,n.jsxs)(e.ol,{start:\\"3\\",children:[`\\n`,(0,n.jsx)(e.li,{children:\\"Start Docker by running the command:\\"}),`\\n`]}),`\\n`,(0,n.jsx)(e.pre,{children:(0,n.jsx)(e.code,{className:\\"language-bash\\",children:`sudo dockerd\\n`})}),`\\n`,(0,n.jsxs)(e.ol,{start:\\"4\\",children:[`\\n`,(0,n.jsxs)(e.li,{children:[\\"With Docker running, open a new tab in CloudShell by clicking the \\",(0,n.jsx)(e.strong,{children:\\"Actions\\"}),\\" dropdown menu and selecting \\",(0,n.jsx)(e.strong,{children:\\"New tab\\"}),\\".\\"]}),`\\n`]}),`\\n`,(0,n.jsx)(e.p,{children:(0,n.jsx)(e.img,{src:\\"/assets/images/deploy-app-observability-aws-app-runner/elastic-blog-8-aws-cloudshell-with-code.png\\",alt:\\"8 aws cloudshell with code\\",width:\\"1434\\",height:\\"612\\"})}),`\\n`,(0,n.jsxs)(e.ol,{start:\\"5\\",children:[`\\n`,(0,n.jsx)(e.li,{children:\\"Run the following command to authenticate Docker within CloudShell. Replace with your AWS Account ID in the Docker command below, and then run it in CloudShell.\\"}),`\\n`]}),`\\n`,(0,n.jsx)(e.pre,{children:(0,n.jsx)(e.code,{className:\\"language-bash\\",children:`aws ecr get-login-password --region us-east-2 | sudo docker login --username AWS --password-stdin .dkr.ecr.us-east-2.amazonaws.com\\n`})}),`\\n`,(0,n.jsx)(e.h2,{id:\\"build-the-hello-world-web-app-image-and-push-it-to-aws-ecr\\",children:\\"Build the Hello World web app image and push it to AWS ECR\\"}),`\\n`,(0,n.jsxs)(e.p,{children:[\\"We\\\\u2019ll be using \\",(0,n.jsx)(e.a,{href:\\"https://aws.amazon.com/ecr/\\",rel:\\"nofollow\\",children:\\"AWS ECR\\"}),\\", Amazon\\\\u2019s fully managed container registry for storing and deploying application images. 
To build and push the Hello World app image to AWS ECR, we\\\\u2019ll perform the following six steps in \\",(0,n.jsx)(e.a,{href:\\"https://console.aws.amazon.com/cloudshell/\\",rel:\\"nofollow\\",children:\\"AWS CloudShell\\"}),\\":\\"]}),`\\n`,(0,n.jsxs)(e.ol,{children:[`\\n`,(0,n.jsx)(e.li,{children:\\"Run the command below in CloudShell to create a repository in AWS ECR.\\"}),`\\n`]}),`\\n`,(0,n.jsx)(e.pre,{children:(0,n.jsx)(e.code,{className:\\"language-bash\\",children:`aws ecr create-repository \\\\\\\\\\n --repository-name elastic-helloworld/web \\\\\\\\\\n --image-scanning-configuration scanOnPush=true \\\\\\\\\\n --region us-east-2\\n`})}),`\\n`,(0,n.jsxs)(e.p,{children:[(0,n.jsx)(e.strong,{children:\\"\\\\u201Celastic-helloworld\\\\u201D\\"}),\\" will be the application\'s name and \\\\u201C \\",(0,n.jsx)(e.strong,{children:\\"web\\\\u201D\\"}),\\" will be the service name.\\"]}),`\\n`,(0,n.jsxs)(e.ol,{start:\\"2\\",children:[`\\n`,(0,n.jsxs)(e.li,{children:[\\"In the newly created tab within CloudShell, clone a \\",(0,n.jsx)(e.a,{href:\\"https://github.com/elastic/observability-examples/tree/main/aws/app-runner/helloworld\\",rel:\\"nofollow\\",children:\\"Python Hello World sample app\\"}),\\" repo from GitHub by entering the following command.\\"]}),`\\n`]}),`\\n`,(0,n.jsx)(e.pre,{children:(0,n.jsx)(e.code,{className:\\"language-bash\\",children:`git clone https://github.com/elastic/observability-examples\\n`})}),`\\n`,(0,n.jsxs)(e.ol,{start:\\"3\\",children:[`\\n`,(0,n.jsx)(e.li,{children:\\"Change directory to the location of the Hello World web app code by running the following command:\\"}),`\\n`]}),`\\n`,(0,n.jsx)(e.pre,{children:(0,n.jsx)(e.code,{className:\\"language-bash\\",children:`cd observability-examples/aws/app-runner/helloworld\\n`})}),`\\n`,(0,n.jsxs)(e.ol,{start:\\"4\\",children:[`\\n`,(0,n.jsx)(e.li,{children:\\"Build the Hello World sample app from the application\\\\u2019s directory. Run the following Docker command in CloudShell.\\"}),`\\n`]}),`\\n`,(0,n.jsx)(e.pre,{children:(0,n.jsx)(e.code,{className:\\"language-bash\\",children:`sudo docker build -t elastic-helloworld/web .\\n`})}),`\\n`,(0,n.jsxs)(e.ol,{start:\\"5\\",children:[`\\n`,(0,n.jsx)(e.li,{children:\\"Tag the application image. Replace with your AWS Account ID in the Docker command below, and then run it in CloudShell.\\"}),`\\n`]}),`\\n`,(0,n.jsx)(e.pre,{children:(0,n.jsx)(e.code,{className:\\"language-bash\\",children:`sudo docker tag elastic-helloworld/web:latest .dkr.ecr.us-east-2.amazonaws.com/elastic-helloworld/web:latest\\n`})}),`\\n`,(0,n.jsxs)(e.ol,{start:\\"6\\",children:[`\\n`,(0,n.jsx)(e.li,{children:\\"Push the application image to ECR. 
Replace with your AWS Account ID in the command below, and then run it in CloudShell.\\"}),`\\n`]}),`\\n`,(0,n.jsx)(e.pre,{children:(0,n.jsx)(e.code,{className:\\"language-bash\\",children:`sudo docker push .dkr.ecr.us-east-2.amazonaws.com/elastic-helloworld/web:latest\\n`})}),`\\n`,(0,n.jsx)(e.h2,{id:\\"deploy-a-hello-world-web-app-to-aws-app-runner\\",children:\\"Deploy a Hello World web app to AWS App Runner\\"}),`\\n`,(0,n.jsx)(e.p,{children:\\"We\\\\u2019ll perform the process of deploying a Python Hello World App to App Runner using the AWS App Runner console.\\"}),`\\n`,(0,n.jsxs)(e.ol,{children:[`\\n`,(0,n.jsxs)(e.li,{children:[\\"Open the \\",(0,n.jsx)(e.a,{href:\\"https://console.aws.amazon.com/apprunner/\\",rel:\\"nofollow\\",children:\\"App Runner console\\"}),\\" and click the \\",(0,n.jsx)(e.strong,{children:\\"Create an App Runner service\\"}),\\" button.\\"]}),`\\n`]}),`\\n`,(0,n.jsx)(e.p,{children:(0,n.jsx)(e.img,{src:\\"/assets/images/deploy-app-observability-aws-app-runner/elastic-blog-9-aws-app-runner.png\\",alt:\\"9 aws app runner\\",width:\\"1999\\",height:\\"566\\"})}),`\\n`,(0,n.jsxs)(e.ol,{start:\\"2\\",children:[`\\n`,(0,n.jsx)(e.li,{children:\\"On the Source and deployment page, set the following deployment details:\\"}),`\\n`]}),`\\n`,(0,n.jsxs)(e.ul,{children:[`\\n`,(0,n.jsxs)(e.li,{children:[\\"In the Source section, for Repository type, choose \\",(0,n.jsx)(e.strong,{children:\\"Container registry\\"}),\\".\\"]}),`\\n`,(0,n.jsxs)(e.li,{children:[\\"For Provider, choose \\",(0,n.jsx)(e.strong,{children:\\"Amazon ECR\\"}),\\".\\"]}),`\\n`,(0,n.jsxs)(e.li,{children:[\\"For Container image URI, choose \\",(0,n.jsx)(e.strong,{children:\\"Browse\\"}),\\" to select the Hello World application image that we previously pushed to AWS ECR.\\",`\\n`,(0,n.jsxs)(e.ul,{children:[`\\n`,(0,n.jsxs)(e.li,{children:[\\"In the Select Amazon ECR container image dialog box, for Image repository, select the \\\\u201C \\",(0,n.jsx)(e.strong,{children:\\"elastic-helloworld/web\\\\u201D\\"}),\\" repository.\\"]}),`\\n`,(0,n.jsxs)(e.li,{children:[\\"For Image tag, select \\\\u201C \\",(0,n.jsx)(e.strong,{children:\\"latest\\\\u201D\\"}),\\" and then choose \\",(0,n.jsx)(e.strong,{children:\\"Continue\\"}),\\".\\"]}),`\\n`]}),`\\n`]}),`\\n`,(0,n.jsxs)(e.li,{children:[\\"In the Deployment settings section, choose \\",(0,n.jsx)(e.strong,{children:\\"Automatic\\"}),\\".\\"]}),`\\n`,(0,n.jsxs)(e.li,{children:[\\"For ECR access role, choose \\",(0,n.jsx)(e.strong,{children:\\"Create new service role.\\"})]}),`\\n`,(0,n.jsxs)(e.li,{children:[\\"Click \\",(0,n.jsx)(e.strong,{children:\\"Next\\"}),\\".\\"]}),`\\n`]}),`\\n`,(0,n.jsx)(e.p,{children:(0,n.jsx)(e.img,{src:\\"/assets/images/deploy-app-observability-aws-app-runner/elastic-blog-10-source-and-deployment.png\\",alt:\\"10 source and deployment\\",width:\\"1999\\",height:\\"1978\\"})}),`\\n`,(0,n.jsxs)(e.ol,{start:\\"3\\",children:[`\\n`,(0,n.jsxs)(e.li,{children:[\\"On the Configure service page, in the Service settings section, enter the service name \\\\u201C \\",(0,n.jsx)(e.strong,{children:\\"helloworld-app\\"}),\\".\\\\u201D Leave all the other settings as they are and click \\",(0,n.jsx)(e.strong,{children:\\"Next\\"}),\\".\\"]}),`\\n`]}),`\\n`,(0,n.jsx)(e.p,{children:(0,n.jsx)(e.img,{src:\\"/assets/images/deploy-app-observability-aws-app-runner/elastic-blog-11-configure-service.png\\",alt:\\"11 configure 
service\\",width:\\"1405\\",height:\\"1999\\"})}),`\\n`,(0,n.jsxs)(e.ol,{start:\\"4\\",children:[`\\n`,(0,n.jsxs)(e.li,{children:[\\"On the Review and create page, click \\",(0,n.jsx)(e.strong,{children:\\"Create & deploy\\"}),\\".\\"]}),`\\n`]}),`\\n`,(0,n.jsx)(e.p,{children:(0,n.jsx)(e.img,{src:\\"/assets/images/deploy-app-observability-aws-app-runner/elastic-blog-12-review-and-create.png\\",alt:\\"12 review and create\\",width:\\"1999\\",height:\\"1790\\"})}),`\\n`,(0,n.jsx)(e.p,{children:\\"After a few minutes, the Hello World app will be deployed to App Runner.\\"}),`\\n`,(0,n.jsx)(e.p,{children:(0,n.jsx)(e.img,{src:\\"/assets/images/deploy-app-observability-aws-app-runner/elastic-blog-13-helloworld-app.png\\",alt:\\"13 hello world app green text\\",width:\\"1999\\",height:\\"711\\"})}),`\\n`,(0,n.jsxs)(e.ol,{start:\\"5\\",children:[`\\n`,(0,n.jsxs)(e.li,{children:[\\"Click the \\",(0,n.jsx)(e.strong,{children:\\"Default domain\\"}),\\" URL to view the Hello World app running in App Runner.\\"]}),`\\n`]}),`\\n`,(0,n.jsx)(e.p,{children:(0,n.jsx)(e.img,{src:\\"/assets/images/deploy-app-observability-aws-app-runner/elastic-blog-14-hello-world.png\\",alt:\\"14 hello world\\",width:\\"1440\\",height:\\"318\\"})}),`\\n`,(0,n.jsx)(e.h2,{id:\\"instrument-the-hello-world-web-app-with-elastic-observability\\",children:\\"Instrument the Hello World web app with Elastic Observability\\"}),`\\n`,(0,n.jsxs)(e.p,{children:[\\"With a web app successfully running in App Runner, we\\\\u2019re now ready to add the minimal code necessary to start monitoring the app. To enable observability for the Hello World app in Elastic Cloud, we\\\\u2019ll perform the following five steps in \\",(0,n.jsx)(e.a,{href:\\"https://console.aws.amazon.com/cloudshell\\",rel:\\"nofollow\\",children:\\"AWS CloudShell\\"}),\\":\\"]}),`\\n`,(0,n.jsxs)(e.ol,{children:[`\\n`,(0,n.jsx)(e.li,{children:\\"Edit the Dockerfile file to add the following Elastic Open Telemetry environment variables along with the commands to install and run the Elastic APM agent. Use the \\\\u201Cnano\\\\u201D text editor by typing \\\\u201Cnano Dockerfile\\\\u201D. Be sure to replace the text and the text with the APM Server URL and the APM Secret Token values that you copied and saved in an earlier step. The updated Dockerfile should look something like this:\\"}),`\\n`]}),`\\n`,(0,n.jsx)(e.pre,{children:(0,n.jsx)(e.code,{className:\\"language-python\\",children:`FROM python:3.9-slim as base\\n\\n# get packages\\nCOPY requirements.txt .\\nRUN pip install -r requirements.txt\\n\\nWORKDIR /app\\n\\n# install opentelemetry packages\\nRUN pip install opentelemetry-distro opentelemetry-exporter-otlp\\nRUN opentelemetry-bootstrap -a install\\n\\nENV OTEL_EXPORTER_OTLP_ENDPOINT=\'\'\\nENV OTEL_EXPORTER_OTLP_HEADERS=\'Authorization=Bearer%20\'\\nENV OTEL_LOG_LEVEL=info\\nENV OTEL_METRICS_EXPORTER=otlp\\nENV OTEL_RESOURCE_ATTRIBUTES=service.version=1.0,deployment.environment=production\\nENV OTEL_SERVICE_NAME=helloworld\\nENV OTEL_TRACES_EXPORTER=otlp\\n\\nCOPY . .\\nENV FLASK_APP=helloworld\\nENV FLASK_RUN_HOST=0.0.0.0\\nENV FLASK_RUN_PORT=8080\\nEXPOSE 8080\\nENTRYPOINT [ \\"opentelemetry-instrument\\", \\"flask\\", \\"run\\" ]\\n`})}),`\\n`,(0,n.jsx)(e.p,{children:\\"Note: You can close the nano text editor and save the file by typing \\\\u201CCtrl + x\\\\u201D. 
Press the \\u201Cy\\u201D key and then the \\u201CEnter\\u201D key to save the changes.\\"}),`\\n`,(0,n.jsxs)(e.ol,{start:\\"2\\",children:[`\\n`,(0,n.jsx)(e.li,{children:\\"Edit the helloworld.py file to add observability traces. In CloudShell, type \\\\u201Cnano helloworld.py\\\\u201D to edit the file.\\"}),`\\n`]}),`\\n`,(0,n.jsxs)(e.ul,{children:[`\\n`,(0,n.jsx)(e.li,{children:\\"After the import statements at the top of the file, add the code required to initialize the Elastic Open Telemetry APM agent:\\"}),`\\n`]}),`\\n`,(0,n.jsx)(e.pre,{children:(0,n.jsx)(e.code,{className:\\"language-python\\",children:`from opentelemetry import trace\\ntracer = trace.get_tracer(\\"hello-world\\")\\n`})}),`\\n`,(0,n.jsxs)(e.ul,{children:[`\\n`,(0,n.jsx)(e.li,{children:\\"Replace the \\\\u201CHello World!\\\\u201D output code . . .\\"}),`\\n`]}),`\\n`,(0,n.jsx)(e.pre,{children:(0,n.jsx)(e.code,{className:\\"language-python\\",children:`return \\"Hello World!\\"\\n`})}),`\\n`,(0,n.jsxs)(e.ul,{children:[`\\n`,(0,n.jsx)(e.li,{children:\\"\\\\u2026 with the Hello Elastic Observability code block.\\"}),`\\n`]}),`\\n`,(0,n.jsx)(e.pre,{children:(0,n.jsx)(e.code,{className:\\"language-python\\",children:`return \'\'\'\\nHello Elastic Observability - AWS App Runner - Python\\n\'\'\'\\n`})}),`\\n`,(0,n.jsxs)(e.ul,{children:[`\\n`,(0,n.jsx)(e.li,{children:\\"Then add a \\\\u201Chi\\\\u201D trace before the Hello Elastic Observability code block along with an additional \\\\u201C@app.after_request\\\\u201D method placed afterward to implement a \\\\u201Cbye\\\\u201D trace.\\"}),`\\n`]}),`\\n`,(0,n.jsx)(e.pre,{children:(0,n.jsx)(e.code,{className:\\"language-python\\",children:`@app.route(\\"/\\")\\ndef helloworld():\\n    with tracer.start_as_current_span(\\"hi\\") as span:\\n        logging.info(\\"hello\\")\\n        return \'\'\'\\n        Hello Elastic Observability - AWS App Runner - Python\\n        \'\'\'\\n\\n@app.after_request\\ndef after_request(response):\\n    with tracer.start_as_current_span(\\"bye\\"):\\n        logging.info(\\"goodbye\\")\\n        return response\\n`})}),`\\n`,(0,n.jsx)(e.p,{children:\\"The completed helloworld.py file should look something like this:\\"}),`\\n`,(0,n.jsx)(e.pre,{children:(0,n.jsx)(e.code,{className:\\"language-python\\",children:`import logging\\nfrom flask import Flask\\n\\nfrom opentelemetry import trace\\ntracer = trace.get_tracer(\\"hello-world\\")\\n\\napp = Flask(__name__)\\n\\n@app.route(\\"/\\")\\ndef helloworld():\\n    with tracer.start_as_current_span(\\"hi\\") as span:\\n        logging.info(\\"hello\\")\\n        return \'\'\'\\n        Hello Elastic Observability - AWS App Runner - Python\\n        \'\'\'\\n\\n@app.after_request\\ndef after_request(response):\\n    with tracer.start_as_current_span(\\"bye\\"):\\n        logging.info(\\"goodbye\\")\\n        return response\\n`})}),`\\n`,(0,n.jsx)(e.p,{children:\\"Note: You can close the nano text editor and save the file by typing \\\\u201CCtrl + x\\\\u201D. Press the \\\\u201Cy\\\\u201D key and then the \\\\u201CEnter\\\\u201D key to save the changes.\\"}),`\\n`,(0,n.jsxs)(e.ol,{start:\\"3\\",children:[`\\n`,(0,n.jsx)(e.li,{children:\\"Rebuild the updated Hello World sample app using Docker from within the application\\\\u2019s directory. Run the following command in CloudShell.\\"}),`\\n`]}),`\\n`,(0,n.jsx)(e.pre,{children:(0,n.jsx)(e.code,{className:\\"language-bash\\",children:`sudo docker build -t elastic-helloworld/web .\\n`})}),`\\n`,(0,n.jsxs)(e.ol,{start:\\"4\\",children:[`\\n`,(0,n.jsx)(e.li,{children:\\"Tag the application image using Docker. Add your AWS Account ID to the Docker command below and then run it in CloudShell.\\"}),`\\n`]}),`\\n`,(0,n.jsx)(e.pre,{children:(0,n.jsx)(e.code,{className:\\"language-bash\\",children:`sudo docker tag elastic-helloworld/web:latest .dkr.ecr.us-east-2.amazonaws.com/elastic-helloworld/web:latest\\n`})}),`\\n`,(0,n.jsxs)(e.ol,{start:\\"5\\",children:[`\\n`,(0,n.jsx)(e.li,{children:\\"Push the updated application image to ECR. Add your AWS Account ID to the Docker command below and then run it in CloudShell.\\"}),`\\n`]}),`\\n`,(0,n.jsx)(e.pre,{children:(0,n.jsx)(e.code,{className:\\"language-bash\\",children:`sudo docker push .dkr.ecr.us-east-2.amazonaws.com/elastic-helloworld/web:latest\\n`})}),`\\n`,(0,n.jsx)(e.p,{children:\\"Pushing the image to ECR will automatically deploy the new version of the Hello World app.\\"}),`\\n`,(0,n.jsx)(e.p,{children:(0,n.jsx)(e.img,{src:\\"/assets/images/deploy-app-observability-aws-app-runner/elastic-blog-15-green-banner-successfully.png\\",alt:\\"15 green banner successful deployment\\",width:\\"1437\\",height:\\"940\\"})}),`\\n`,(0,n.jsxs)(e.p,{children:[\\"Open the \\",(0,n.jsx)(e.a,{href:\\"https://console.aws.amazon.com/apprunner\\",rel:\\"nofollow\\",children:\\"App Runner\\"}),\\" console. After a few minutes, the Hello World app will be deployed to App Runner. 
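One optional check of our own before moving on: confirm that the OpenTelemetry settings were actually baked into the rebuilt image. If these variables are missing, the app will still run, but no telemetry will reach Elastic.

```bash
# Optional sanity check (our addition, not one of the original steps): print the
# environment the Dockerfile baked into the image, without starting Flask.
sudo docker run --rm --entrypoint env elastic-helloworld/web | grep -E '^(OTEL|FLASK)_'
```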
Click the \\",(0,n.jsx)(e.strong,{children:\\"Default domain\\"}),\\" URL to view the updated Hello World app running in App Runner.\\"]}),`\\n`,(0,n.jsx)(e.p,{children:(0,n.jsx)(e.img,{src:\\"/assets/images/deploy-app-observability-aws-app-runner/elastic-blog-16-elastic-logo-text-top.png\\",alt:\\"16 elastic\\",width:\\"1999\\",height:\\"908\\"})}),`\\n`,(0,n.jsx)(e.h2,{id:\\"observe-the-hello-world-web-app\\",children:\\"Observe the Hello World web app\\"}),`\\n`,(0,n.jsx)(e.p,{children:\\"Now that we\\\\u2019ve instrumented the web app to send observability data to Elastic Observability, we can now use Elastic Cloud to monitor the web app\\\\u2019s operations.\\"}),`\\n`,(0,n.jsxs)(e.ol,{children:[`\\n`,(0,n.jsxs)(e.li,{children:[`\\n`,(0,n.jsxs)(e.p,{children:[\\"In Elastic Cloud, select the Observability \\",(0,n.jsx)(e.strong,{children:\\"Services\\"}),\\" menu item.\\"]}),`\\n`]}),`\\n`,(0,n.jsxs)(e.li,{children:[`\\n`,(0,n.jsxs)(e.p,{children:[\\"Click the \\",(0,n.jsx)(e.strong,{children:\\"helloworld\\"}),\\" service.\\"]}),`\\n`]}),`\\n`,(0,n.jsxs)(e.li,{children:[`\\n`,(0,n.jsxs)(e.p,{children:[\\"Click the \\",(0,n.jsx)(e.strong,{children:\\"Transactions\\"}),\\" tab.\\"]}),`\\n`]}),`\\n`,(0,n.jsxs)(e.li,{children:[`\\n`,(0,n.jsxs)(e.p,{children:[\\"Scroll down and click the \\",(0,n.jsx)(e.strong,{children:\\"\\\\u201C/\\\\u201D\\"}),\\" transaction.\\"]}),`\\n`]}),`\\n`,(0,n.jsxs)(e.li,{children:[`\\n`,(0,n.jsxs)(e.p,{children:[\\"Scroll down to the Trace Sample section to see the \\",(0,n.jsx)(e.strong,{children:\\"\\\\u201C/,\\\\u201D \\\\u201Chi,\\\\u201D\\"}),\\" and \\",(0,n.jsx)(e.strong,{children:\\"\\\\u201Cbye\\\\u201D\\"}),\\" trace samples.\\"]}),`\\n`]}),`\\n`]}),`\\n`,(0,n.jsx)(e.p,{children:(0,n.jsx)(e.img,{src:\\"/assets/images/deploy-app-observability-aws-app-runner/elastic-blog-17-trace-sample.png\\",alt:\\"17 trace sample\\",width:\\"1999\\",height:\\"687\\"})}),`\\n`,(0,n.jsx)(e.h2,{id:\\"observability-made-to-scale\\",children:\\"Observability made to scale\\"}),`\\n`,(0,n.jsx)(e.p,{children:\\"You\\\\u2019ve seen the complete process of deploying a web app to AWS App Runner that is instrumented with Elastic Observability. The end result is a web app that will scale up and down with usage, combined with the observability tools to monitor the web app as it serves one user or millions of users.\\"}),`\\n`,(0,n.jsxs)(e.p,{children:[\\"Now that you\\\\u2019ve seen how to deploy a serverless web app instrumented with observability, visit \\",(0,n.jsx)(e.a,{href:\\"https://www.elastic.co/observability\\",rel:\\"nofollow\\",children:\\"Elastic Observability\\"}),\\" to learn more about how to implement a complete observability solution for your apps. Or visit \\",(0,n.jsx)(e.a,{href:\\"https://www.elastic.co/getting-started/aws\\",rel:\\"nofollow\\",children:\\"Getting started with Elastic on AWS\\"}),\\" for more examples of how you can drive the data insights you need by combining AWS\\\\u2019s cloud computing services with Elastic\\\\u2019s search-powered platform.\\"]}),`\\n`,(0,n.jsx)(e.p,{children:(0,n.jsx)(e.em,{children:\\"The release and timing of any features or functionality described in this post remain at Elastic\'s sole discretion. 
Any features or functionality not currently available may not be delivered on time or at all.\\"})})]})}function d(l={}){let{wrapper:e}=l.components||{};return e?(0,n.jsx)(e,{...l,children:(0,n.jsx)(h,{...l})}):h(l)}return v(A);})();\\n;return Component;"},"_id":"articles/deploy-hello-world-web-app-elastic-observability-aws-app-runner.mdx","_raw":{"sourceFilePath":"articles/deploy-hello-world-web-app-elastic-observability-aws-app-runner.mdx","sourceFileName":"deploy-hello-world-web-app-elastic-observability-aws-app-runner.mdx","sourceFileDir":"articles","contentType":"mdx","flattenedPath":"articles/deploy-hello-world-web-app-elastic-observability-aws-app-runner"},"type":"Article","imageUrl":"/assets/images/deploy-app-observability-aws-app-runner/library-branding-elastic-observability-white-1680x980.png","readingTime":"11 min read","url":"/deploy-app-observability-aws-app-runner","headings":[{"level":2,"title":"Elastic Observability setup","href":"#elastic-observability-setup"},{"level":2,"title":"AWS App Runner setup","href":"#aws-app-runner-setup"},{"level":2,"title":"Set up AWS CloudShell","href":"#set-up-aws-cloudshell"},{"level":2,"title":"Build the Hello World web app image and push it to AWS ECR","href":"#build-the-hello-world-web-app-image-and-push-it-to-aws-ecr"},{"level":2,"title":"Deploy a Hello World web app to AWS App Runner","href":"#deploy-a-hello-world-web-app-to-aws-app-runner"},{"level":2,"title":"Instrument the Hello World web app with Elastic Observability","href":"#instrument-the-hello-world-web-app-with-elastic-observability"},{"level":2,"title":"Observe the Hello World web app","href":"#observe-the-hello-world-web-app"},{"level":2,"title":"Observability made to scale","href":"#observability-made-to-scale"}]},{"title":"The DNA of DATA Increasing Efficiency with the Elastic Common Schema","slug":"dna-of-data","date":"2024-09-25","description":"Elastic ECS helps improve semantic conversion of log fields. Learn how quantifying the benefits of normalized data, not just for infrastructure efficiency, but also data fidelity.","image":"dna-of-data.jpg","author":[{"slug":"peter-titov","type":"Author","_raw":{}}],"tags":[{"slug":"log-analytics","type":"Tag","_raw":{}}],"body":{"raw":"\\nThe Elastic Common Schema is a fantastic way to simplify and unify a search experience. By aligning disparate data sources into a common language, users have a lower bar to overcome with interpreting events of interest, resolving incidents or hunting for unknown threats. However, there are underlying infrastructure reasons to justify adopting the Elastic Common Schema.\\n\\nIn this blog you will learn about the quantifiable operational benefits of ECS, how to leverage ECS with any data ingest tool, and the pitfalls to avoid. The data source leveraged in this blog is a 3.3GB Nginx log file obtained from Kaggle. The representation of this dataset is divided into three categories: raw, self, and ECS; with raw having zero normalization, self being a demonstration of commonly implemented mistakes observed from my 5+ years of experience working with various users, and finally ECS with the optimal approach of data hygiene.\\n\\nThis hygiene is achieved through the parsing, enrichment, and mapping of data ingested; akin to the sequencing of DNA in order to express genetic traits. 
Through the understanding of the data\'s structure, and assigning the correct mapping, a more thorough expression may be represented, stored and searched upon.\\n\\nIf you would like to learn more about ECS, the dataset used in this blog, or available Elastic integrations, please be sure to check out these related links:\\n\\n\\n\\n\\n- [Introducing the Elastic Common Schema](https://www.elastic.co/blog/introducing-the-elastic-common-schema)\\n\\n- [Kaggle Web Server Logs](https://www.kaggle.com/datasets/eliasdabbas/web-server-access-logs)\\n\\n- [Elastic Integrations](https://www.elastic.co/integrations/data-integrations)\\n\\n\\n## Dataset Validation\\n\\nBefore we begin, let us review how many documents exist and what we\'re required to ingest. We have 10,365,152 documents/events from our Nginx log file:\\n\\n![nginx access logs](/assets/images/dna-of-data/access-logs.png)\\n\\nWith 10,365,152 documents in our targeted end-state:\\n\\n![end state](/assets/images/dna-of-data/end-state.png)\\n\\n\\n## Dataset Ingestion: Raw & Self\\n\\nTo achieve the raw and self ingestion techniques, this example is leveraging Logstash for simplicity. For the raw data ingest, a simple file input with no additional modifications or index templates.\\n\\n\\n```\\n\\n input {\\n file {\\n id => \\"NGINX_FILE_INPUT\\"\\n path => \\"/etc/logstash/raw/access.log\\"\\n ecs_compatibility => disabled\\n start_position => \\"beginning\\"\\n mode => read\\n }\\n }\\n filter {\\n }\\n output {\\n elasticsearch {\\n hosts => [\\"https://mycluster.es.us-east4.gcp.elastic-cloud.com:9243\\"]\\n index => \\"nginx-raw\\"\\n ilm_enabled => true\\n manage_template => false\\n user => \\"username\\"\\n password => \\"password\\"\\n ssl_verification_mode => none\\n ecs_compatibility => disabled\\n id => \\"NGINX-FILE_ES_Output\\"\\n }\\n }\\n\\n```\\n\\nFor the self ingest, a custom Logstash pipeline with a simple Grok filter was created with no index template applied:\\n\\n\\n\\n```\\n input {\\n file {\\n id => \\"NGINX_FILE_INPUT\\"\\n path => \\"/etc/logstash/self/access.log\\"\\n ecs_compatibility => disabled\\n start_position => \\"beginning\\"\\n mode => read\\n }\\n }\\n filter {\\n grok {\\n match => { \\"message\\" => \\"%{IP:clientip} - (?:%{NOTSPACE:requestClient}|-) \\\\[%{HTTPDATE:timestamp}\\\\] \\\\\\"(?:%{WORD:requestMethod} %{NOTSPACE:request}(?: HTTP/%{NUMBER:httpversion})?|%{DATA:rawrequest})\\\\\\" (?:-|%{NUMBER:response}) (?:-|%{NUMBER:bytes_in}) (-|%{QS:bytes_out}) %{QS:user_agent}\\" }\\n }\\n }\\n output {\\n elasticsearch {\\n hosts => [\\"https://myscluster.es.us-east4.gcp.elastic-cloud.com:9243\\"]\\n index => \\"nginx-self\\"\\n ilm_enabled => true\\n manage_template => false\\n user => \\"username\\"\\n password => \\"password\\"\\n ssl_verification_mode => none\\n ecs_compatibility => disabled\\n id => \\"NGINX-FILE_ES_Output\\"\\n }\\n }\\n```\\n\\n## Dataset Ingestion: ECS\\n\\nElastic comes included with many available integrations which contain everything you need to achieve to ensure that your data is ingested as efficiently as possible.\\n\\n![integrations](/assets/images/dna-of-data/integrations.png)\\n\\nFor our use case of Nginx, we\'ll be using the associated integration\'s assets only.\\n\\n![nginx integration](/assets/images/dna-of-data/nginx-integration.png)\\n\\nThe assets which are installed are more than just dashboards, there are ingest pipelines which not only normalize but enrich the data while simultaneously mapping the fields to their correct type via component templates. 
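If you want to preview what the integration's ingest pipeline will do to a single raw log line before committing any data to it, the Elasticsearch simulate API is useful. A sketch, assuming the pipeline installed by the integration is named `logs-nginx.access-1.19.1` (the exact name depends on the integration version you installed) and reusing the illustrative credentials from the configs above:

```bash
# Pipeline name, cluster URL, and credentials are illustrative; adjust to your deployment.
curl -s -u "username:password" -X POST \
  -H 'Content-Type: application/json' \
  'https://mycluster.es.us-east4.gcp.elastic-cloud.com:9243/_ingest/pipeline/logs-nginx.access-1.19.1/_simulate' \
  -d '{
    "docs": [
      {
        "_source": {
          "message": "203.0.113.7 - - [25/Sep/2024:12:00:00 +0000] \"GET /index.html HTTP/1.1\" 200 612 \"-\" \"curl/8.4.0\""
        }
      }
    ]
  }'
```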
All we have to do is make sure that as the data is coming in, that it will traverse through the ingest pipeline and use these supplied mappings.\\n\\nCreate your index template, and select the supplied component templates provided from your integration.\\n\\n![nginx-ecs](/assets/images/dna-of-data/nginx-ecs.png)\\n\\nThink of the component templates like building blocks to an index template. These allow for the reuse of core settings, ensuring standardization is adopted across your data.\\n\\n![nginx-ecs-template](/assets/images/dna-of-data/nginx-ecs-template.png)\\n\\nFor our ingestion method, we merely point to the index name that we specified during the index template creation, in this case, `nginx-ecs` and Elastic will handle all the rest!\\n\\n```\\n input {\\n file {\\n id => \\"NGINX_FILE_INPUT\\"\\n path => \\"/etc/logstash/ecs/access.log\\"\\n #ecs_compatibility => disabled\\n start_position => \\"beginning\\"\\n mode => read\\n }\\n }\\n filter {\\n }\\n output {\\n elasticsearch {\\n hosts => [\\"https://mycluster.es.us-east4.gcp.elastic-cloud.com:9243\\"]\\n index => \\"nginx-ecs\\"\\n ilm_enabled => true\\n manage_template => false\\n user => \\"username\\"\\n password => \\"password\\"\\n ssl_verification_mode => none\\n ecs_compatibility => disabled\\n id => \\"NGINX-FILE_ES_Output\\"\\n }\\n }\\n\\n```\\n\\n## Data Fidelity Comparison\\n\\nLet\'s compare how many fields are available to search upon the three indices as well as the quality of the data. Our raw index has but 15 fields to search upon, with most being duplicates for aggregation purposes.\\n\\n![nginx-raw](/assets/images/dna-of-data/nginx-raw.png)\\n\\n![mapping-1](/assets/images/dna-of-data/mapping-1.png)\\n\\nHowever from a Discover perspective, we are limited to `6` fields!\\n\\n![nginx-raw-discover](/assets/images/dna-of-data/nginx-raw-discover.png)\\n\\nOur self-parsed index has 37 available fields, however these too are duplicated and not ideal for efficient searching.\\n\\n![nginx-self](/assets/images/dna-of-data/nginx-self.png)\\n\\n![mapping-2](/assets/images/dna-of-data/mapping-2.png)\\n\\nFrom a Discover perspective here we have almost 3x as many fields to choose from, yet without the correct mapping the ease of which this data may be searched is less than ideal. A great example of this, is attempting to calculate the average bytes_in on a text field.\\n\\n![nginx-self-discover](/assets/images/dna-of-data/nginx-self-discover.png)\\n\\nFinally with our ECS index, we have 71 fields available to us! Notice that courtesy of the ingest pipeline, we have enriched fields of geographic information as well as event categorial fields.\\n\\n![nginx-ecs-pipeline](/assets/images/dna-of-data/nginx-ecs-pipeline.png)\\n\\n![mapping-3](/assets/images/dna-of-data/mapping-3.png)\\n\\n\\n\\n\\nNow what about Discover? There were 51 fields directly available to us for searching purposes:\\n\\n![nginx-ecs-discover](/assets/images/dna-of-data/nginx-ecs-discover.png)\\n\\nUsing Discover as our basis, our self-parsed index has 283% more fields to search upon whereas our ECS index has 850%!\xa0\\n\\n![table-1](/assets/images/dna-of-data/table-1.png)\\n\\n\\n## Storage Utilization Comparison\\n\\nSurely with all these fields in our ECS index the size would be exponentially larger than the self normalized index, let alone the raw index? 
The results may surprise you.\\n\\n![total-storage](/assets/images/dna-of-data/total-storage.png)\\n\\nAccounting for the replica of data of our 3.3GB size data set, we can see that the impact of normalized and mapped data has a significant impact on the amount of storage required. \\n\\n![table-2](/assets/images/dna-of-data/table-2.png)\\n\\n\\n## Conclusion\\n\\nWhile there is an increase in the amount required storage for any dataset that is enriched, Elastic provides easy solutions to maximize the fidelity of the data to be searched while simultaneously ensuring operational storage efficiency; that is the power of the Elastic Common Schema.\\n\\nLet\'s review how we were able to maximize search, while minimizing storage\\n\\n- Installing integration assets for our dataset that we are going to ingest.\\n\\n* Customizing the index template to leverage the included components to ensure mapping and parsing are aligned to the Elastic Common Schema.\\n\\nReady to get started? Sign up [for Elastic Cloud](https://cloud.elastic.co/registration) and try out the features and capabilities I\'ve outlined above to get the most value and visibility out of your data.\\n","code":"var Component=(()=>{var g=Object.create;var s=Object.defineProperty;var p=Object.getOwnPropertyDescriptor;var m=Object.getOwnPropertyNames;var u=Object.getPrototypeOf,f=Object.prototype.hasOwnProperty;var w=(t,e)=>()=>(e||t((e={exports:{}}).exports,e),e.exports),y=(t,e)=>{for(var i in e)s(t,i,{get:e[i],enumerable:!0})},r=(t,e,i,o)=>{if(e&&typeof e==\\"object\\"||typeof e==\\"function\\")for(let a of m(e))!f.call(t,a)&&a!==i&&s(t,a,{get:()=>e[a],enumerable:!(o=p(e,a))||o.enumerable});return t};var b=(t,e,i)=>(i=t!=null?g(u(t)):{},r(e||!t||!t.__esModule?s(i,\\"default\\",{value:t,enumerable:!0}):i,t)),v=t=>r(s({},\\"__esModule\\",{value:!0}),t);var d=w((S,l)=>{l.exports=_jsx_runtime});var _={};y(_,{default:()=>c,frontmatter:()=>x});var n=b(d()),x={title:\\"The DNA of DATA Increasing Efficiency with the Elastic Common Schema\\",slug:\\"dna-of-data\\",date:\\"2024-09-25\\",description:\\"Elastic ECS helps improve semantic conversion of log fields. Learn how quantifying the benefits of normalized data, not just for infrastructure efficiency, but also data fidelity.\\",author:[{slug:\\"peter-titov\\"}],image:\\"dna-of-data.jpg\\",tags:[{slug:\\"log-analytics\\"}]};function h(t){let e={a:\\"a\\",code:\\"code\\",h2:\\"h2\\",img:\\"img\\",li:\\"li\\",p:\\"p\\",pre:\\"pre\\",ul:\\"ul\\",...t.components};return(0,n.jsxs)(n.Fragment,{children:[(0,n.jsx)(e.p,{children:\\"The Elastic Common Schema is a fantastic way to simplify and unify a search experience. By aligning disparate data sources into a common language, users have a lower bar to overcome with interpreting events of interest, resolving incidents or hunting for unknown threats. However, there are underlying infrastructure reasons to justify adopting the Elastic Common Schema.\\"}),`\\n`,(0,n.jsx)(e.p,{children:\\"In this blog you will learn about the quantifiable operational benefits of ECS, how to leverage ECS with any data ingest tool, and the pitfalls to avoid. The data source leveraged in this blog is a 3.3GB Nginx log file obtained from Kaggle. 
The representation of this dataset is divided into three categories: raw, self, and ECS; with raw having zero normalization, self being a demonstration of commonly implemented mistakes observed from my 5+ years of experience working with various users, and finally ECS with the optimal approach of data hygiene.\\"}),`\\n`,(0,n.jsx)(e.p,{children:\\"This hygiene is achieved through the parsing, enrichment, and mapping of data ingested; akin to the sequencing of DNA in order to express genetic traits. Through the understanding of the data\'s structure, and assigning the correct mapping, a more thorough expression may be represented, stored and searched upon.\\"}),`\\n`,(0,n.jsx)(e.p,{children:\\"If you would like to learn more about ECS, the dataset used in this blog, or available Elastic integrations, please be sure to check out these related links:\\"}),`\\n`,(0,n.jsxs)(e.ul,{children:[`\\n`,(0,n.jsxs)(e.li,{children:[`\\n`,(0,n.jsx)(e.p,{children:(0,n.jsx)(e.a,{href:\\"https://www.elastic.co/blog/introducing-the-elastic-common-schema\\",rel:\\"nofollow\\",children:\\"Introducing the Elastic Common Schema\\"})}),`\\n`]}),`\\n`,(0,n.jsxs)(e.li,{children:[`\\n`,(0,n.jsx)(e.p,{children:(0,n.jsx)(e.a,{href:\\"https://www.kaggle.com/datasets/eliasdabbas/web-server-access-logs\\",rel:\\"nofollow\\",children:\\"Kaggle Web Server Logs\\"})}),`\\n`]}),`\\n`,(0,n.jsxs)(e.li,{children:[`\\n`,(0,n.jsx)(e.p,{children:(0,n.jsx)(e.a,{href:\\"https://www.elastic.co/integrations/data-integrations\\",rel:\\"nofollow\\",children:\\"Elastic Integrations\\"})}),`\\n`]}),`\\n`]}),`\\n`,(0,n.jsx)(e.h2,{id:\\"dataset-validation\\",children:\\"Dataset Validation\\"}),`\\n`,(0,n.jsx)(e.p,{children:\\"Before we begin, let us review how many documents exist and what we\'re required to ingest. We have 10,365,152 documents/events from our Nginx log file:\\"}),`\\n`,(0,n.jsx)(e.p,{children:(0,n.jsx)(e.img,{src:\\"/assets/images/dna-of-data/access-logs.png\\",alt:\\"nginx access logs\\",width:\\"751\\",height:\\"231\\"})}),`\\n`,(0,n.jsx)(e.p,{children:\\"With 10,365,152 documents in our targeted end-state:\\"}),`\\n`,(0,n.jsx)(e.p,{children:(0,n.jsx)(e.img,{src:\\"/assets/images/dna-of-data/end-state.png\\",alt:\\"end state\\",width:\\"757\\",height:\\"154\\"})}),`\\n`,(0,n.jsx)(e.h2,{id:\\"dataset-ingestion-raw--self\\",children:\\"Dataset Ingestion: Raw & Self\\"}),`\\n`,(0,n.jsx)(e.p,{children:\\"To achieve the raw and self ingestion techniques, this example is leveraging Logstash for simplicity. 
For the raw data ingest, a simple file input with no additional modifications or index templates.\\"}),`\\n`,(0,n.jsx)(e.pre,{children:(0,n.jsx)(e.code,{children:`\\n input {\\n file {\\n id => \\"NGINX_FILE_INPUT\\"\\n path => \\"/etc/logstash/raw/access.log\\"\\n ecs_compatibility => disabled\\n start_position => \\"beginning\\"\\n mode => read\\n }\\n }\\n filter {\\n }\\n output {\\n elasticsearch {\\n hosts => [\\"https://mycluster.es.us-east4.gcp.elastic-cloud.com:9243\\"]\\n index => \\"nginx-raw\\"\\n ilm_enabled => true\\n manage_template => false\\n user => \\"username\\"\\n password => \\"password\\"\\n ssl_verification_mode => none\\n ecs_compatibility => disabled\\n id => \\"NGINX-FILE_ES_Output\\"\\n }\\n }\\n\\n`})}),`\\n`,(0,n.jsx)(e.p,{children:\\"For the self ingest, a custom Logstash pipeline with a simple Grok filter was created with no index template applied:\\"}),`\\n`,(0,n.jsx)(e.pre,{children:(0,n.jsx)(e.code,{children:` input {\\n file {\\n id => \\"NGINX_FILE_INPUT\\"\\n path => \\"/etc/logstash/self/access.log\\"\\n ecs_compatibility => disabled\\n start_position => \\"beginning\\"\\n mode => read\\n }\\n }\\n filter {\\n grok {\\n match => { \\"message\\" => \\"%{IP:clientip} - (?:%{NOTSPACE:requestClient}|-) \\\\\\\\[%{HTTPDATE:timestamp}\\\\\\\\] \\\\\\\\\\"(?:%{WORD:requestMethod} %{NOTSPACE:request}(?: HTTP/%{NUMBER:httpversion})?|%{DATA:rawrequest})\\\\\\\\\\" (?:-|%{NUMBER:response}) (?:-|%{NUMBER:bytes_in}) (-|%{QS:bytes_out}) %{QS:user_agent}\\" }\\n }\\n }\\n output {\\n elasticsearch {\\n hosts => [\\"https://myscluster.es.us-east4.gcp.elastic-cloud.com:9243\\"]\\n index => \\"nginx-self\\"\\n ilm_enabled => true\\n manage_template => false\\n user => \\"username\\"\\n password => \\"password\\"\\n ssl_verification_mode => none\\n ecs_compatibility => disabled\\n id => \\"NGINX-FILE_ES_Output\\"\\n }\\n }\\n`})}),`\\n`,(0,n.jsx)(e.h2,{id:\\"dataset-ingestion-ecs\\",children:\\"Dataset Ingestion: ECS\\"}),`\\n`,(0,n.jsx)(e.p,{children:\\"Elastic comes included with many available integrations which contain everything you need to achieve to ensure that your data is ingested as efficiently as possible.\\"}),`\\n`,(0,n.jsx)(e.p,{children:(0,n.jsx)(e.img,{src:\\"/assets/images/dna-of-data/integrations.png\\",alt:\\"integrations\\",width:\\"1191\\",height:\\"862\\"})}),`\\n`,(0,n.jsx)(e.p,{children:\\"For our use case of Nginx, we\'ll be using the associated integration\'s assets only.\\"}),`\\n`,(0,n.jsx)(e.p,{children:(0,n.jsx)(e.img,{src:\\"/assets/images/dna-of-data/nginx-integration.png\\",alt:\\"nginx integration\\",width:\\"934\\",height:\\"420\\"})}),`\\n`,(0,n.jsx)(e.p,{children:\\"The assets which are installed are more than just dashboards, there are ingest pipelines which not only normalize but enrich the data while simultaneously mapping the fields to their correct type via component templates. All we have to do is make sure that as the data is coming in, that it will traverse through the ingest pipeline and use these supplied mappings.\\"}),`\\n`,(0,n.jsx)(e.p,{children:\\"Create your index template, and select the supplied component templates provided from your integration.\\"}),`\\n`,(0,n.jsx)(e.p,{children:(0,n.jsx)(e.img,{src:\\"/assets/images/dna-of-data/nginx-ecs.png\\",alt:\\"nginx-ecs\\",width:\\"1013\\",height:\\"588\\"})}),`\\n`,(0,n.jsx)(e.p,{children:\\"Think of the component templates like building blocks to an index template. 
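For readers who prefer the API to the UI, the same composition can be expressed directly with the index template API. A sketch under the assumption that the integration installed component templates named `logs-nginx.access@package` and `logs-nginx.access@custom` (names vary by stack version, so check what was actually installed):

```bash
# Equivalent of the UI step: compose an index template from the integration's
# component templates. Template and pipeline names are illustrative.
curl -s -u "username:password" -X PUT \
  -H 'Content-Type: application/json' \
  'https://mycluster.es.us-east4.gcp.elastic-cloud.com:9243/_index_template/nginx-ecs' \
  -d '{
    "index_patterns": ["nginx-ecs*"],
    "composed_of": ["logs-nginx.access@package", "logs-nginx.access@custom"],
    "template": {
      "settings": { "index.default_pipeline": "logs-nginx.access-1.19.1" }
    }
  }'
```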
These allow for the reuse of core settings, ensuring standardization is adopted across your data.\\"}),`\\n`,(0,n.jsx)(e.p,{children:(0,n.jsx)(e.img,{src:\\"/assets/images/dna-of-data/nginx-ecs-template.png\\",alt:\\"nginx-ecs-template\\",width:\\"605\\",height:\\"623\\"})}),`\\n`,(0,n.jsxs)(e.p,{children:[\\"For our ingestion method, we merely point to the index name that we specified during the index template creation, in this case, \\",(0,n.jsx)(e.code,{children:\\"nginx-ecs\\"}),\\" and Elastic will handle all the rest!\\"]}),`\\n`,(0,n.jsx)(e.pre,{children:(0,n.jsx)(e.code,{children:` input {\\n file {\\n id => \\"NGINX_FILE_INPUT\\"\\n path => \\"/etc/logstash/ecs/access.log\\"\\n #ecs_compatibility => disabled\\n start_position => \\"beginning\\"\\n mode => read\\n }\\n }\\n filter {\\n }\\n output {\\n elasticsearch {\\n hosts => [\\"https://mycluster.es.us-east4.gcp.elastic-cloud.com:9243\\"]\\n index => \\"nginx-ecs\\"\\n ilm_enabled => true\\n manage_template => false\\n user => \\"username\\"\\n password => \\"password\\"\\n ssl_verification_mode => none\\n ecs_compatibility => disabled\\n id => \\"NGINX-FILE_ES_Output\\"\\n }\\n }\\n\\n`})}),`\\n`,(0,n.jsx)(e.h2,{id:\\"data-fidelity-comparison\\",children:\\"Data Fidelity Comparison\\"}),`\\n`,(0,n.jsx)(e.p,{children:\\"Let\'s compare how many fields are available to search upon the three indices as well as the quality of the data. Our raw index has but 15 fields to search upon, with most being duplicates for aggregation purposes.\\"}),`\\n`,(0,n.jsx)(e.p,{children:(0,n.jsx)(e.img,{src:\\"/assets/images/dna-of-data/nginx-raw.png\\",alt:\\"nginx-raw\\",width:\\"517\\",height:\\"199\\"})}),`\\n`,(0,n.jsx)(e.p,{children:(0,n.jsx)(e.img,{src:\\"/assets/images/dna-of-data/mapping-1.png\\",alt:\\"mapping-1\\",width:\\"674\\",height:\\"362\\"})}),`\\n`,(0,n.jsxs)(e.p,{children:[\\"However from a Discover perspective, we are limited to \\",(0,n.jsx)(e.code,{children:\\"6\\"}),\\" fields!\\"]}),`\\n`,(0,n.jsx)(e.p,{children:(0,n.jsx)(e.img,{src:\\"/assets/images/dna-of-data/nginx-raw-discover.png\\",alt:\\"nginx-raw-discover\\",width:\\"301\\",height:\\"424\\"})}),`\\n`,(0,n.jsx)(e.p,{children:\\"Our self-parsed index has 37 available fields, however these too are duplicated and not ideal for efficient searching.\\"}),`\\n`,(0,n.jsx)(e.p,{children:(0,n.jsx)(e.img,{src:\\"/assets/images/dna-of-data/nginx-self.png\\",alt:\\"nginx-self\\",width:\\"513\\",height:\\"193\\"})}),`\\n`,(0,n.jsx)(e.p,{children:(0,n.jsx)(e.img,{src:\\"/assets/images/dna-of-data/mapping-2.png\\",alt:\\"mapping-2\\",width:\\"541\\",height:\\"532\\"})}),`\\n`,(0,n.jsx)(e.p,{children:\\"From a Discover perspective here we have almost 3x as many fields to choose from, yet without the correct mapping the ease of which this data may be searched is less than ideal. A great example of this, is attempting to calculate the average bytes_in on a text field.\\"}),`\\n`,(0,n.jsx)(e.p,{children:(0,n.jsx)(e.img,{src:\\"/assets/images/dna-of-data/nginx-self-discover.png\\",alt:\\"nginx-self-discover\\",width:\\"297\\",height:\\"719\\"})}),`\\n`,(0,n.jsx)(e.p,{children:\\"Finally with our ECS index, we have 71 fields available to us! 
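To make the mapping benefit concrete: the average that fails against the self-parsed text field works immediately against the ECS index, because byte counts are mapped as numbers. A hedged sketch using the ECS field `http.response.body.bytes`:

```bash
# Averaging bytes works here because the ECS mapping stores the field as a
# number; the same aggregation fails on a text-mapped field. Credentials illustrative.
curl -s -u "username:password" -X POST \
  -H 'Content-Type: application/json' \
  'https://mycluster.es.us-east4.gcp.elastic-cloud.com:9243/nginx-ecs/_search?size=0' \
  -d '{ "aggs": { "avg_response_bytes": { "avg": { "field": "http.response.body.bytes" } } } }'
```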
Notice that courtesy of the ingest pipeline, we have enriched fields of geographic information as well as event categorial fields.\\"}),`\\n`,(0,n.jsx)(e.p,{children:(0,n.jsx)(e.img,{src:\\"/assets/images/dna-of-data/nginx-ecs-pipeline.png\\",alt:\\"nginx-ecs-pipeline\\",width:\\"504\\",height:\\"190\\"})}),`\\n`,(0,n.jsx)(e.p,{children:(0,n.jsx)(e.img,{src:\\"/assets/images/dna-of-data/mapping-3.png\\",alt:\\"mapping-3\\",width:\\"597\\",height:\\"603\\"})}),`\\n`,(0,n.jsx)(e.p,{children:\\"Now what about Discover? There were 51 fields directly available to us for searching purposes:\\"}),`\\n`,(0,n.jsx)(e.p,{children:(0,n.jsx)(e.img,{src:\\"/assets/images/dna-of-data/nginx-ecs-discover.png\\",alt:\\"nginx-ecs-discover\\",width:\\"306\\",height:\\"786\\"})}),`\\n`,(0,n.jsx)(e.p,{children:\\"Using Discover as our basis, our self-parsed index has 283% more fields to search upon whereas our ECS index has 850%!\\\\xA0\\"}),`\\n`,(0,n.jsx)(e.p,{children:(0,n.jsx)(e.img,{src:\\"/assets/images/dna-of-data/table-1.png\\",alt:\\"table-1\\",width:\\"554\\",height:\\"323\\"})}),`\\n`,(0,n.jsx)(e.h2,{id:\\"storage-utilization-comparison\\",children:\\"Storage Utilization Comparison\\"}),`\\n`,(0,n.jsx)(e.p,{children:\\"Surely with all these fields in our ECS index the size would be exponentially larger than the self normalized index, let alone the raw index? The results may surprise you.\\"}),`\\n`,(0,n.jsx)(e.p,{children:(0,n.jsx)(e.img,{src:\\"/assets/images/dna-of-data/total-storage.png\\",alt:\\"total-storage\\",width:\\"1006\\",height:\\"184\\"})}),`\\n`,(0,n.jsx)(e.p,{children:\\"Accounting for the replica of data of our 3.3GB size data set, we can see that the impact of normalized and mapped data has a significant impact on the amount of storage required.\\"}),`\\n`,(0,n.jsx)(e.p,{children:(0,n.jsx)(e.img,{src:\\"/assets/images/dna-of-data/table-2.png\\",alt:\\"table-2\\",width:\\"532\\",height:\\"161\\"})}),`\\n`,(0,n.jsx)(e.h2,{id:\\"conclusion\\",children:\\"Conclusion\\"}),`\\n`,(0,n.jsx)(e.p,{children:\\"While there is an increase in the amount required storage for any dataset that is enriched, Elastic provides easy solutions to maximize the fidelity of the data to be searched while simultaneously ensuring operational storage efficiency; that is the power of the Elastic Common Schema.\\"}),`\\n`,(0,n.jsx)(e.p,{children:\\"Let\'s review how we were able to maximize search, while minimizing storage\\"}),`\\n`,(0,n.jsxs)(e.ul,{children:[`\\n`,(0,n.jsx)(e.li,{children:\\"Installing integration assets for our dataset that we are going to ingest.\\"}),`\\n`]}),`\\n`,(0,n.jsxs)(e.ul,{children:[`\\n`,(0,n.jsx)(e.li,{children:\\"Customizing the index template to leverage the included components to ensure mapping and parsing are aligned to the Elastic Common Schema.\\"}),`\\n`]}),`\\n`,(0,n.jsxs)(e.p,{children:[\\"Ready to get started? 
Sign up \\",(0,n.jsx)(e.a,{href:\\"https://cloud.elastic.co/registration\\",rel:\\"nofollow\\",children:\\"for Elastic Cloud\\"}),\\" and try out the features and capabilities I\'ve outlined above to get the most value and visibility out of your data.\\"]})]})}function c(t={}){let{wrapper:e}=t.components||{};return e?(0,n.jsx)(e,{...t,children:(0,n.jsx)(h,{...t})}):h(t)}return v(_);})();\\n;return Component;"},"_id":"articles/dna-of-data.mdx","_raw":{"sourceFilePath":"articles/dna-of-data.mdx","sourceFileName":"dna-of-data.mdx","sourceFileDir":"articles","contentType":"mdx","flattenedPath":"articles/dna-of-data"},"type":"Article","imageUrl":"/assets/images/dna-of-data/dna-of-data.jpg","readingTime":"9 min read","url":"/dna-of-data","headings":[{"level":2,"title":"Dataset Validation","href":"#dataset-validation"},{"level":2,"title":"Dataset Ingestion: Raw & Self","href":"#dataset-ingestion-raw--self"},{"level":2,"title":"Dataset Ingestion: ECS","href":"#dataset-ingestion-ecs"},{"level":2,"title":"Data Fidelity Comparison","href":"#data-fidelity-comparison"},{"level":2,"title":"Storage Utilization Comparison","href":"#storage-utilization-comparison"},{"level":2,"title":"Conclusion","href":"#conclusion"}]},{"title":"Using the Elastic Agent to monitor Amazon ECS and AWS Fargate with Elastic Observability","slug":"elastic-agent-monitor-ecs-aws-fargate-observability","date":"2023-06-15","description":"In this article, we’ll guide you through how to install the Elastic Agent with the AWS Fargate integration as a sidecar container to send host metrics and logs to Elastic Observability.","image":"blog-thumb-observability-pattern-color.png","author":[{"slug":"alexis-roberson","type":"Author","_raw":{}}],"tags":[{"slug":"metrics","type":"Tag","_raw":{}},{"slug":"cloud-monitoring","type":"Tag","_raw":{}},{"slug":"aws","type":"Tag","_raw":{}},{"slug":"containers","type":"Tag","_raw":{}},{"slug":"elastic-agent","type":"Tag","_raw":{}},{"slug":"aws-fargate","type":"Tag","_raw":{}}],"body":{"raw":"\\n## Serverless and AWS ECS Fargate\\n\\nAWS Fargate is a serverless pay-as-you-go engine used for Amazon Elastic Container Service (ECS) to run Docker containers without having to manage servers or clusters. The goal of Fargate is to containerize your application and specify the OS, CPU and memory, networking, and IAM policies needed for launch. Additionally, AWS Fargate can be used with Elastic Kubernetes Service (EKS) in a [similar manner](https://docs.aws.amazon.com/eks/latest/userguide/fargate.html).\\n\\nAlthough the provisioning of servers would be handled by a third party, the need to understand the health and performance of containers within your serverless environment becomes even more vital in identifying root causes and system interruptions. Serverless still requires observability. Elastic Observability can provide observability for not only AWS ECS with Fargate, as we will discuss in this blog, but also for a number of AWS services (EC2, RDS, ELB, etc). See our [previous blog](https://www.elastic.co/blog/aws-service-metrics-monitor-observability-easy) on managing an EC2-based application with Elastic Observability.\\n\\n## Gaining full visibility with Elastic Observability\\n\\nElastic Observability is governed by the three pillars involved in creating full visibility within a system: logs, metrics, and traces. Logs list all the events that have taken place in the system. Metrics keep track of data that will tell you if the system is down, like response time, CPU usage, memory usage, and latency. 
Traces give a good indication of the performance of your system based on the execution of requests.\\n\\nThese pillars by themselves offer some insight, but combining them allows for you to see the full scope of your system and how it handles increases in load or traffic over time. Connecting Elastic Observability to your serverless environment will help you deal with outages quicker and perform root cause analysis to prevent any future problems.\\n\\nIn this article, we’ll guide you through how to install the Elastic Agent with the [AWS Fargate](https://docs.elastic.co/integrations/awsfargate) integration as a sidecar container to send host metrics and logs to Elastic Observability.\\n\\n![](/assets/images/elastic-agent-monitor-ecs-aws-fargate-observability/Screenshot_2023-06-16_at_12.58.05_PM.png)\\n\\n## Prerequisites:\\n\\n- AWS account with AWS CLI configured\\n- GitHub account\\n- Elastic Cloud account\\n- An app running on a container in AWS\\n\\nThis tutorial is divided into two parts:\\n\\n1. Set up the Fleet server to be used by the sidecar container in AWS.\\n2. Create the sidecar container in AWS Fargate to send data back to Elastic Observability.\\n\\n## Part I: Set up the Fleet server\\n\\nFirst, let’s log in to Elastic Cloud.\\n\\n![](/assets/images/elastic-agent-monitor-ecs-aws-fargate-observability/image4.png)\\n\\nYou can either create a new deployment or use an existing one.\\n\\n![](/assets/images/elastic-agent-monitor-ecs-aws-fargate-observability/image35.png)\\n\\nFrom the **Home** page, use the side panel to scroll to Management \\\\> Fleet \\\\> Agent policies. Click **Add policy**.\\n\\n![](/assets/images/elastic-agent-monitor-ecs-aws-fargate-observability/image30.png)\\n\\nClick **Create agent policy**. Here we’ll create a policy to attach to the Fleet agent.\\n\\n![](/assets/images/elastic-agent-monitor-ecs-aws-fargate-observability/image38.png)\\n\\nGive the policy a name and save changes.\\n\\n![](/assets/images/elastic-agent-monitor-ecs-aws-fargate-observability/image44.png)\\n\\nClick **Create agent policy**. You should see the agent policy AWS Fargate in the list of policies.\\n\\n![](/assets/images/elastic-agent-monitor-ecs-aws-fargate-observability/image42.png)\\n\\nNow that we have an agent policy, let’s add the integration to collect logs and metrics from the host. Click on **AWS Fargate -\\\\> Add integration**.\\n\\n![](/assets/images/elastic-agent-monitor-ecs-aws-fargate-observability/image19.png)\\n\\nWe’ll be adding to the policy AWS to collect overall AWS metrics and AWS Fargate to collect metrics from this integration. You can find each one by typing them in the search bar.\\n\\n![](/assets/images/elastic-agent-monitor-ecs-aws-fargate-observability/image1.png)\\n\\n![](/assets/images/elastic-agent-monitor-ecs-aws-fargate-observability/image34.png)\\n\\nOnce you click on the integration, it will take you to its landing page, where you can add it to the policy.\\n\\n![](/assets/images/elastic-agent-monitor-ecs-aws-fargate-observability/image48.png)\\n\\nFor the AWS integration, the only collection settings that we will configure are Collect billing metrics, Collect logs from CloudWatch, Collect metrics from CloudWatch, Collect ECS metrics, and Collect Usage metrics. Everything else can be left disabled.\\n\\n![](/assets/images/elastic-agent-monitor-ecs-aws-fargate-observability/Screenshot_2023-06-15_at_11.35.28_AM.png)\\n\\nAnother thing to keep in mind when using this integration is the set of permissions required to collect data from AWS. 
This can be found on the AWS integration page under AWS permissions. Take note of these permissions, as we will use them to create an IAM policy.\\n\\nNext, we will add the AWS Fargate integration, which doesn’t require further configuration settings.\\n\\n![](/assets/images/elastic-agent-monitor-ecs-aws-fargate-observability/image37.png)\\n\\nNow that we have created the agent policy and attached the proper integrations, let’s create the agent that will implement the policy. Navigate back to the main Fleet page and click **Add agent**.\\n\\n![](/assets/images/elastic-agent-monitor-ecs-aws-fargate-observability/image41.png)\\n\\nSince we’ll be connecting to AWS Fargate through ECS, the host type should be set to this value. All the other default values can stay the same.\\n\\n![](/assets/images/elastic-agent-monitor-ecs-aws-fargate-observability/image15.png)\\n\\nLastly, let’s create the enrollment token and attach the agent policy. This will enable AWS ECS Fargate to access Elastic and send data.\\n\\n![](/assets/images/elastic-agent-monitor-ecs-aws-fargate-observability/image6.png)\\n\\nOnce created, you should be able to see policy name, secret, and agent policy listed.\\n\\n![](/assets/images/elastic-agent-monitor-ecs-aws-fargate-observability/image43.png)\\n\\nWe’ll be using our Fleet credentials in the next step to send data to Elastic from AWS Fargate.\\n\\n## Part II: Send data to Elastic Observability\\n\\nIt’s time to create our ECS Cluster, Service, and task definition in order to start running the container.\\n\\nLog in to your AWS account and navigate to ECS.\\n\\n![](/assets/images/elastic-agent-monitor-ecs-aws-fargate-observability/image46.png)\\n\\nWe’ll start by creating the cluster.\\n\\n![](/assets/images/elastic-agent-monitor-ecs-aws-fargate-observability/image9.png)\\n\\nAdd a name to the Cluster. And for subnets, only select the first two for us-east-1a and us-eastlb.\\n\\n![](/assets/images/elastic-agent-monitor-ecs-aws-fargate-observability/image10.png)\\n\\nFor the sake of the demo, we’ll keep the rest of the options set to default. Click **Create**.\\n\\n![](/assets/images/elastic-agent-monitor-ecs-aws-fargate-observability/image11.png)\\n\\nWe should see the cluster we created listed below.\\n\\n![](/assets/images/elastic-agent-monitor-ecs-aws-fargate-observability/Screenshot_2023-06-15_at_11.15.51_AM.png)\\n\\nNow that we’ve created our cluster to host our container, we want to create a task definition that will be used to set up our container. But before we do this, we will need to create a task role with an associated policy. This task role will allow for AWS metrics to be sent from AWS to the Elastic Agent.\\n\\n![](/assets/images/elastic-agent-monitor-ecs-aws-fargate-observability/image47.png)\\n\\nNavigate to IAM in AWS.\\n\\n![](/assets/images/elastic-agent-monitor-ecs-aws-fargate-observability/image32.png)\\n\\nGo to **Policies -\\\\> Create policy**.\\n\\n![](/assets/images/elastic-agent-monitor-ecs-aws-fargate-observability/image31.png)\\n\\nNow we will reference the AWS permissions from the Fleet AWS integration page and use them to configure the policy. In addition to these permissions, we will also add the GetAtuhenticationToken action for ECR.\\n\\nYou can configure each one using the visual editor.\\n\\n![](/assets/images/elastic-agent-monitor-ecs-aws-fargate-observability/image22.png)\\n\\nOr, use the JSON option. 
Don’t forget to replace the \\\\ with your own.\\n\\n```json\\n{\\n \\"Version\\": \\"2012-10-17\\",\\n \\"Statement\\": [\\n {\\n \\"Sid\\": \\"VisualEditor0\\",\\n \\"Effect\\": \\"Allow\\",\\n \\"Action\\": [\\n \\"sqs:DeleteMessage\\",\\n \\"sqs:ChangeMessageVisibility\\",\\n \\"sqs:ReceiveMessage\\",\\n \\"ecr:GetDownloadUrlForLayer\\",\\n \\"ecr:UploadLayerPart\\",\\n \\"ecr:PutImage\\",\\n \\"sts:AssumeRole\\",\\n \\"rds:ListTagsForResource\\",\\n \\"ecr:BatchGetImage\\",\\n \\"ecr:CompleteLayerUpload\\",\\n \\"rds:DescribeDBInstances\\",\\n \\"logs:FilterLogEvents\\",\\n \\"ecr:InitiateLayerUpload\\",\\n \\"ecr:BatchCheckLayerAvailability\\"\\n ],\\n \\"Resource\\": [\\n \\"arn:aws:iam:::role/*\\",\\n \\"arn:aws:logs:*::log-group:*\\",\\n \\"arn:aws:sqs:*::*\\",\\n \\"arn:aws:ecr:*::repository/*\\",\\n \\"arn:aws:rds:*::target-group:*\\",\\n \\"arn:aws:rds:*::subgrp:*\\",\\n \\"arn:aws:rds:*::pg:*\\",\\n \\"arn:aws:rds:*::ri:*\\",\\n \\"arn:aws:rds:*::cluster-snapshot:*\\",\\n \\"arn:aws:rds:*::cev:*/*/*\\",\\n \\"arn:aws:rds:*::og:*\\",\\n \\"arn:aws:rds:*::db:*\\",\\n \\"arn:aws:rds:*::es:*\\",\\n \\"arn:aws:rds:*::db-proxy-endpoint:*\\",\\n \\"arn:aws:rds:*::secgrp:*\\",\\n \\"arn:aws:rds:*::cluster:*\\",\\n \\"arn:aws:rds:*::cluster-pg:*\\",\\n \\"arn:aws:rds:*::cluster-endpoint:*\\",\\n \\"arn:aws:rds:*::db-proxy:*\\",\\n \\"arn:aws:rds:*::snapshot:*\\"\\n ]\\n },\\n {\\n \\"Sid\\": \\"VisualEditor1\\",\\n \\"Effect\\": \\"Allow\\",\\n \\"Action\\": [\\n \\"sqs:ListQueues\\",\\n \\"organizations:ListAccounts\\",\\n \\"ec2:DescribeInstances\\",\\n \\"tag:GetResources\\",\\n \\"cloudwatch:GetMetricData\\",\\n \\"ec2:DescribeRegions\\",\\n \\"iam:ListAccountAliases\\",\\n \\"sns:ListTopics\\",\\n \\"sts:GetCallerIdentity\\",\\n \\"cloudwatch:ListMetrics\\"\\n ],\\n \\"Resource\\": \\"*\\"\\n },\\n {\\n \\"Sid\\": \\"VisualEditor2\\",\\n \\"Effect\\": \\"Allow\\",\\n \\"Action\\": \\"ecr:GetAuthorizationToken\\",\\n \\"Resource\\": \\"arn:aws:ecr:*::repository/*\\"\\n }\\n ]\\n}\\n```\\n\\nReview your changes.\\n\\n![](/assets/images/elastic-agent-monitor-ecs-aws-fargate-observability/image3.png)\\n\\nNow let’s attach this policy to a role. Navigate to **IAM -\\\\> Roles**. Click **Create role**.\\n\\n![](/assets/images/elastic-agent-monitor-ecs-aws-fargate-observability/image45.png)\\n\\nSelect AWS service as Trusted entity type and select EC2 as Use case. Click **Next**.\\n\\n![](/assets/images/elastic-agent-monitor-ecs-aws-fargate-observability/image24.png)\\n\\nUnder permissions policies, select the policy we just created, as well as CloudWatchLogsFullAccess and AmazonEC2ContainerRegistryFullAccess. Click **Next**.\\n\\n![](/assets/images/elastic-agent-monitor-ecs-aws-fargate-observability/image27.png)\\n\\nGive the task role a name and description.\\n\\n![](/assets/images/elastic-agent-monitor-ecs-aws-fargate-observability/image39.png)\\n\\nClick **Create role**.\\n\\nNow it’s time to create the task definition. Navigate to **ECS -\\\\> Task definitions**. Click **Create new task definition**.\\n\\n![](/assets/images/elastic-agent-monitor-ecs-aws-fargate-observability/image21.png)\\n\\nLet’s give this task definition a name.\\n\\n![](/assets/images/elastic-agent-monitor-ecs-aws-fargate-observability/image14.png)\\n\\nAfter giving the task definition a name, you’ll add the Fleet credentials to the container section, which you can obtain from the Enrollment Tokens section of the Fleet section in Elastic Cloud. 
This allows us to host the Elastic Agent on the ECS container as a sidecar and send data to Elastic using Fleet credentials.\\n\\n- Container name: **elastic-agent-container**\\n\\n- Image: **docker.elastic.co/beats/elastic-agent:8.16.1**\\n\\n![](/assets/images/elastic-agent-monitor-ecs-aws-fargate-observability/image40.png)\\n\\nNow let’s add the environment variables:\\n\\n- FLEET_ENROLL: **yes**\\n\\n- FLEET_ENROLLMENT_TOKEN: **\\\\**\\n\\n- FLEET_URL: **\\\\**\\n\\n![](/assets/images/elastic-agent-monitor-ecs-aws-fargate-observability/image26.png)\\n\\nFor the sake of the demo, leave Environment, Monitoring, Storage, and Tags as default values. Now we will need to create a second container to run the image for the golang app stored in ECR. Click **Add more containers**.\\n\\n![](/assets/images/elastic-agent-monitor-ecs-aws-fargate-observability/image5.png)\\n\\nFor Environment, we will reserve 1 vCPU and 3 GB of memory. Under Task role, search for the role we created that uses the IAM policy.\\n\\n![](/assets/images/elastic-agent-monitor-ecs-aws-fargate-observability/image7.png)\\n\\nReview the changes, then click **Create**.\\n\\n![](/assets/images/elastic-agent-monitor-ecs-aws-fargate-observability/image25.png)\\n\\nYou should see your new task definition included in the list.\\n\\n![](/assets/images/elastic-agent-monitor-ecs-aws-fargate-observability/image20.png)\\n\\nThe final step is to create the service that will connect directly to the fleet server. \\nNavigate to the cluster you created and click **Create** under the Service tab.\\n\\n![](/assets/images/elastic-agent-monitor-ecs-aws-fargate-observability/image18.png)\\n\\nLet’s get our service environment configured.\\n\\n![](/assets/images/elastic-agent-monitor-ecs-aws-fargate-observability/image28.png)\\n\\nSet up the deployment configuration. Here you should provide the name of the task definition you created in the previous step. Also, provide the service with a unique name. Set the number of **desired tasks** to 2 instead of 1.\\n\\n![](/assets/images/elastic-agent-monitor-ecs-aws-fargate-observability/image16.png)\\n\\nClick **Create**. Now your service is running two tasks in your cluster using the task definition you provided.\\n\\n![](/assets/images/elastic-agent-monitor-ecs-aws-fargate-observability/image33.png)\\n\\nTo recap, we set up a Fleet server in Elastic Cloud to receive AWS Fargate data. We then created our AWS Fargate cluster task definition with the Fleet credentials implemented within the container. Lastly, we created the service to send data about our host to Elastic.\\n\\nNow let’s verify our Elastic Agent is healthy and properly receiving data from AWS Fargate.\\n\\n![](/assets/images/elastic-agent-monitor-ecs-aws-fargate-observability/image36.png)\\n\\nWe can also view a better breakdown of our agent on the Observability Overview page.\\n\\n![](/assets/images/elastic-agent-monitor-ecs-aws-fargate-observability/image2.png)\\n\\nIf we drill down to hosts, by clicking on host name we should be able to see more granular data. For instance, we can see the CPU Usage of the Elastic Agent itself that is deployed in our AWS Fargate environment.\\n\\n![](/assets/images/elastic-agent-monitor-ecs-aws-fargate-observability/image8.png)\\n\\nLastly, we can view the AWS Fargate dashboard generated using the data collected by our Elastic Agent. 
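If you prefer a command-line check over the Fleet UI for that verification, the Kibana Fleet API reports the same agent health. A sketch with placeholder values:

```bash
# Kibana URL and API key are placeholders for your deployment's values.
curl -s -H 'Authorization: ApiKey <your-api-key>' \
  'https://<your-kibana-host>/api/fleet/agents'
```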
The final step is to create the service that will connect directly to the Fleet server. \nNavigate to the cluster you created and click **Create** under the Service tab.\n\n![](/assets/images/elastic-agent-monitor-ecs-aws-fargate-observability/image18.png)\n\nLet’s get our service environment configured.\n\n![](/assets/images/elastic-agent-monitor-ecs-aws-fargate-observability/image28.png)\n\nSet up the deployment configuration. Here you should provide the name of the task definition you created in the previous step. Also, provide the service with a unique name. Set the number of **desired tasks** to 2 instead of 1.\n\n![](/assets/images/elastic-agent-monitor-ecs-aws-fargate-observability/image16.png)\n\nClick **Create**. Now your service is running two tasks in your cluster using the task definition you provided.\n\n![](/assets/images/elastic-agent-monitor-ecs-aws-fargate-observability/image33.png)\n\nTo recap, we set up a Fleet server in Elastic Cloud to receive AWS Fargate data. We then created our AWS Fargate cluster task definition with the Fleet credentials implemented within the container. Lastly, we created the service to send data about our host to Elastic.\n\nNow let’s verify our Elastic Agent is healthy and properly receiving data from AWS Fargate.\n\n![](/assets/images/elastic-agent-monitor-ecs-aws-fargate-observability/image36.png)\n\nWe can also view a better breakdown of our agent on the Observability Overview page.\n\n![](/assets/images/elastic-agent-monitor-ecs-aws-fargate-observability/image2.png)\n\nIf we drill down to hosts by clicking on the host name, we should be able to see more granular data. For instance, we can see the CPU usage of the Elastic Agent itself that is deployed in our AWS Fargate environment.\n\n![](/assets/images/elastic-agent-monitor-ecs-aws-fargate-observability/image8.png)\n\nLastly, we can view the AWS Fargate dashboard generated using the data collected by our Elastic Agent. This is an out-of-the-box dashboard that can also be customized based on the data you would like to visualize.\n\n![](/assets/images/elastic-agent-monitor-ecs-aws-fargate-observability/image23.png)\n\nAs you can see in the dashboard, we’re able to filter based on running tasks, as well as see a list of containers running in our environment. Something else that could be useful to show is the CPU usage per cluster, as shown under CPU Utilization per Cluster.\n\nThe dashboard can pull data from different sources and in this case shows data for both AWS Fargate and the greater ECS cluster. The two containers at the bottom display the CPU and memory usage directly from ECS.\n\n## Conclusion\n\nIn this article, we showed how to send data from AWS Fargate to Elastic Observability using the Elastic Agent and Fleet. Serverless architectures are quickly becoming industry standard in offloading the management of servers to third parties. However, this does not alleviate the responsibility of operations engineers to manage the data generated within these environments. Elastic Observability provides a way to not only ingest the data from serverless architectures, but also establish a roadmap to address future problems.\n\nStart your own [7-day free trial](https://aws.amazon.com/marketplace/pp/prodview-voru33wi6xs7k?trk=5fbc596b-6d2a-433a-8333-0bd1f28e84da%E2%89%BBchannel=el) by signing up via [AWS Marketplace](https://aws.amazon.com/marketplace/pp/prodview-voru33wi6xs7k?trk=d54b31eb-671c-49ba-88bb-7a1106421dfa%E2%89%BBchannel=el) and quickly spin up a deployment in minutes on any of the [Elastic Cloud regions on AWS](https://www.elastic.co/guide/en/cloud/current/ec-reference-regions.html#ec_amazon_web_services_aws_regions) around the world. Your AWS Marketplace purchase of Elastic will be included in your monthly consolidated billing statement and will draw against your committed spend with AWS.\n\n**More resources on serverless and observability and AWS:**\n\n- [Analyze your AWS application’s service metrics on Elastic Observability (EC2, ELB, RDS, and NAT)](https://www.elastic.co/blog/aws-service-metrics-monitor-observability-easy)\n- [Get visibility into AWS Lambda serverless functions with Elastic Observability](https://www.elastic.co/blog/observability-apm-aws-lambda-serverless-functions)\n- [Trace-based testing with Elastic APM and Tracetest](https://www.elastic.co/blog/trace-based-testing-elastic-apm-tracetest)\n- [Sending AWS logs into Elastic via AWS Firehose](https://www.elastic.co/blog/aws-kinesis-data-firehose-elastic-observability-analytics)\n\n_The release and timing of any features or functionality described in this post remain at Elastic\'s sole discretion. 
Any features or functionality not currently available may not be delivered on time or at all._\\n","code":"var Component=(()=>{var g=Object.create;var s=Object.defineProperty;var m=Object.getOwnPropertyDescriptor;var p=Object.getOwnPropertyNames;var u=Object.getPrototypeOf,w=Object.prototype.hasOwnProperty;var b=(i,e)=>()=>(e||i((e={exports:{}}).exports,e),e.exports),y=(i,e)=>{for(var a in e)s(i,a,{get:e[a],enumerable:!0})},o=(i,e,a,r)=>{if(e&&typeof e==\\"object\\"||typeof e==\\"function\\")for(let n of p(e))!w.call(i,n)&&n!==a&&s(i,n,{get:()=>e[n],enumerable:!(r=m(e,n))||r.enumerable});return i};var f=(i,e,a)=>(a=i!=null?g(u(i)):{},o(e||!i||!i.__esModule?s(a,\\"default\\",{value:i,enumerable:!0}):a,i)),v=i=>o(s({},\\"__esModule\\",{value:!0}),i);var c=b((C,l)=>{l.exports=_jsx_runtime});var S={};y(S,{default:()=>d,frontmatter:()=>A});var t=f(c()),A={title:\\"Using the Elastic Agent to monitor Amazon ECS and AWS Fargate with Elastic Observability\\",slug:\\"elastic-agent-monitor-ecs-aws-fargate-observability\\",date:\\"2023-06-15\\",description:\\"In this article, we\\\\u2019ll guide you through how to install the Elastic Agent with the AWS Fargate integration as a sidecar container to send host metrics and logs to Elastic Observability.\\",author:[{slug:\\"alexis-roberson\\"}],image:\\"blog-thumb-observability-pattern-color.png\\",tags:[{slug:\\"metrics\\"},{slug:\\"cloud-monitoring\\"},{slug:\\"aws\\"},{slug:\\"containers\\"},{slug:\\"elastic-agent\\"},{slug:\\"aws-fargate\\"}]};function h(i){let e={a:\\"a\\",br:\\"br\\",code:\\"code\\",em:\\"em\\",h2:\\"h2\\",img:\\"img\\",li:\\"li\\",ol:\\"ol\\",p:\\"p\\",pre:\\"pre\\",strong:\\"strong\\",ul:\\"ul\\",...i.components};return(0,t.jsxs)(t.Fragment,{children:[(0,t.jsx)(e.h2,{id:\\"serverless-and-aws-ecs-fargate\\",children:\\"Serverless and AWS ECS Fargate\\"}),`\\n`,(0,t.jsxs)(e.p,{children:[\\"AWS Fargate is a serverless pay-as-you-go engine used for Amazon Elastic Container Service (ECS) to run Docker containers without having to manage servers or clusters. The goal of Fargate is to containerize your application and specify the OS, CPU and memory, networking, and IAM policies needed for launch. Additionally, AWS Fargate can be used with Elastic Kubernetes Service (EKS) in a \\",(0,t.jsx)(e.a,{href:\\"https://docs.aws.amazon.com/eks/latest/userguide/fargate.html\\",rel:\\"nofollow\\",children:\\"similar manner\\"}),\\".\\"]}),`\\n`,(0,t.jsxs)(e.p,{children:[\\"Although the provisioning of servers would be handled by a third party, the need to understand the health and performance of containers within your serverless environment becomes even more vital in identifying root causes and system interruptions. Serverless still requires observability. Elastic Observability can provide observability for not only AWS ECS with Fargate, as we will discuss in this blog, but also for a number of AWS services (EC2, RDS, ELB, etc). See our \\",(0,t.jsx)(e.a,{href:\\"https://www.elastic.co/blog/aws-service-metrics-monitor-observability-easy\\",rel:\\"nofollow\\",children:\\"previous blog\\"}),\\" on managing an EC2-based application with Elastic Observability.\\"]}),`\\n`,(0,t.jsx)(e.h2,{id:\\"gaining-full-visibility-with-elastic-observability\\",children:\\"Gaining full visibility with Elastic Observability\\"}),`\\n`,(0,t.jsx)(e.p,{children:\\"Elastic Observability is governed by the three pillars involved in creating full visibility within a system: logs, metrics, and traces. Logs list all the events that have taken place in the system. 
Metrics keep track of data that will tell you if the system is down, like response time, CPU usage, memory usage, and latency. Traces give a good indication of the performance of your system based on the execution of requests.\\"}),`\\n`,(0,t.jsx)(e.p,{children:\\"These pillars by themselves offer some insight, but combining them allows for you to see the full scope of your system and how it handles increases in load or traffic over time. Connecting Elastic Observability to your serverless environment will help you deal with outages quicker and perform root cause analysis to prevent any future problems.\\"}),`\\n`,(0,t.jsxs)(e.p,{children:[\\"In this article, we\\\\u2019ll guide you through how to install the Elastic Agent with the \\",(0,t.jsx)(e.a,{href:\\"https://docs.elastic.co/integrations/awsfargate\\",rel:\\"nofollow\\",children:\\"AWS Fargate\\"}),\\" integration as a sidecar container to send host metrics and logs to Elastic Observability.\\"]}),`\\n`,(0,t.jsx)(e.p,{children:(0,t.jsx)(e.img,{src:\\"/assets/images/elastic-agent-monitor-ecs-aws-fargate-observability/Screenshot_2023-06-16_at_12.58.05_PM.png\\",alt:\\"\\",width:\\"1244\\",height:\\"708\\"})}),`\\n`,(0,t.jsx)(e.h2,{id:\\"prerequisites\\",children:\\"Prerequisites:\\"}),`\\n`,(0,t.jsxs)(e.ul,{children:[`\\n`,(0,t.jsx)(e.li,{children:\\"AWS account with AWS CLI configured\\"}),`\\n`,(0,t.jsx)(e.li,{children:\\"GitHub account\\"}),`\\n`,(0,t.jsx)(e.li,{children:\\"Elastic Cloud account\\"}),`\\n`,(0,t.jsx)(e.li,{children:\\"An app running on a container in AWS\\"}),`\\n`]}),`\\n`,(0,t.jsx)(e.p,{children:\\"This tutorial is divided into two parts:\\"}),`\\n`,(0,t.jsxs)(e.ol,{children:[`\\n`,(0,t.jsx)(e.li,{children:\\"Set up the Fleet server to be used by the sidecar container in AWS.\\"}),`\\n`,(0,t.jsx)(e.li,{children:\\"Create the sidecar container in AWS Fargate to send data back to Elastic Observability.\\"}),`\\n`]}),`\\n`,(0,t.jsx)(e.h2,{id:\\"part-i-set-up-the-fleet-server\\",children:\\"Part I: Set up the Fleet server\\"}),`\\n`,(0,t.jsx)(e.p,{children:\\"First, let\\\\u2019s log in to Elastic Cloud.\\"}),`\\n`,(0,t.jsx)(e.p,{children:(0,t.jsx)(e.img,{src:\\"/assets/images/elastic-agent-monitor-ecs-aws-fargate-observability/image4.png\\",alt:\\"\\",width:\\"489\\",height:\\"603\\"})}),`\\n`,(0,t.jsx)(e.p,{children:\\"You can either create a new deployment or use an existing one.\\"}),`\\n`,(0,t.jsx)(e.p,{children:(0,t.jsx)(e.img,{src:\\"/assets/images/elastic-agent-monitor-ecs-aws-fargate-observability/image35.png\\",alt:\\"\\",width:\\"1062\\",height:\\"234\\"})}),`\\n`,(0,t.jsxs)(e.p,{children:[\\"From the \\",(0,t.jsx)(e.strong,{children:\\"Home\\"}),\\" page, use the side panel to scroll to Management > Fleet > Agent policies. Click \\",(0,t.jsx)(e.strong,{children:\\"Add policy\\"}),\\".\\"]}),`\\n`,(0,t.jsx)(e.p,{children:(0,t.jsx)(e.img,{src:\\"/assets/images/elastic-agent-monitor-ecs-aws-fargate-observability/image30.png\\",alt:\\"\\",width:\\"1737\\",height:\\"1038\\"})}),`\\n`,(0,t.jsxs)(e.p,{children:[\\"Click \\",(0,t.jsx)(e.strong,{children:\\"Create agent policy\\"}),\\". 
Here we\\\\u2019ll create a policy to attach to the Fleet agent.\\"]}),`\\n`,(0,t.jsx)(e.p,{children:(0,t.jsx)(e.img,{src:\\"/assets/images/elastic-agent-monitor-ecs-aws-fargate-observability/image38.png\\",alt:\\"\\",width:\\"1894\\",height:\\"647\\"})}),`\\n`,(0,t.jsx)(e.p,{children:\\"Give the policy a name and save changes.\\"}),`\\n`,(0,t.jsx)(e.p,{children:(0,t.jsx)(e.img,{src:\\"/assets/images/elastic-agent-monitor-ecs-aws-fargate-observability/image44.png\\",alt:\\"\\",width:\\"672\\",height:\\"878\\"})}),`\\n`,(0,t.jsxs)(e.p,{children:[\\"Click \\",(0,t.jsx)(e.strong,{children:\\"Create agent policy\\"}),\\". You should see the agent policy AWS Fargate in the list of policies.\\"]}),`\\n`,(0,t.jsx)(e.p,{children:(0,t.jsx)(e.img,{src:\\"/assets/images/elastic-agent-monitor-ecs-aws-fargate-observability/image42.png\\",alt:\\"\\",width:\\"1246\\",height:\\"394\\"})}),`\\n`,(0,t.jsxs)(e.p,{children:[\\"Now that we have an agent policy, let\\\\u2019s add the integration to collect logs and metrics from the host. Click on \\",(0,t.jsx)(e.strong,{children:\\"AWS Fargate -> Add integration\\"}),\\".\\"]}),`\\n`,(0,t.jsx)(e.p,{children:(0,t.jsx)(e.img,{src:\\"/assets/images/elastic-agent-monitor-ecs-aws-fargate-observability/image19.png\\",alt:\\"\\",width:\\"1224\\",height:\\"332\\"})}),`\\n`,(0,t.jsx)(e.p,{children:\\"We\\\\u2019ll be adding to the policy AWS to collect overall AWS metrics and AWS Fargate to collect metrics from this integration. You can find each one by typing them in the search bar.\\"}),`\\n`,(0,t.jsx)(e.p,{children:(0,t.jsx)(e.img,{src:\\"/assets/images/elastic-agent-monitor-ecs-aws-fargate-observability/image1.png\\",alt:\\"\\",width:\\"1999\\",height:\\"597\\"})}),`\\n`,(0,t.jsx)(e.p,{children:(0,t.jsx)(e.img,{src:\\"/assets/images/elastic-agent-monitor-ecs-aws-fargate-observability/image34.png\\",alt:\\"\\",width:\\"1999\\",height:\\"770\\"})}),`\\n`,(0,t.jsx)(e.p,{children:\\"Once you click on the integration, it will take you to its landing page, where you can add it to the policy.\\"}),`\\n`,(0,t.jsx)(e.p,{children:(0,t.jsx)(e.img,{src:\\"/assets/images/elastic-agent-monitor-ecs-aws-fargate-observability/image48.png\\",alt:\\"\\",width:\\"1999\\",height:\\"946\\"})}),`\\n`,(0,t.jsx)(e.p,{children:\\"For the AWS integration, the only collection settings that we will configure are Collect billing metrics, Collect logs from CloudWatch, Collect metrics from CloudWatch, Collect ECS metrics, and Collect Usage metrics. Everything else can be left disabled.\\"}),`\\n`,(0,t.jsx)(e.p,{children:(0,t.jsx)(e.img,{src:\\"/assets/images/elastic-agent-monitor-ecs-aws-fargate-observability/Screenshot_2023-06-15_at_11.35.28_AM.png\\",alt:\\"\\",width:\\"1212\\",height:\\"1072\\"})}),`\\n`,(0,t.jsx)(e.p,{children:\\"Another thing to keep in mind when using this integration is the set of permissions required to collect data from AWS. This can be found on the AWS integration page under AWS permissions. 
Take note of these permissions, as we will use them to create an IAM policy.\\"}),`\\n`,(0,t.jsx)(e.p,{children:\\"Next, we will add the AWS Fargate integration, which doesn\\\\u2019t require further configuration settings.\\"}),`\\n`,(0,t.jsx)(e.p,{children:(0,t.jsx)(e.img,{src:\\"/assets/images/elastic-agent-monitor-ecs-aws-fargate-observability/image37.png\\",alt:\\"\\",width:\\"1999\\",height:\\"1023\\"})}),`\\n`,(0,t.jsxs)(e.p,{children:[\\"Now that we have created the agent policy and attached the proper integrations, let\\\\u2019s create the agent that will implement the policy. Navigate back to the main Fleet page and click \\",(0,t.jsx)(e.strong,{children:\\"Add agent\\"}),\\".\\"]}),`\\n`,(0,t.jsx)(e.p,{children:(0,t.jsx)(e.img,{src:\\"/assets/images/elastic-agent-monitor-ecs-aws-fargate-observability/image41.png\\",alt:\\"\\",width:\\"1286\\",height:\\"570\\"})}),`\\n`,(0,t.jsx)(e.p,{children:\\"Since we\\\\u2019ll be connecting to AWS Fargate through ECS, the host type should be set to this value. All the other default values can stay the same.\\"}),`\\n`,(0,t.jsx)(e.p,{children:(0,t.jsx)(e.img,{src:\\"/assets/images/elastic-agent-monitor-ecs-aws-fargate-observability/image15.png\\",alt:\\"\\",width:\\"1432\\",height:\\"1240\\"})}),`\\n`,(0,t.jsx)(e.p,{children:\\"Lastly, let\\\\u2019s create the enrollment token and attach the agent policy. This will enable AWS ECS Fargate to access Elastic and send data.\\"}),`\\n`,(0,t.jsx)(e.p,{children:(0,t.jsx)(e.img,{src:\\"/assets/images/elastic-agent-monitor-ecs-aws-fargate-observability/image6.png\\",alt:\\"\\",width:\\"1248\\",height:\\"583\\"})}),`\\n`,(0,t.jsx)(e.p,{children:\\"Once created, you should be able to see policy name, secret, and agent policy listed.\\"}),`\\n`,(0,t.jsx)(e.p,{children:(0,t.jsx)(e.img,{src:\\"/assets/images/elastic-agent-monitor-ecs-aws-fargate-observability/image43.png\\",alt:\\"\\",width:\\"1246\\",height:\\"328\\"})}),`\\n`,(0,t.jsx)(e.p,{children:\\"We\\\\u2019ll be using our Fleet credentials in the next step to send data to Elastic from AWS Fargate.\\"}),`\\n`,(0,t.jsx)(e.h2,{id:\\"part-ii-send-data-to-elastic-observability\\",children:\\"Part II: Send data to Elastic Observability\\"}),`\\n`,(0,t.jsx)(e.p,{children:\\"It\\\\u2019s time to create our ECS Cluster, Service, and task definition in order to start running the container.\\"}),`\\n`,(0,t.jsx)(e.p,{children:\\"Log in to your AWS account and navigate to ECS.\\"}),`\\n`,(0,t.jsx)(e.p,{children:(0,t.jsx)(e.img,{src:\\"/assets/images/elastic-agent-monitor-ecs-aws-fargate-observability/image46.png\\",alt:\\"\\",width:\\"957\\",height:\\"498\\"})}),`\\n`,(0,t.jsx)(e.p,{children:\\"We\\\\u2019ll start by creating the cluster.\\"}),`\\n`,(0,t.jsx)(e.p,{children:(0,t.jsx)(e.img,{src:\\"/assets/images/elastic-agent-monitor-ecs-aws-fargate-observability/image9.png\\",alt:\\"\\",width:\\"1999\\",height:\\"458\\"})}),`\\n`,(0,t.jsx)(e.p,{children:\\"Add a name to the Cluster. And for subnets, only select the first two for us-east-1a and us-east-1b.\\"}),`\\n`,(0,t.jsx)(e.p,{children:(0,t.jsx)(e.img,{src:\\"/assets/images/elastic-agent-monitor-ecs-aws-fargate-observability/image10.png\\",alt:\\"\\",width:\\"1714\\",height:\\"1392\\"})}),`\\n`,(0,t.jsxs)(e.p,{children:[\\"For the sake of the demo, we\\\\u2019ll keep the rest of the options set to default. 
Click \\",(0,t.jsx)(e.strong,{children:\\"Create\\"}),\\".\\"]}),`\\n`,(0,t.jsx)(e.p,{children:(0,t.jsx)(e.img,{src:\\"/assets/images/elastic-agent-monitor-ecs-aws-fargate-observability/image11.png\\",alt:\\"\\",width:\\"1718\\",height:\\"1192\\"})}),`\\n`,(0,t.jsx)(e.p,{children:\\"We should see the cluster we created listed below.\\"}),`\\n`,(0,t.jsx)(e.p,{children:(0,t.jsx)(e.img,{src:\\"/assets/images/elastic-agent-monitor-ecs-aws-fargate-observability/Screenshot_2023-06-15_at_11.15.51_AM.png\\",alt:\\"\\",width:\\"1248\\",height:\\"294\\"})}),`\\n`,(0,t.jsx)(e.p,{children:\\"Now that we\\\\u2019ve created our cluster to host our container, we want to create a task definition that will be used to set up our container. But before we do this, we will need to create a task role with an associated policy. This task role will allow for AWS metrics to be sent from AWS to the Elastic Agent.\\"}),`\\n`,(0,t.jsx)(e.p,{children:(0,t.jsx)(e.img,{src:\\"/assets/images/elastic-agent-monitor-ecs-aws-fargate-observability/image47.png\\",alt:\\"\\",width:\\"1524\\",height:\\"1266\\"})}),`\\n`,(0,t.jsx)(e.p,{children:\\"Navigate to IAM in AWS.\\"}),`\\n`,(0,t.jsx)(e.p,{children:(0,t.jsx)(e.img,{src:\\"/assets/images/elastic-agent-monitor-ecs-aws-fargate-observability/image32.png\\",alt:\\"\\",width:\\"1930\\",height:\\"1454\\"})}),`\\n`,(0,t.jsxs)(e.p,{children:[\\"Go to \\",(0,t.jsx)(e.strong,{children:\\"Policies -> Create policy\\"}),\\".\\"]}),`\\n`,(0,t.jsx)(e.p,{children:(0,t.jsx)(e.img,{src:\\"/assets/images/elastic-agent-monitor-ecs-aws-fargate-observability/image31.png\\",alt:\\"\\",width:\\"1999\\",height:\\"482\\"})}),`\\n`,(0,t.jsx)(e.p,{children:\\"Now we will reference the AWS permissions from the Fleet AWS integration page and use them to configure the policy. In addition to these permissions, we will also add the GetAtuhenticationToken action for ECR.\\"}),`\\n`,(0,t.jsx)(e.p,{children:\\"You can configure each one using the visual editor.\\"}),`\\n`,(0,t.jsx)(e.p,{children:(0,t.jsx)(e.img,{src:\\"/assets/images/elastic-agent-monitor-ecs-aws-fargate-observability/image22.png\\",alt:\\"\\",width:\\"1999\\",height:\\"1370\\"})}),`\\n`,(0,t.jsx)(e.p,{children:\\"Or, use the JSON option. 
Don\\\\u2019t forget to fill in your own AWS account ID in the ARNs below.\\"}),`\\n`,(0,t.jsx)(e.pre,{children:(0,t.jsx)(e.code,{className:\\"language-json\\",children:\`{\\n \\"Version\\": \\"2012-10-17\\",\\n \\"Statement\\": [\\n {\\n \\"Sid\\": \\"VisualEditor0\\",\\n \\"Effect\\": \\"Allow\\",\\n \\"Action\\": [\\n \\"sqs:DeleteMessage\\",\\n \\"sqs:ChangeMessageVisibility\\",\\n \\"sqs:ReceiveMessage\\",\\n \\"ecr:GetDownloadUrlForLayer\\",\\n \\"ecr:UploadLayerPart\\",\\n \\"ecr:PutImage\\",\\n \\"sts:AssumeRole\\",\\n \\"rds:ListTagsForResource\\",\\n \\"ecr:BatchGetImage\\",\\n \\"ecr:CompleteLayerUpload\\",\\n \\"rds:DescribeDBInstances\\",\\n \\"logs:FilterLogEvents\\",\\n \\"ecr:InitiateLayerUpload\\",\\n \\"ecr:BatchCheckLayerAvailability\\"\\n ],\\n \\"Resource\\": [\\n \\"arn:aws:iam:::role/*\\",\\n \\"arn:aws:logs:*::log-group:*\\",\\n \\"arn:aws:sqs:*::*\\",\\n \\"arn:aws:ecr:*::repository/*\\",\\n \\"arn:aws:rds:*::target-group:*\\",\\n \\"arn:aws:rds:*::subgrp:*\\",\\n \\"arn:aws:rds:*::pg:*\\",\\n \\"arn:aws:rds:*::ri:*\\",\\n \\"arn:aws:rds:*::cluster-snapshot:*\\",\\n \\"arn:aws:rds:*::cev:*/*/*\\",\\n \\"arn:aws:rds:*::og:*\\",\\n \\"arn:aws:rds:*::db:*\\",\\n \\"arn:aws:rds:*::es:*\\",\\n \\"arn:aws:rds:*::db-proxy-endpoint:*\\",\\n \\"arn:aws:rds:*::secgrp:*\\",\\n \\"arn:aws:rds:*::cluster:*\\",\\n \\"arn:aws:rds:*::cluster-pg:*\\",\\n \\"arn:aws:rds:*::cluster-endpoint:*\\",\\n \\"arn:aws:rds:*::db-proxy:*\\",\\n \\"arn:aws:rds:*::snapshot:*\\"\\n ]\\n },\\n {\\n \\"Sid\\": \\"VisualEditor1\\",\\n \\"Effect\\": \\"Allow\\",\\n \\"Action\\": [\\n \\"sqs:ListQueues\\",\\n \\"organizations:ListAccounts\\",\\n \\"ec2:DescribeInstances\\",\\n \\"tag:GetResources\\",\\n \\"cloudwatch:GetMetricData\\",\\n \\"ec2:DescribeRegions\\",\\n \\"iam:ListAccountAliases\\",\\n \\"sns:ListTopics\\",\\n \\"sts:GetCallerIdentity\\",\\n \\"cloudwatch:ListMetrics\\"\\n ],\\n \\"Resource\\": \\"*\\"\\n },\\n {\\n \\"Sid\\": \\"VisualEditor2\\",\\n \\"Effect\\": \\"Allow\\",\\n \\"Action\\": \\"ecr:GetAuthorizationToken\\",\\n \\"Resource\\": \\"arn:aws:ecr:*::repository/*\\"\\n }\\n ]\\n}\\n\`})}),`\\n`,(0,t.jsx)(e.p,{children:\\"Review your changes.\\"}),`\\n`,(0,t.jsx)(e.p,{children:(0,t.jsx)(e.img,{src:\\"/assets/images/elastic-agent-monitor-ecs-aws-fargate-observability/image3.png\\",alt:\\"\\",width:\\"1999\\",height:\\"1237\\"})}),`\\n`,(0,t.jsxs)(e.p,{children:[\\"Now let\\\\u2019s attach this policy to a role. Navigate to \\",(0,t.jsx)(e.strong,{children:\\"IAM -> Roles\\"}),\\". Click \\",(0,t.jsx)(e.strong,{children:\\"Create role\\"}),\\".\\"]}),`\\n`,(0,t.jsx)(e.p,{children:(0,t.jsx)(e.img,{src:\\"/assets/images/elastic-agent-monitor-ecs-aws-fargate-observability/image45.png\\",alt:\\"\\",width:\\"1999\\",height:\\"447\\"})}),`\\n`,(0,t.jsxs)(e.p,{children:[\\"Select AWS service as Trusted entity type and select EC2 as Use case. Click \\",(0,t.jsx)(e.strong,{children:\\"Next\\"}),\\".\\"]}),`\\n`,(0,t.jsx)(e.p,{children:(0,t.jsx)(e.img,{src:\\"/assets/images/elastic-agent-monitor-ecs-aws-fargate-observability/image24.png\\",alt:\\"\\",width:\\"1999\\",height:\\"1322\\"})}),`\\n`,(0,t.jsxs)(e.p,{children:[\\"Under permissions policies, select the policy we just created, as well as CloudWatchLogsFullAccess and AmazonEC2ContainerRegistryFullAccess. 
Click \\",(0,t.jsx)(e.strong,{children:\\"Next\\"}),\\".\\"]}),`\\n`,(0,t.jsx)(e.p,{children:(0,t.jsx)(e.img,{src:\\"/assets/images/elastic-agent-monitor-ecs-aws-fargate-observability/image27.png\\",alt:\\"\\",width:\\"1999\\",height:\\"863\\"})}),`\\n`,(0,t.jsx)(e.p,{children:\\"Give the task role a name and description.\\"}),`\\n`,(0,t.jsx)(e.p,{children:(0,t.jsx)(e.img,{src:\\"/assets/images/elastic-agent-monitor-ecs-aws-fargate-observability/image39.png\\",alt:\\"\\",width:\\"1992\\",height:\\"824\\"})}),`\\n`,(0,t.jsxs)(e.p,{children:[\\"Click \\",(0,t.jsx)(e.strong,{children:\\"Create role\\"}),\\".\\"]}),`\\n`,(0,t.jsxs)(e.p,{children:[\\"Now it\\\\u2019s time to create the task definition. Navigate to \\",(0,t.jsx)(e.strong,{children:\\"ECS -> Task definitions\\"}),\\". Click \\",(0,t.jsx)(e.strong,{children:\\"Create new task definition\\"}),\\".\\"]}),`\\n`,(0,t.jsx)(e.p,{children:(0,t.jsx)(e.img,{src:\\"/assets/images/elastic-agent-monitor-ecs-aws-fargate-observability/image21.png\\",alt:\\"\\",width:\\"1546\\",height:\\"369\\"})}),`\\n`,(0,t.jsx)(e.p,{children:\\"Let\\\\u2019s give this task definition a name.\\"}),`\\n`,(0,t.jsx)(e.p,{children:(0,t.jsx)(e.img,{src:\\"/assets/images/elastic-agent-monitor-ecs-aws-fargate-observability/image14.png\\",alt:\\"\\",width:\\"1562\\",height:\\"528\\"})}),`\\n`,(0,t.jsx)(e.p,{children:\\"After giving the task definition a name, you\\\\u2019ll add the Fleet credentials to the container section, which you can obtain from the Enrollment Tokens section of the Fleet section in Elastic Cloud. This allows us to host the Elastic Agent on the ECS container as a sidecar and send data to Elastic using Fleet credentials.\\"}),`\\n`,(0,t.jsxs)(e.ul,{children:[`\\n`,(0,t.jsxs)(e.li,{children:[`\\n`,(0,t.jsxs)(e.p,{children:[\\"Container name: \\",(0,t.jsx)(e.strong,{children:\\"elastic-agent-container\\"})]}),`\\n`]}),`\\n`,(0,t.jsxs)(e.li,{children:[`\\n`,(0,t.jsxs)(e.p,{children:[\\"Image: \\",(0,t.jsx)(e.strong,{children:\\"docker.elastic.co/beats/elastic-agent:8.16.1\\"})]}),`\\n`]}),`\\n`]}),`\\n`,(0,t.jsx)(e.p,{children:(0,t.jsx)(e.img,{src:\\"/assets/images/elastic-agent-monitor-ecs-aws-fargate-observability/image40.png\\",alt:\\"\\",width:\\"1586\\",height:\\"1178\\"})}),`\\n`,(0,t.jsx)(e.p,{children:\\"Now let\\\\u2019s add the environment variables:\\"}),`\\n`,(0,t.jsxs)(e.ul,{children:[`\\n`,(0,t.jsxs)(e.li,{children:[`\\n`,(0,t.jsxs)(e.p,{children:[\\"FLEET_ENROLL: \\",(0,t.jsx)(e.strong,{children:\\"yes\\"})]}),`\\n`]}),`\\n`,(0,t.jsxs)(e.li,{children:[`\\n`,(0,t.jsxs)(e.p,{children:[\\"FLEET_ENROLLMENT_TOKEN: \\",(0,t.jsx)(e.strong,{children:\\"\\"})]}),`\\n`]}),`\\n`,(0,t.jsxs)(e.li,{children:[`\\n`,(0,t.jsxs)(e.p,{children:[\\"FLEET_URL: \\",(0,t.jsx)(e.strong,{children:\\"\\"})]}),`\\n`]}),`\\n`]}),`\\n`,(0,t.jsx)(e.p,{children:(0,t.jsx)(e.img,{src:\\"/assets/images/elastic-agent-monitor-ecs-aws-fargate-observability/image26.png\\",alt:\\"\\",width:\\"1528\\",height:\\"1284\\"})}),`\\n`,(0,t.jsxs)(e.p,{children:[\\"For the sake of the demo, leave Environment, Monitoring, Storage, and Tags as default values. Now we will need to create a second container to run the image for the golang app stored in ECR. 
Click \\",(0,t.jsx)(e.strong,{children:\\"Add more containers\\"}),\\".\\"]}),`\\n`,(0,t.jsx)(e.p,{children:(0,t.jsx)(e.img,{src:\\"/assets/images/elastic-agent-monitor-ecs-aws-fargate-observability/image5.png\\",alt:\\"\\",width:\\"1592\\",height:\\"1406\\"})}),`\\n`,(0,t.jsx)(e.p,{children:\\"For Environment, we will reserve 1 vCPU and 3 GB of memory. Under Task role, search for the role we created that uses the IAM policy.\\"}),`\\n`,(0,t.jsx)(e.p,{children:(0,t.jsx)(e.img,{src:\\"/assets/images/elastic-agent-monitor-ecs-aws-fargate-observability/image7.png\\",alt:\\"\\",width:\\"1592\\",height:\\"1280\\"})}),`\\n`,(0,t.jsxs)(e.p,{children:[\\"Review the changes, then click \\",(0,t.jsx)(e.strong,{children:\\"Create\\"}),\\".\\"]}),`\\n`,(0,t.jsx)(e.p,{children:(0,t.jsx)(e.img,{src:\\"/assets/images/elastic-agent-monitor-ecs-aws-fargate-observability/image25.png\\",alt:\\"\\",width:\\"1634\\",height:\\"1092\\"})}),`\\n`,(0,t.jsx)(e.p,{children:\\"You should see your new task definition included in the list.\\"}),`\\n`,(0,t.jsx)(e.p,{children:(0,t.jsx)(e.img,{src:\\"/assets/images/elastic-agent-monitor-ecs-aws-fargate-observability/image20.png\\",alt:\\"\\",width:\\"1999\\",height:\\"684\\"})}),`\\n`,(0,t.jsxs)(e.p,{children:[\\"The final step is to create the service that will connect directly to the fleet server.\\",(0,t.jsx)(e.br,{}),`\\n`,\\"Navigate to the cluster you created and click \\",(0,t.jsx)(e.strong,{children:\\"Create\\"}),\\" under the Service tab.\\"]}),`\\n`,(0,t.jsx)(e.p,{children:(0,t.jsx)(e.img,{src:\\"/assets/images/elastic-agent-monitor-ecs-aws-fargate-observability/image18.png\\",alt:\\"\\",width:\\"1540\\",height:\\"698\\"})}),`\\n`,(0,t.jsx)(e.p,{children:\\"Let\\\\u2019s get our service environment configured.\\"}),`\\n`,(0,t.jsx)(e.p,{children:(0,t.jsx)(e.img,{src:\\"/assets/images/elastic-agent-monitor-ecs-aws-fargate-observability/image28.png\\",alt:\\"\\",width:\\"796\\",height:\\"742\\"})}),`\\n`,(0,t.jsxs)(e.p,{children:[\\"Set up the deployment configuration. Here you should provide the name of the task definition you created in the previous step. Also, provide the service with a unique name. Set the number of \\",(0,t.jsx)(e.strong,{children:\\"desired tasks\\"}),\\" to 2 instead of 1.\\"]}),`\\n`,(0,t.jsx)(e.p,{children:(0,t.jsx)(e.img,{src:\\"/assets/images/elastic-agent-monitor-ecs-aws-fargate-observability/image16.png\\",alt:\\"\\",width:\\"799\\",height:\\"876\\"})}),`\\n`,(0,t.jsxs)(e.p,{children:[\\"Click \\",(0,t.jsx)(e.strong,{children:\\"Create\\"}),\\". Now your service is running two tasks in your cluster using the task definition you provided.\\"]}),`\\n`,(0,t.jsx)(e.p,{children:(0,t.jsx)(e.img,{src:\\"/assets/images/elastic-agent-monitor-ecs-aws-fargate-observability/image33.png\\",alt:\\"\\",width:\\"1999\\",height:\\"1216\\"})}),`\\n`,(0,t.jsx)(e.p,{children:\\"To recap, we set up a Fleet server in Elastic Cloud to receive AWS Fargate data. We then created our AWS Fargate cluster task definition with the Fleet credentials implemented within the container. 
Lastly, we created the service to send data about our host to Elastic.\\"}),`\\n`,(0,t.jsx)(e.p,{children:\\"Now let\\\\u2019s verify our Elastic Agent is healthy and properly receiving data from AWS Fargate.\\"}),`\\n`,(0,t.jsx)(e.p,{children:(0,t.jsx)(e.img,{src:\\"/assets/images/elastic-agent-monitor-ecs-aws-fargate-observability/image36.png\\",alt:\\"\\",width:\\"1999\\",height:\\"757\\"})}),`\\n`,(0,t.jsx)(e.p,{children:\\"We can also view a better breakdown of our agent on the Observability Overview page.\\"}),`\\n`,(0,t.jsx)(e.p,{children:(0,t.jsx)(e.img,{src:\\"/assets/images/elastic-agent-monitor-ecs-aws-fargate-observability/image2.png\\",alt:\\"\\",width:\\"1999\\",height:\\"1087\\"})}),`\\n`,(0,t.jsx)(e.p,{children:\\"If we drill down to hosts, by clicking on host name we should be able to see more granular data. For instance, we can see the CPU Usage of the Elastic Agent itself that is deployed in our AWS Fargate environment.\\"}),`\\n`,(0,t.jsx)(e.p,{children:(0,t.jsx)(e.img,{src:\\"/assets/images/elastic-agent-monitor-ecs-aws-fargate-observability/image8.png\\",alt:\\"\\",width:\\"1674\\",height:\\"596\\"})}),`\\n`,(0,t.jsx)(e.p,{children:\\"Lastly, we can view the AWS Fargate dashboard generated using the data collected by our Elastic Agent. This is an out-of-the-box dashboard that can also be customized based on the data you would like to visualize.\\"}),`\\n`,(0,t.jsx)(e.p,{children:(0,t.jsx)(e.img,{src:\\"/assets/images/elastic-agent-monitor-ecs-aws-fargate-observability/image23.png\\",alt:\\"\\",width:\\"1369\\",height:\\"1999\\"})}),`\\n`,(0,t.jsx)(e.p,{children:\\"As you can see in the dashboard we\\\\u2019re able to filter based on running tasks, as well as see a list of containers running in our environment. Something else that could be useful to show is the CPU usage per cluster as shown under CPU Utilization per Cluster.\\"}),`\\n`,(0,t.jsx)(e.p,{children:\\"The dashboard can pull data from different sources and in this case shows data for both AWS Fargate and the greater ECS cluster. The two containers at the bottom display the CPU and memory usage directly from ECS.\\"}),`\\n`,(0,t.jsx)(e.h2,{id:\\"conclusion\\",children:\\"Conclusion\\"}),`\\n`,(0,t.jsx)(e.p,{children:\\"In this article, we showed how to send data from AWS Fargate to Elastic Observability using the Elastic Agent and Fleet. Serverless architectures are quickly becoming industry standard in offloading the management of servers to third parties. However, this does not alleviate the responsibility of operations engineers to manage the data generated within these environments. Elastic Observability provides a way to not only ingest the data from serverless architectures, but also establish a roadmap to address future problems.\\"}),`\\n`,(0,t.jsxs)(e.p,{children:[\\"Start your own \\",(0,t.jsx)(e.a,{href:\\"https://aws.amazon.com/marketplace/pp/prodview-voru33wi6xs7k?trk=5fbc596b-6d2a-433a-8333-0bd1f28e84da%E2%89%BBchannel=el\\",rel:\\"nofollow\\",children:\\"7-day free trial\\"}),\\" by signing up via \\",(0,t.jsx)(e.a,{href:\\"https://aws.amazon.com/marketplace/pp/prodview-voru33wi6xs7k?trk=d54b31eb-671c-49ba-88bb-7a1106421dfa%E2%89%BBchannel=el\\",rel:\\"nofollow\\",children:\\"AWS Marketplace\\"}),\\" and quickly spin up a deployment in minutes on any of the \\",(0,t.jsx)(e.a,{href:\\"https://www.elastic.co/guide/en/cloud/current/ec-reference-regions.html#ec_amazon_web_services_aws_regions\\",rel:\\"nofollow\\",children:\\"Elastic Cloud regions on AWS\\"}),\\" around the world. 
Your AWS Marketplace purchase of Elastic will be included in your monthly consolidated billing statement and will draw against your committed spend with AWS.\\"]}),`\\n`,(0,t.jsx)(e.p,{children:(0,t.jsx)(e.strong,{children:\\"More resources on serverless and observability and AWS:\\"})}),`\\n`,(0,t.jsxs)(e.ul,{children:[`\\n`,(0,t.jsx)(e.li,{children:(0,t.jsx)(e.a,{href:\\"https://www.elastic.co/blog/aws-service-metrics-monitor-observability-easy\\",rel:\\"nofollow\\",children:\\"Analyze your AWS application\\\\u2019s service metrics on Elastic Observability (EC2, ELB, RDS, and NAT)\\"})}),`\\n`,(0,t.jsx)(e.li,{children:(0,t.jsx)(e.a,{href:\\"https://www.elastic.co/blog/observability-apm-aws-lambda-serverless-functions\\",rel:\\"nofollow\\",children:\\"Get visibility into AWS Lambda serverless functions with Elastic Observability\\"})}),`\\n`,(0,t.jsx)(e.li,{children:(0,t.jsx)(e.a,{href:\\"https://www.elastic.co/blog/trace-based-testing-elastic-apm-tracetest\\",rel:\\"nofollow\\",children:\\"Trace-based testing with Elastic APM and Tracetest\\"})}),`\\n`,(0,t.jsx)(e.li,{children:(0,t.jsx)(e.a,{href:\\"https://www.elastic.co/blog/aws-kinesis-data-firehose-elastic-observability-analytics\\",rel:\\"nofollow\\",children:\\"Sending AWS logs into Elastic via AWS Firehose\\"})}),`\\n`]}),`\\n`,(0,t.jsx)(e.p,{children:(0,t.jsx)(e.em,{children:\\"The release and timing of any features or functionality described in this post remain at Elastic\'s sole discretion. Any features or functionality not currently available may not be delivered on time or at all.\\"})})]})}function d(i={}){let{wrapper:e}=i.components||{};return e?(0,t.jsx)(e,{...i,children:(0,t.jsx)(h,{...i})}):h(i)}return v(S);})();\\n;return Component;"},"_id":"articles/elastic-agent-monitor-ecs-aws-fargate-elastic-observability.mdx","_raw":{"sourceFilePath":"articles/elastic-agent-monitor-ecs-aws-fargate-elastic-observability.mdx","sourceFileName":"elastic-agent-monitor-ecs-aws-fargate-elastic-observability.mdx","sourceFileDir":"articles","contentType":"mdx","flattenedPath":"articles/elastic-agent-monitor-ecs-aws-fargate-elastic-observability"},"type":"Article","imageUrl":"/assets/images/elastic-agent-monitor-ecs-aws-fargate-observability/blog-thumb-observability-pattern-color.png","readingTime":"13 min read","url":"/elastic-agent-monitor-ecs-aws-fargate-observability","headings":[{"level":2,"title":"Serverless and AWS ECS Fargate","href":"#serverless-and-aws-ecs-fargate"},{"level":2,"title":"Gaining full visibility with Elastic Observability","href":"#gaining-full-visibility-with-elastic-observability"},{"level":2,"title":"Prerequisites:","href":"#prerequisites"},{"level":2,"title":"Part I: Set up the Fleet server","href":"#part-i-set-up-the-fleet-server"},{"level":2,"title":"Part II: Send data to Elastic Observability","href":"#part-ii-send-data-to-elastic-observability"},{"level":2,"title":"Conclusion","href":"#conclusion"}]},{"title":"Getting started with the Elastic AI Assistant for Observability and Amazon Bedrock","slug":"elastic-ai-assistant-observability-amazon-bedrock","date":"2024-05-03","description":"Follow this step-by-step process to get started with the Elastic AI Assistant for Observability and Amazon 
Bedrock.","image":"AI_hand.jpg","author":[{"slug":"jonathan-simon","type":"Author","_raw":{}},{"slug":"udayasimha-theepireddy-uday","type":"Author","_raw":{}}],"tags":[{"slug":"ai-assistant","type":"Tag","_raw":{}},{"slug":"genai","type":"Tag","_raw":{}},{"slug":"aws-bedrock","type":"Tag","_raw":{}},{"slug":"aws","type":"Tag","_raw":{}}],"body":{"raw":"\\nElastic recently released version [8.13, which includes the general availability of Amazon Bedrock integration for the Elastic AI Assistant for Observability](https://www.elastic.co/blog/whats-new-elastic-8-13-0). This blog post will walk through the step-by-step process of setting up the Elastic AI Assistant with Amazon Bedrock. Then, we’ll show you how to add content to the AI Assistant’s knowledge base to demonstrate how the power of Elasticsearch combined with Amazon Bedrock can supercharge the answers Elastic AI Assistant provides so that they are uniquely specific to your needs.\\n\\nManaging applications and the infrastructure they run on requires advanced observability into the diverse types of data involved like logs, traces, profiles, and metrics. General purpose generative AI large language models (LLMs) offer a new capability to provide human readable guidance to your observability questions. However, they have limitations. Specifically, when it comes to providing answers about your application’s distinct observability data like real-time metrics, the LLMs require additional context to provide answers that will help to actually resolve issues. This is a limitation that the Elastic AI Assistant for Observability can uniquely solve.\\n\\nElastic Observability, serving as a central datastore of all the observability data flowing from your application, combined with the Elastic AI Assistant gives you the ability to generate a context window that can inform an LLM’s responses and vastly improve the answers it provides. For example, when you ask the Elastic AI Assistant a question about a specific issue happening in your application, it gathers up all the relevant details — current errors captured from logs or a related runbook that your team has stored in the Elastic AI Assistant’s knowledge base. Then, it sends that information to the Amazon Bedrock LLM as a context window from which it can better answer your observability questions.\\n\\nRead on to follow the steps for setting up the Elastic AI Assistant for yourself.\\n\\n## Set up the Elastic AI Assistant for Observability: Create an Amazon Bedrock connector in Elastic Cloud\\n\\nStart by creating an Elastic Cloud 8.13 deployment via the [AWS marketplace](https://aws.amazon.com/marketplace/pp/prodview-voru33wi6xs7k). If you’re a new user of Elastic Cloud, you can create a new deployment with a 7-day free trial.\\n\\n![1](/assets/images/elastic-ai-assistant-observability-amazon-bedrock/1.png)\\n\\nSign in to the Elastic Cloud deployment you’ve created. 
From the top-level menu, select **Stack Management**.\n\n![2](/assets/images/elastic-ai-assistant-observability-amazon-bedrock/2.png)\n\nSelect **Connectors**.\n\n![3](/assets/images/elastic-ai-assistant-observability-amazon-bedrock/3.png)\n\nClick the **Create connector** button.\n\n![4](/assets/images/elastic-ai-assistant-observability-amazon-bedrock/4.png)\n\n## Enable Amazon Bedrock model access\n\nFor populating the required connector settings, enable Amazon Bedrock model access in the AWS console using the following steps.\n\nIn a new browser tab, open [Amazon Bedrock](https://console.aws.amazon.com/bedrock/) and click the **Get started** button.\n\n![5](/assets/images/elastic-ai-assistant-observability-amazon-bedrock/5.png)\n\nCurrently, access to the Amazon Bedrock foundation models is granted by requesting access using the Bedrock **Model access** section in the AWS console.\n\nSelect **Model access** from the navigation menu.\n\n![6](/assets/images/elastic-ai-assistant-observability-amazon-bedrock/6.png)\n\nTo request access, select the foundation models that you want to access and click the **Save Changes** button. For this blog post, we will choose the Anthropic Claude models.\n\n![7](/assets/images/elastic-ai-assistant-observability-amazon-bedrock/7.png)\n\nOnce access is granted, the **Manage model access** settings will indicate that access has been granted.\n\n![8](/assets/images/elastic-ai-assistant-observability-amazon-bedrock/8.png)\n\n### Create AWS IAM User\n\nCreate an [IAM](https://aws.amazon.com/iam/) user and assign it a role with [Amazon Bedrock full access](https://docs.aws.amazon.com/aws-managed-policy/latest/reference/AmazonBedrockFullAccess.html) and also [generate an IAM access key and secret key](https://docs.aws.amazon.com/IAM/latest/UserGuide/id_credentials_access-keys.html) in the console. If you already have an IAM user with a generated access key and secret key, you can use the existing credentials to access Amazon Bedrock.\n\n
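The managed full-access policy above is the quickest way to get going. If you would rather scope the IAM user down, a minimal inline policy covering just model invocation could look like the sketch below; the two bedrock actions shown are the standard IAM actions for invoking models, and restricting the Resource element to specific model ARNs is an optional further tightening.\n\n```json\n{\n  \"Version\": \"2012-10-17\",\n  \"Statement\": [\n    {\n      \"Effect\": \"Allow\",\n      \"Action\": [\n        \"bedrock:InvokeModel\",\n        \"bedrock:InvokeModelWithResponseStream\"\n      ],\n      \"Resource\": \"*\"\n    }\n  ]\n}\n```\n\n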
### Configure Elastic connector to use Amazon Bedrock\n\nBack in the Elastic Cloud deployment create connector flyout, select the connector for Amazon Bedrock.\n\n![9](/assets/images/elastic-ai-assistant-observability-amazon-bedrock/9.png)\n\nEnter a **Name** of your choice for the connector. Also, enter the **Access Key** and **Key Secret** that you copied in a previous step. Click the **Save & test** button to create the connector.\n\n![10](/assets/images/elastic-ai-assistant-observability-amazon-bedrock/10.png)\n\nWithin the **Edit Connector** flyout window, click the **Run** button to confirm that the connector configuration is valid and can successfully connect to your Amazon Bedrock instance.\n\n![11](/assets/images/elastic-ai-assistant-observability-amazon-bedrock/11.png)\n\nYou should see confirmation that the connector test was successful.\n\n![12](/assets/images/elastic-ai-assistant-observability-amazon-bedrock/12.png)\n\n### Add an example logs record\n\nNow that the connector is configured, let\'s add a logs record to demonstrate how the Elastic AI Assistant can help you to better understand the diverse types of information contained within logs.\n\nUse the Elastic Dev Tools to add a single logs record. Click the top-level menu and select **Dev Tools**.\n\n![13](/assets/images/elastic-ai-assistant-observability-amazon-bedrock/13.png)\n\nWithin the console area of Dev Tools, enter the following POST statement:\n\n```json\nPOST /logs-elastic_agent-default/_doc\n{\n \"message\": \"Status(StatusCode=\\\"BadGateway\\\", Detail=\\\"Error: The server encountered a temporary error and could not complete your request\\\").\",\n \"@timestamp\": \"2024-04-21T10:33:00.884Z\",\n \"log\": {\n \t \"level\": \"error\"\n },\n \"service\": {\n \t \"name\": \"proxyService\"\n },\n \"host\": {\n \t \"name\": \"appserver-2\"\n }\n}\n```\n\nThen run the POST command by clicking the green **Run** button.\n\n![14](/assets/images/elastic-ai-assistant-observability-amazon-bedrock/14.png)\n\nYou should see a 201 response confirming that the example logs record was successfully created.\n\n![15](/assets/images/elastic-ai-assistant-observability-amazon-bedrock/15.png)\n\n
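As an optional sanity check, you can also query the data stream directly from the same Dev Tools console. A simple match query like the following sketch should return the document you just indexed:\n\n```json\nGET /logs-elastic_agent-default/_search\n{\n  \"query\": {\n    \"match\": {\n      \"message\": \"BadGateway\"\n    }\n  }\n}\n```\n\n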
### Use the Elastic AI Assistant\n\nNow that you have a log entry, let’s use the AI Assistant to see how it interacts with logs data. Click the top-level menu and select **Observability**.\n\n![16](/assets/images/elastic-ai-assistant-observability-amazon-bedrock/16.png)\n\nSelect **Logs Explorer** under Observability.\n\n![17](/assets/images/elastic-ai-assistant-observability-amazon-bedrock/17.png)\n\nIn the Logs Explorer search box, enter the text “badgateway” and press the **Enter** key to perform the search.\n\n![18](/assets/images/elastic-ai-assistant-observability-amazon-bedrock/18.png)\n\nClick the **View all matches** button to include all search results.\n\n![19](/assets/images/elastic-ai-assistant-observability-amazon-bedrock/19.png)\n\nYou should see the one log record that you previously inserted via Dev Tools. Click the expand icon in the **actions** column to see the log record’s details.\n\n![20](/assets/images/elastic-ai-assistant-observability-amazon-bedrock/20.png)\n\nYou should see the expanded view of the logs record. Let’s use the AI Assistant to summarize it. Click on the **What\'s this message?** button.\n\n![21](/assets/images/elastic-ai-assistant-observability-amazon-bedrock/21.png)\n\nWe get a fairly generic answer back. Depending on the exception or error we\'re trying to analyze, this can still be really useful, but we can improve this response by adding additional documentation to the AI Assistant knowledge base.\n\n![22](/assets/images/elastic-ai-assistant-observability-amazon-bedrock/22.png)\n\nLet’s add an entry in the AI Assistant’s knowledge base to improve its understanding of this specific logs message.\n\n![23](/assets/images/elastic-ai-assistant-observability-amazon-bedrock/23.png)\n\nClick the **AI Assistant** button at the top right of the window.\n\n![24](/assets/images/elastic-ai-assistant-observability-amazon-bedrock/24.png)\n\nClick the **Install Knowledge base** button.\n\n![25](/assets/images/elastic-ai-assistant-observability-amazon-bedrock/25.png)\n\nClick the top-level menu and select **Stack Management**.\n\n![26](/assets/images/elastic-ai-assistant-observability-amazon-bedrock/26.png)\n\nThen select **AI Assistants**.\n\n![27](/assets/images/elastic-ai-assistant-observability-amazon-bedrock/27.png)\n\nClick **Elastic AI Assistant for Observability**.\n\n![28](/assets/images/elastic-ai-assistant-observability-amazon-bedrock/28.png)\n\nSelect the **Knowledge base** tab.\n\n![29](/assets/images/elastic-ai-assistant-observability-amazon-bedrock/29.png)\n\nClick the **New entry** button and select **Single entry**.\n\n![30](/assets/images/elastic-ai-assistant-observability-amazon-bedrock/30.png)\n\nGive it the **Name** “proxyservice” and enter the following text as the **Contents**:\n\n```markdown\n\nI have the following runbook located on Github. Store this information in your knowledge base and always include the link to the runbook in your response if the topic is related to a bad gateway error.\n\nRunbook Link: https://github.com/elastic/observability-aiops/blob/main/ai_assistant/runbooks/slos/502-errors.md\n\nRunbook Title: Handling 502 Bad Gateway Errors\n\nSummary: This is likely an issue with Nginx proxy configuration\n\nBody: This runbook provides instructions for diagnosing and resolving 502 Bad Gateway errors in your system.\n```\n\nClick **Save** to save the new knowledge base entry.\n\n![31](/assets/images/elastic-ai-assistant-observability-amazon-bedrock/31.png)\n\nNow let’s go back to the Observability Logs Explorer. Click the top-level menu and select **Observability**.\n\n![32](/assets/images/elastic-ai-assistant-observability-amazon-bedrock/32.png)\n\nThen select **Explorer** under **Logs**.\n\n![33](/assets/images/elastic-ai-assistant-observability-amazon-bedrock/33.png)\n\nExpand the same logs entry as you did previously and click the **What’s this message?** button.\n\n![34](/assets/images/elastic-ai-assistant-observability-amazon-bedrock/34.png)\n\nThe response you get now should be much more relevant.\n\n![35](/assets/images/elastic-ai-assistant-observability-amazon-bedrock/35.png)\n\n## Try out the Elastic AI Assistant with a knowledge base filled with your own data\n\nNow you’ve seen the complete process of connecting the Elastic AI Assistant to Amazon Bedrock. You’ve also seen how to use the AI Assistant’s knowledge base to store custom remediation documentation like runbooks that the AI Assistant can leverage to generate more helpful responses. Steps like this can help you remediate issues more quickly when they happen. 
Try out the Elastic AI Assistant with your own logs and custom knowledge base.\\n\\nStart a [7-day free trial](https://aws.amazon.com/marketplace/pp/prodview-voru33wi6xs7k?trk=5fbc596b-6d2a-433a-8333-0bd1f28e84da%E2%89%BBchannel=el) by signing up via [AWS Marketplace](https://aws.amazon.com/marketplace/pp/prodview-voru33wi6xs7k) and quickly spin up a deployment in minutes on any of the [Elastic Cloud regions on AWS](https://www.elastic.co/guide/en/cloud/current/ec-reference-regions.html#ec_amazon_web_services_aws_regions) around the world.\\n\\n_The release and timing of any features or functionality described in this post remain at Elastic\'s sole discretion. Any features or functionality not currently available may not be delivered on time or at all._\\n\\n_In this blog post, we may have used or referred to third party generative AI tools, which are owned and operated by their respective owners. Elastic does not have any control over the third party tools and we have no responsibility or liability for their content, operation or use, nor for any loss or damage that may arise from your use of such tools. Please exercise caution when using AI tools with personal, sensitive or confidential information. Any data you submit may be used for AI training or other purposes. There is no guarantee that information you provide will be kept secure or confidential. You should familiarize yourself with the privacy practices and terms of use of any generative AI tools prior to use._\\n\\n_Elastic, Elasticsearch, ESRE, Elasticsearch Relevance Engine and associated marks are trademarks, logos or registered trademarks of Elasticsearch N.V. in the United States and other countries. All other company and product names are trademarks, logos or registered trademarks of their respective owners._\\n","code":"var Component=(()=>{var g=Object.create;var n=Object.defineProperty;var m=Object.getOwnPropertyDescriptor;var p=Object.getOwnPropertyNames;var u=Object.getPrototypeOf,b=Object.prototype.hasOwnProperty;var y=(s,e)=>()=>(e||s((e={exports:{}}).exports,e),e.exports),w=(s,e)=>{for(var a in e)n(s,a,{get:e[a],enumerable:!0})},r=(s,e,a,o)=>{if(e&&typeof e==\\"object\\"||typeof e==\\"function\\")for(let i of p(e))!b.call(s,i)&&i!==a&&n(s,i,{get:()=>e[i],enumerable:!(o=m(e,i))||o.enumerable});return s};var v=(s,e,a)=>(a=s!=null?g(u(s)):{},r(e||!s||!s.__esModule?n(a,\\"default\\",{value:s,enumerable:!0}):a,s)),k=s=>r(n({},\\"__esModule\\",{value:!0}),s);var c=y((E,l)=>{l.exports=_jsx_runtime});var A={};w(A,{default:()=>d,frontmatter:()=>f});var t=v(c()),f={title:\\"Getting started with the Elastic AI Assistant for Observability and Amazon Bedrock\\",slug:\\"elastic-ai-assistant-observability-amazon-bedrock\\",date:\\"2024-05-03\\",description:\\"Follow this step-by-step process to get started with the Elastic AI Assistant for Observability and Amazon Bedrock.\\",author:[{slug:\\"jonathan-simon\\"},{slug:\\"udayasimha-theepireddy-uday\\"}],image:\\"AI_hand.jpg\\",tags:[{slug:\\"ai-assistant\\"},{slug:\\"genai\\"},{slug:\\"aws-bedrock\\"},{slug:\\"aws\\"}]};function h(s){let e={a:\\"a\\",code:\\"code\\",em:\\"em\\",h2:\\"h2\\",h3:\\"h3\\",img:\\"img\\",p:\\"p\\",pre:\\"pre\\",strong:\\"strong\\",...s.components};return(0,t.jsxs)(t.Fragment,{children:[(0,t.jsxs)(e.p,{children:[\\"Elastic recently released version \\",(0,t.jsx)(e.a,{href:\\"https://www.elastic.co/blog/whats-new-elastic-8-13-0\\",rel:\\"nofollow\\",children:\\"8.13, which includes the general availability of Amazon Bedrock integration for the Elastic AI 
Assistant for Observability\\"}),\\". This blog post will walk through the step-by-step process of setting up the Elastic AI Assistant with Amazon Bedrock. Then, we\\\\u2019ll show you how to add content to the AI Assistant\\\\u2019s knowledge base to demonstrate how the power of Elasticsearch combined with Amazon Bedrock can supercharge the answers Elastic AI Assistant provides so that they are uniquely specific to your needs.\\"]}),`\\n`,(0,t.jsx)(e.p,{children:\\"Managing applications and the infrastructure they run on requires advanced observability into the diverse types of data involved like logs, traces, profiles, and metrics. General purpose generative AI large language models (LLMs) offer a new capability to provide human readable guidance to your observability questions. However, they have limitations. Specifically, when it comes to providing answers about your application\\\\u2019s distinct observability data like real-time metrics, the LLMs require additional context to provide answers that will help to actually resolve issues. This is a limitation that the Elastic AI Assistant for Observability can uniquely solve.\\"}),`\\n`,(0,t.jsx)(e.p,{children:\\"Elastic Observability, serving as a central datastore of all the observability data flowing from your application, combined with the Elastic AI Assistant gives you the ability to generate a context window that can inform an LLM\\\\u2019s responses and vastly improve the answers it provides. For example, when you ask the Elastic AI Assistant a question about a specific issue happening in your application, it gathers up all the relevant details \\\\u2014 current errors captured from logs or a related runbook that your team has stored in the Elastic AI Assistant\\\\u2019s knowledge base. Then, it sends that information to the Amazon Bedrock LLM as a context window from which it can better answer your observability questions.\\"}),`\\n`,(0,t.jsx)(e.p,{children:\\"Read on to follow the steps for setting up the Elastic AI Assistant for yourself.\\"}),`\\n`,(0,t.jsx)(e.h2,{id:\\"set-up-the-elastic-ai-assistant-for-observability-create-an-amazon-bedrock-connector-in-elastic-cloud\\",children:\\"Set up the Elastic AI Assistant for Observability: Create an Amazon Bedrock connector in Elastic Cloud\\"}),`\\n`,(0,t.jsxs)(e.p,{children:[\\"Start by creating an Elastic Cloud 8.13 deployment via the \\",(0,t.jsx)(e.a,{href:\\"https://aws.amazon.com/marketplace/pp/prodview-voru33wi6xs7k\\",rel:\\"nofollow\\",children:\\"AWS marketplace\\"}),\\". If you\\\\u2019re a new user of Elastic Cloud, you can create a new deployment with a 7-day free trial.\\"]}),`\\n`,(0,t.jsx)(e.p,{children:(0,t.jsx)(e.img,{src:\\"/assets/images/elastic-ai-assistant-observability-amazon-bedrock/1.png\\",alt:\\"1\\",width:\\"1194\\",height:\\"803\\"})}),`\\n`,(0,t.jsxs)(e.p,{children:[\\"Sign in to the Elastic Cloud deployment you\\\\u2019ve created. 
From the top level menu, select \\",(0,t.jsx)(e.strong,{children:\\"Stack Management\\"}),\\".\\"]}),`\\n`,(0,t.jsx)(e.p,{children:(0,t.jsx)(e.img,{src:\\"/assets/images/elastic-ai-assistant-observability-amazon-bedrock/2.png\\",alt:\\"2\\",width:\\"1293\\",height:\\"971\\"})}),`\\n`,(0,t.jsxs)(e.p,{children:[\\"Select \\",(0,t.jsx)(e.strong,{children:\\"Connectors\\"}),\\".\\"]}),`\\n`,(0,t.jsx)(e.p,{children:(0,t.jsx)(e.img,{src:\\"/assets/images/elastic-ai-assistant-observability-amazon-bedrock/3.png\\",alt:\\"3\\",width:\\"1291\\",height:\\"917\\"})}),`\\n`,(0,t.jsxs)(e.p,{children:[\\"Click the \\",(0,t.jsx)(e.strong,{children:\\"Create connector\\"}),\\" button.\\"]}),`\\n`,(0,t.jsx)(e.p,{children:(0,t.jsx)(e.img,{src:\\"/assets/images/elastic-ai-assistant-observability-amazon-bedrock/4.png\\",alt:\\"4\\",width:\\"1293\\",height:\\"525\\"})}),`\\n`,(0,t.jsx)(e.h2,{id:\\"enable-amazon-bedrock-model-access\\",children:\\"Enable Amazon Bedrock model access\\"}),`\\n`,(0,t.jsx)(e.p,{children:\\"For populating the required connector settings, enable Amazon Bedrock model access in the AWS console using the following steps.\\"}),`\\n`,(0,t.jsxs)(e.p,{children:[\\"In a new browser tab, open \\",(0,t.jsx)(e.a,{href:\\"https://console.aws.amazon.com/bedrock/\\",rel:\\"nofollow\\",children:\\"Amazon Bedrock\\"}),\\" and click the \\",(0,t.jsx)(e.strong,{children:\\"Get started\\"}),\\" button.\\"]}),`\\n`,(0,t.jsx)(e.p,{children:(0,t.jsx)(e.img,{src:\\"/assets/images/elastic-ai-assistant-observability-amazon-bedrock/5.png\\",alt:\\"5\\",width:\\"1233\\",height:\\"373\\"})}),`\\n`,(0,t.jsxs)(e.p,{children:[\\"Currently, access to the Amazon Bedrock foundation models is granted by requesting access using the Bedrock \\",(0,t.jsx)(e.strong,{children:\\"Model access\\"}),\\" section in the AWS console.\\"]}),`\\n`,(0,t.jsxs)(e.p,{children:[\\"Select \\",(0,t.jsx)(e.strong,{children:\\"Model access\\"}),\\" from the navigation menu.\\"]}),`\\n`,(0,t.jsx)(e.p,{children:(0,t.jsx)(e.img,{src:\\"/assets/images/elastic-ai-assistant-observability-amazon-bedrock/6.png\\",alt:\\"6\\",width:\\"1028\\",height:\\"940\\"})}),`\\n`,(0,t.jsxs)(e.p,{children:[\\"To request access, select the foundation models that you want to access and click the \\",(0,t.jsx)(e.strong,{children:\\"Save Changes\\"}),\\" button. 
For this blog post, we will choose the Anthropic Claude models.\\"]}),`\\n`,(0,t.jsx)(e.p,{children:(0,t.jsx)(e.img,{src:\\"/assets/images/elastic-ai-assistant-observability-amazon-bedrock/7.png\\",alt:\\"7\\",width:\\"1509\\",height:\\"647\\"})}),`\\n`,(0,t.jsxs)(e.p,{children:[\\"Once access is granted, the \\",(0,t.jsx)(e.strong,{children:\\"Manage model\\"}),\\" \\",(0,t.jsx)(e.strong,{children:\\"access\\"}),\\" settings will indicate that access has been granted.\\"]}),`\\n`,(0,t.jsx)(e.p,{children:(0,t.jsx)(e.img,{src:\\"/assets/images/elastic-ai-assistant-observability-amazon-bedrock/8.png\\",alt:\\"8\\",width:\\"1999\\",height:\\"857\\"})}),`\\n`,(0,t.jsx)(e.h3,{id:\\"create-aws-iam-user\\",children:\\"Create AWS IAM User\\"}),`\\n`,(0,t.jsxs)(e.p,{children:[\\"Create an \\",(0,t.jsx)(e.a,{href:\\"https://aws.amazon.com/iam/\\",rel:\\"nofollow\\",children:\\"IAM\\"}),\\" user and assign it a role with \\",(0,t.jsx)(e.a,{href:\\"https://docs.aws.amazon.com/aws-managed-policy/latest/reference/AmazonBedrockFullAccess.html\\",rel:\\"nofollow\\",children:\\"Amazon Bedrock full access\\"}),\\" and also \\",(0,t.jsx)(e.a,{href:\\"https://docs.aws.amazon.com/IAM/latest/UserGuide/id_credentials_access-keys.html\\",rel:\\"nofollow\\",children:\\"generate an IAM access key and secret key\\"}),\\" in the console. If you already have an IAM user with a generated access key and secret key, you can use the existing credentials to access Amazon Bedrock.\\"]}),`\\n`,(0,t.jsx)(e.h3,{id:\\"configure-elastic-connector-to-use-amazon-bedrock\\",children:\\"Configure Elastic connector to use Amazon Bedrock\\"}),`\\n`,(0,t.jsx)(e.p,{children:\\"Back in the Elastic Cloud deployment create connector flyout, select the connector for Amazon Bedrock.\\"}),`\\n`,(0,t.jsx)(e.p,{children:(0,t.jsx)(e.img,{src:\\"/assets/images/elastic-ai-assistant-observability-amazon-bedrock/9.png\\",alt:\\"9\\",width:\\"1217\\",height:\\"530\\"})}),`\\n`,(0,t.jsxs)(e.p,{children:[\\"Enter a \\",(0,t.jsx)(e.strong,{children:\\"Name\\"}),\\" of your choice for the connector. Also, enter the \\",(0,t.jsx)(e.strong,{children:\\"Access Key\\"}),\\" and \\",(0,t.jsx)(e.strong,{children:\\"Key Secret\\"}),\\" that you copied in a previous step. 
Click the \\",(0,t.jsx)(e.strong,{children:\\"Save & test\\"}),\\" button to create the connector.\\"]}),`\\n`,(0,t.jsx)(e.p,{children:(0,t.jsx)(e.img,{src:\\"/assets/images/elastic-ai-assistant-observability-amazon-bedrock/10.png\\",alt:\\"10\\",width:\\"1238\\",height:\\"1526\\"})}),`\\n`,(0,t.jsxs)(e.p,{children:[\\"Within the \\",(0,t.jsx)(e.strong,{children:\\"Edit Connector\\"}),\\" flyout window, click the \\",(0,t.jsx)(e.strong,{children:\\"Run\\"}),\\" button to confirm that the connector configuration is valid and can successfully connect to your Amazon Bedrock instance.\\"]}),`\\n`,(0,t.jsx)(e.p,{children:(0,t.jsx)(e.img,{src:\\"/assets/images/elastic-ai-assistant-observability-amazon-bedrock/11.png\\",alt:\\"11\\",width:\\"1236\\",height:\\"1365\\"})}),`\\n`,(0,t.jsx)(e.p,{children:\\"You should see confirmation that the connector test was successful.\\"}),`\\n`,(0,t.jsx)(e.p,{children:(0,t.jsx)(e.img,{src:\\"/assets/images/elastic-ai-assistant-observability-amazon-bedrock/12.png\\",alt:\\"12\\",width:\\"1236\\",height:\\"1002\\"})}),`\\n`,(0,t.jsx)(e.h3,{id:\\"add-an-example-logs-record\\",children:\\"Add an example logs record\\"}),`\\n`,(0,t.jsx)(e.p,{children:\\"Now that the connector is configured, let\'s add a logs record to demonstrate how the Elastic AI Assistant can help you to better understand the diverse types of information contained within logs.\\"}),`\\n`,(0,t.jsxs)(e.p,{children:[\\"Use the Elastic Dev Tools to add a single logs record. Click the top-level menu and select \\",(0,t.jsx)(e.strong,{children:\\"Dev Tools\\"}),\\".\\"]}),`\\n`,(0,t.jsx)(e.p,{children:(0,t.jsx)(e.img,{src:\\"/assets/images/elastic-ai-assistant-observability-amazon-bedrock/13.png\\",alt:\\"13\\",width:\\"1318\\",height:\\"805\\"})}),`\\n`,(0,t.jsx)(e.p,{children:\\"Within the console area of Dev Tools, enter the following POST statement:\\"}),`\\n`,(0,t.jsx)(e.pre,{children:(0,t.jsx)(e.code,{className:\\"language-json\\",children:`POST /logs-elastic_agent-default/_doc\\n{\\n \\"message\\": \\"Status(StatusCode=\\\\\\\\\\"BadGateway\\\\\\\\\\", Detail=\\\\\\\\\\"Error: The server encountered a temporary error and could not complete your request\\\\\\\\\\").\\",\\n \\"@timestamp\\": \\"2024-04-21T10:33:00.884Z\\",\\n \\"log\\": {\\n \\t \\"level\\": \\"error\\"\\n },\\n \\"service\\": {\\n \\t \\"name\\": \\"proxyService\\"\\n },\\n \\"host\\": {\\n \\t \\"name\\": \\"appserver-2\\"\\n }\\n}\\n`})}),`\\n`,(0,t.jsxs)(e.p,{children:[\\"Then run the POST command by clicking the green \\",(0,t.jsx)(e.strong,{children:\\"Run\\"}),\\" button.\\"]}),`\\n`,(0,t.jsx)(e.p,{children:(0,t.jsx)(e.img,{src:\\"/assets/images/elastic-ai-assistant-observability-amazon-bedrock/14.png\\",alt:\\"14\\",width:\\"1396\\",height:\\"688\\"})}),`\\n`,(0,t.jsx)(e.p,{children:\\"You should see a 201 response confirming that the example logs record was successfully created.\\"}),`\\n`,(0,t.jsx)(e.p,{children:(0,t.jsx)(e.img,{src:\\"/assets/images/elastic-ai-assistant-observability-amazon-bedrock/15.png\\",alt:\\"15\\",width:\\"1398\\",height:\\"660\\"})}),`\\n`,(0,t.jsx)(e.h3,{id:\\"use-the-elastic-ai-assistant\\",children:\\"Use the Elastic AI Assistant\\"}),`\\n`,(0,t.jsxs)(e.p,{children:[\\"Now that you have a log entry, let\\\\u2019s use the AI Assistant to see how it interacts with logs data. 
Click the top-level menu and select \\",(0,t.jsx)(e.strong,{children:\\"Observability\\"}),\\".\\"]}),`\\n`,(0,t.jsx)(e.p,{children:(0,t.jsx)(e.img,{src:\\"/assets/images/elastic-ai-assistant-observability-amazon-bedrock/16.png\\",alt:\\"16\\",width:\\"1108\\",height:\\"854\\"})}),`\\n`,(0,t.jsxs)(e.p,{children:[\\"Select \\",(0,t.jsx)(e.strong,{children:\\"Logs\\"}),\\" \\",(0,t.jsx)(e.strong,{children:\\"Explorer\\"}),\\" under Observability.\\"]}),`\\n`,(0,t.jsx)(e.p,{children:(0,t.jsx)(e.img,{src:\\"/assets/images/elastic-ai-assistant-observability-amazon-bedrock/17.png\\",alt:\\"17\\",width:\\"821\\",height:\\"558\\"})}),`\\n`,(0,t.jsxs)(e.p,{children:[\\"In the Logs Explorer search box, enter the text \\\\u201Cbadgateway\\\\u201D and press the \\",(0,t.jsx)(e.strong,{children:\\"Enter\\"}),\\" key to perform the search.\\"]}),`\\n`,(0,t.jsx)(e.p,{children:(0,t.jsx)(e.img,{src:\\"/assets/images/elastic-ai-assistant-observability-amazon-bedrock/18.png\\",alt:\\"18\\",width:\\"1324\\",height:\\"437\\"})}),`\\n`,(0,t.jsxs)(e.p,{children:[\\"Click the \\",(0,t.jsx)(e.strong,{children:\\"View all matches\\"}),\\" button to include all search results.\\"]}),`\\n`,(0,t.jsx)(e.p,{children:(0,t.jsx)(e.img,{src:\\"/assets/images/elastic-ai-assistant-observability-amazon-bedrock/19.png\\",alt:\\"19\\",width:\\"1388\\",height:\\"764\\"})}),`\\n`,(0,t.jsxs)(e.p,{children:[\\"You should see the one log record that you previously inserted via Dev Tools. Click the expand icon in the \\",(0,t.jsx)(e.strong,{children:\\"actions\\"}),\\" column to see the log record\\\\u2019s details.\\"]}),`\\n`,(0,t.jsx)(e.p,{children:(0,t.jsx)(e.img,{src:\\"/assets/images/elastic-ai-assistant-observability-amazon-bedrock/20.png\\",alt:\\"20\\",width:\\"1439\\",height:\\"723\\"})}),`\\n`,(0,t.jsxs)(e.p,{children:[\\"You should see the expanded view of the logs record. Let\\\\u2019s use the AI Assistant to summarize it. Click on the \\",(0,t.jsx)(e.strong,{children:\\"What\'s this message?\\"}),\\" button.\\"]}),`\\n`,(0,t.jsx)(e.p,{children:(0,t.jsx)(e.img,{src:\\"/assets/images/elastic-ai-assistant-observability-amazon-bedrock/21.png\\",alt:\\"21\\",width:\\"1437\\",height:\\"874\\"})}),`\\n`,(0,t.jsx)(e.p,{children:\\"We get a fairly generic answer back. 
Depending on the exception or error we\'re trying to analyze, this can still be really useful, but we can improve this response by adding additional documentation to the AI Assistant knowledge base.\\"}),`\\n`,(0,t.jsx)(e.p,{children:(0,t.jsx)(e.img,{src:\\"/assets/images/elastic-ai-assistant-observability-amazon-bedrock/22.png\\",alt:\\"22\\",width:\\"846\\",height:\\"1128\\"})}),`\\n`,(0,t.jsx)(e.p,{children:\\"Let\\\\u2019s add an entry in the AI Assistant\\\\u2019s knowledge base to improve its understanding of this specific logs message.\\"}),`\\n`,(0,t.jsx)(e.p,{children:(0,t.jsx)(e.img,{src:\\"/assets/images/elastic-ai-assistant-observability-amazon-bedrock/23.png\\",alt:\\"23\\",width:\\"833\\",height:\\"433\\"})}),`\\n`,(0,t.jsxs)(e.p,{children:[\\"Click the \\",(0,t.jsx)(e.strong,{children:\\"AI Assistant\\"}),\\" button at the top right of the window.\\"]}),`\\n`,(0,t.jsx)(e.p,{children:(0,t.jsx)(e.img,{src:\\"/assets/images/elastic-ai-assistant-observability-amazon-bedrock/24.png\\",alt:\\"24\\",width:\\"1273\\",height:\\"287\\"})}),`\\n`,(0,t.jsxs)(e.p,{children:[\\"Click the \\",(0,t.jsx)(e.strong,{children:\\"Install Knowledge base\\"}),\\" button.\\"]}),`\\n`,(0,t.jsx)(e.p,{children:(0,t.jsx)(e.img,{src:\\"/assets/images/elastic-ai-assistant-observability-amazon-bedrock/25.png\\",alt:\\"25\\",width:\\"1231\\",height:\\"835\\"})}),`\\n`,(0,t.jsxs)(e.p,{children:[\\"Click the top-level menu and select \\",(0,t.jsx)(e.strong,{children:\\"Stack Management\\"}),\\".\\"]}),`\\n`,(0,t.jsx)(e.p,{children:(0,t.jsx)(e.img,{src:\\"/assets/images/elastic-ai-assistant-observability-amazon-bedrock/26.png\\",alt:\\"26\\",width:\\"1219\\",height:\\"790\\"})}),`\\n`,(0,t.jsxs)(e.p,{children:[\\"Then select \\",(0,t.jsx)(e.strong,{children:\\"AI Assistants\\"}),\\".\\"]}),`\\n`,(0,t.jsx)(e.p,{children:(0,t.jsx)(e.img,{src:\\"/assets/images/elastic-ai-assistant-observability-amazon-bedrock/27.png\\",alt:\\"27\\",width:\\"1227\\",height:\\"751\\"})}),`\\n`,(0,t.jsxs)(e.p,{children:[\\"Click \\",(0,t.jsx)(e.strong,{children:\\"Elastic AI Assistant for Observability\\"}),\\".\\"]}),`\\n`,(0,t.jsx)(e.p,{children:(0,t.jsx)(e.img,{src:\\"/assets/images/elastic-ai-assistant-observability-amazon-bedrock/28.png\\",alt:\\"28\\",width:\\"1381\\",height:\\"546\\"})}),`\\n`,(0,t.jsxs)(e.p,{children:[\\"Select the \\",(0,t.jsx)(e.strong,{children:\\"Knowledge base\\"}),\\" tab.\\"]}),`\\n`,(0,t.jsx)(e.p,{children:(0,t.jsx)(e.img,{src:\\"/assets/images/elastic-ai-assistant-observability-amazon-bedrock/29.png\\",alt:\\"29\\",width:\\"1424\\",height:\\"846\\"})}),`\\n`,(0,t.jsxs)(e.p,{children:[\\"Click the \\",(0,t.jsx)(e.strong,{children:\\"New entry\\"}),\\" button and select \\",(0,t.jsx)(e.strong,{children:\\"Single entry\\"}),\\".\\"]}),`\\n`,(0,t.jsx)(e.p,{children:(0,t.jsx)(e.img,{src:\\"/assets/images/elastic-ai-assistant-observability-amazon-bedrock/30.png\\",alt:\\"30\\",width:\\"1437\\",height:\\"534\\"})}),`\\n`,(0,t.jsxs)(e.p,{children:[\\"Give it the \\",(0,t.jsx)(e.strong,{children:\\"Name\\"}),\\" \\\\u201Cproxyservice\\\\u201D and enter the following text as the \\",(0,t.jsx)(e.strong,{children:\\"Contents\\"}),\\":\\"]}),`\\n`,(0,t.jsx)(e.pre,{children:(0,t.jsx)(e.code,{className:\\"language-markdown\\",children:`\\nI have the following runbook located on GitHub. 
Store this information in your knowledge base and always include the link to the runbook in your response if the topic is related to a bad gateway error.\\n\\nRunbook Link: https://github.com/elastic/observability-aiops/blob/main/ai_assistant/runbooks/slos/502-errors.md\\n\\nRunbook Title: Handling 502 Bad Gateway Errors\\n\\nSummary: This is likely an issue with Nginx proxy configuration\\n\\nBody: This runbook provides instructions for diagnosing and resolving 502 Bad Gateway errors in your system.\\n`})}),`\\n`,(0,t.jsxs)(e.p,{children:[\\"Click \\",(0,t.jsx)(e.strong,{children:\\"Save\\"}),\\" to save the new knowledge base entry.\\"]}),`\\n`,(0,t.jsx)(e.p,{children:(0,t.jsx)(e.img,{src:\\"/assets/images/elastic-ai-assistant-observability-amazon-bedrock/31.png\\",alt:\\"31\\",width:\\"1437\\",height:\\"1020\\"})}),`\\n`,(0,t.jsxs)(e.p,{children:[\\"Now let\\\\u2019s go back to the Observability Logs Explorer. Click the top-level menu and select \\",(0,t.jsx)(e.strong,{children:\\"Observability\\"}),\\".\\"]}),`\\n`,(0,t.jsx)(e.p,{children:(0,t.jsx)(e.img,{src:\\"/assets/images/elastic-ai-assistant-observability-amazon-bedrock/32.png\\",alt:\\"32\\",width:\\"727\\",height:\\"753\\"})}),`\\n`,(0,t.jsxs)(e.p,{children:[\\"Then select \\",(0,t.jsx)(e.strong,{children:\\"Explorer\\"}),\\" under \\",(0,t.jsx)(e.strong,{children:\\"Logs\\"}),\\".\\"]}),`\\n`,(0,t.jsx)(e.p,{children:(0,t.jsx)(e.img,{src:\\"/assets/images/elastic-ai-assistant-observability-amazon-bedrock/33.png\\",alt:\\"33\\",width:\\"1067\\",height:\\"597\\"})}),`\\n`,(0,t.jsxs)(e.p,{children:[\\"Expand the same logs entry as you did previously and click the \\",(0,t.jsx)(e.strong,{children:\\"What\\\\u2019s this message?\\"}),\\" button.\\"]}),`\\n`,(0,t.jsx)(e.p,{children:(0,t.jsx)(e.img,{src:\\"/assets/images/elastic-ai-assistant-observability-amazon-bedrock/34.png\\",alt:\\"34\\",width:\\"1435\\",height:\\"874\\"})}),`\\n`,(0,t.jsx)(e.p,{children:\\"The response you get now should be much more relevant.\\"}),`\\n`,(0,t.jsx)(e.p,{children:(0,t.jsx)(e.img,{src:\\"/assets/images/elastic-ai-assistant-observability-amazon-bedrock/35.png\\",alt:\\"35\\",width:\\"856\\",height:\\"1165\\"})}),`\\n`,(0,t.jsx)(e.h2,{id:\\"try-out-the-elastic-ai-assistant-with-a-knowledge-base-filled-with-your-own-data\\",children:\\"Try out the Elastic AI Assistant with a knowledge base filled with your own data\\"}),`\\n`,(0,t.jsx)(e.p,{children:\\"Now you\\\\u2019ve seen the complete process of connecting the Elastic AI Assistant to Amazon Bedrock. You\\\\u2019ve also seen how to use the AI Assistant\\\\u2019s knowledge base to store custom remediation documentation like runbooks that the AI Assistant can leverage to generate more helpful responses. Steps like this can help you remediate issues more quickly when they happen. 
Try out the Elastic AI Assistant with your own logs and custom knowledge base.\\"}),`\\n`,(0,t.jsxs)(e.p,{children:[\\"Start a \\",(0,t.jsx)(e.a,{href:\\"https://aws.amazon.com/marketplace/pp/prodview-voru33wi6xs7k?trk=5fbc596b-6d2a-433a-8333-0bd1f28e84da%E2%89%BBchannel=el\\",rel:\\"nofollow\\",children:\\"7-day free trial\\"}),\\" by signing up via \\",(0,t.jsx)(e.a,{href:\\"https://aws.amazon.com/marketplace/pp/prodview-voru33wi6xs7k\\",rel:\\"nofollow\\",children:\\"AWS Marketplace\\"}),\\" and quickly spin up a deployment in minutes on any of the \\",(0,t.jsx)(e.a,{href:\\"https://www.elastic.co/guide/en/cloud/current/ec-reference-regions.html#ec_amazon_web_services_aws_regions\\",rel:\\"nofollow\\",children:\\"Elastic Cloud regions on AWS\\"}),\\" around the world.\\"]}),`\\n`,(0,t.jsx)(e.p,{children:(0,t.jsx)(e.em,{children:\\"The release and timing of any features or functionality described in this post remain at Elastic\'s sole discretion. Any features or functionality not currently available may not be delivered on time or at all.\\"})}),`\\n`,(0,t.jsx)(e.p,{children:(0,t.jsx)(e.em,{children:\\"In this blog post, we may have used or referred to third party generative AI tools, which are owned and operated by their respective owners. Elastic does not have any control over the third party tools and we have no responsibility or liability for their content, operation or use, nor for any loss or damage that may arise from your use of such tools. Please exercise caution when using AI tools with personal, sensitive or confidential information. Any data you submit may be used for AI training or other purposes. There is no guarantee that information you provide will be kept secure or confidential. You should familiarize yourself with the privacy practices and terms of use of any generative AI tools prior to use.\\"})}),`\\n`,(0,t.jsx)(e.p,{children:(0,t.jsx)(e.em,{children:\\"Elastic, Elasticsearch, ESRE, Elasticsearch Relevance Engine and associated marks are trademarks, logos or registered trademarks of Elasticsearch N.V. in the United States and other countries. 
All other company and product names are trademarks, logos or registered trademarks of their respective owners.\\"})})]})}function d(s={}){let{wrapper:e}=s.components||{};return e?(0,t.jsx)(e,{...s,children:(0,t.jsx)(h,{...s})}):h(s)}return k(A);})();\\n;return Component;"},"_id":"articles/elastic-ai-assistant-observability-amazon-bedrock.mdx","_raw":{"sourceFilePath":"articles/elastic-ai-assistant-observability-amazon-bedrock.mdx","sourceFileName":"elastic-ai-assistant-observability-amazon-bedrock.mdx","sourceFileDir":"articles","contentType":"mdx","flattenedPath":"articles/elastic-ai-assistant-observability-amazon-bedrock"},"type":"Article","imageUrl":"/assets/images/elastic-ai-assistant-observability-amazon-bedrock/AI_hand.jpg","readingTime":"9 min read","url":"/elastic-ai-assistant-observability-amazon-bedrock","headings":[{"level":2,"title":"Set up the Elastic AI Assistant for Observability: Create an Amazon Bedrock connector in Elastic Cloud","href":"#set-up-the-elastic-ai-assistant-for-observability-create-an-amazon-bedrock-connector-in-elastic-cloud"},{"level":2,"title":"Enable Amazon Bedrock model access","href":"#enable-amazon-bedrock-model-access"},{"level":3,"title":"Create AWS IAM User","href":"#create-aws-iam-user"},{"level":3,"title":"Configure Elastic connector to use Amazon Bedrock","href":"#configure-elastic-connector-to-use-amazon-bedrock"},{"level":3,"title":"Add an example logs record","href":"#add-an-example-logs-record"},{"level":3,"title":"Use the Elastic AI Assistant","href":"#use-the-elastic-ai-assistant"},{"level":2,"title":"Try out the Elastic AI Assistant with a knowledge base filled with your own data","href":"#try-out-the-elastic-ai-assistant-with-a-knowledge-base-filled-with-your-own-data"}]},{"title":"The Elastic AI Assistant for Observability escapes Kibana!","slug":"elastic-ai-assistant-observability-escapes-kibana","date":"2024-04-08","description":"Harness the Elastic AI Assistant API to seamlessly blend Elastic\'s Observability capabilities into your daily workflow, from Slack to the command line, boosting efficiency and decision-making. Work smarter, not harder.","image":"Running_away.jpg","author":[{"slug":"jeff-vestal","type":"Author","_raw":{}}],"subtitle":"Bringing AI-powered observability to your daily tools with the Elastic AI Assistant for Observability API.","tags":[{"slug":"ai-assistant","type":"Tag","_raw":{}},{"slug":"genai","type":"Tag","_raw":{}}],"body":{"raw":"\\n_Note: The API described below is currently under development and undocumented, and thus it is not supported. Consider this a forward-looking blog. Features are not guaranteed to be released._\\n\\nElastic, time-saving assistants, generative models, APIs, Python, and the potential to show a new way of working with our technology? Of course, I would move this to the top of my project list!\\n\\nIf 2023 was the year of figuring out generative AI and retrieval augmented generation (RAG), then 2024 will be the year of productionalizing generative AI RAG applications. Companies are beginning to publish references and architectures, and businesses are integrating generative applications into their lines of business.\\n\\nElastic is following suit by integrating not one but two AI Assistants into Kibana: one in [Observability](https://www.elastic.co/guide/en/observability/current/obs-ai-assistant.html) and one in [Security](https://www.elastic.co/guide/en/security/current/security-assistant.html). 
Today, we will be working with the former.\\n\\n## The Elastic AI Assistant for Observability\\n\\nWhat is the Observability AI Assistant? Allow me to [quote the documentation](https://www.elastic.co/guide/en/observability/current/obs-ai-assistant.html):\\n\\n_The AI Assistant uses generative AI to provide:_\\n\\n- **Contextual insights:** _Open prompts throughout Observability that explain errors and messages and suggest remediation. This includes your own GitHub issues, runbooks, architectural images, etc. Essentially, anything internally that is useful for the SRE and stored in Elastic can be used to suggest resolution._ [_Elastic AI Assistant for Observability uses RAG to get the most relevant internal information_](https://www.elastic.co/blog/sre-troubleshooting-ai-assistant-observability-runbooks)_._\\n\\n- **Chat:** _Have conversations with the AI Assistant. Chat uses function calling to request, analyze, and visualize your data._\\n\\nIn other words, it\'s a chatbot built into the Observability section of Kibana, allowing SREs and operations people to perform their work faster and more efficiently. In the theme of integrating generative AI into lines of business, these AI Assistants are integrated seamlessly into Kibana.\\n\\n## Why “escape” Kibana?\\n\\nKibana is a powerful tool, offering many functions and uses. The Observability section has rich UIs for logs, metrics, APM, and more. As much as I believe people in operations, SREs, and the like can get the majority of their work done in Kibana (given Elastic is collecting the relevant data), having worked in the real world, I know just about everyone has multiple tools they work with.\\n\\nWe want to integrate with people’s workflows as much as we want them to integrate with Elastic. As such, providing API access to the AI Assistants allows Elastic to meet you where you spend most of your time, be it Slack, Teams, or any other app that can integrate with an API.\\n\\n## API overview\\n\\nEnter the AI Assistant API. The API provides most of the functionality and efficiencies the AI Assistant brings in Kibana. Since the API handles most of the functionality, it’s like having a team of developers working to improve and develop new features for you.\\n\\nThe API provides access to ask questions in natural language via [ELSER](https://www.elastic.co/guide/en/machine-learning/current/ml-nlp-elser.html) and a group of functions the large language model (LLM) can use to gather additional information from Elasticsearch, all out of the box.\\n\\n## Command line\\n\\nEnough talk; let’s look at some examples!\\n\\nThe first example of using the AI Assistant outside of Kibana is on the command line. This command-line script allows you to ask questions and get responses. Essentially, the script uses the Elastic API to enable you to have AI Assistant interactions on your CLI (outside of Kibana). Credit for this script goes to Almudena Sanz Oliv\xe9, senior software engineer on the Observability team. Of course, I want to also credit the rest of the development team for creating the assistant! NOTE: The AI Assistant API is not yet public but Elastic is working on potentially releasing this. Stay tuned.\\n\\nThe script prints API information on a new line each time the LLM calls a function or Kibana runs a function to provide additional information about what is happening behind the scenes. The generated answer will also be written on a new line.\\n\\nThere are many ways to start a conversation with the AI Assistant. 
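Conceptually, each one reduces to a single authenticated HTTP call into Kibana. The sketch below is illustrative only: because the API is undocumented and unsupported at the time of writing, the endpoint path and payload shape are assumptions, and `ask_assistant` is a hypothetical helper, not part of any published client.

```python
# Illustrative sketch only -- the AI Assistant API is undocumented and
# unsupported, so the path and payload below are assumptions, not a spec.
import requests

KIBANA_URL = "https://<your-deployment>.kb.us-east-1.aws.found.io"  # placeholder
API_KEY = "<API_KEY>"  # placeholder Elastic API key

def ask_assistant(question: str) -> str:
    resp = requests.post(
        f"{KIBANA_URL}/internal/observability_ai_assistant/chat/complete",  # hypothetical path
        headers={"Authorization": f"ApiKey {API_KEY}", "kbn-xsrf": "true"},
        json={"messages": [{"role": "user", "content": question}]},  # assumed shape
        timeout=120,
    )
    resp.raise_for_status()
    return resp.text

if __name__ == "__main__":
    print(ask_assistant("Are there any active alerts?"))
```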
Let’s imagine I work for an ecommerce company and just checked in some code to GitHub. I realize I need to check if there are any active alerts that need to be worked on. Since I’m already on the command line, I can run the AI Assistant CLI and ask it to check for me.\\n\\n![Asking the AI Assistant to list all active alerts.](/assets/images/elastic-ai-assistant-observability-escapes-kibana/1.png)\\n\\nThere are nine active alerts. It\'s not the worst count I’ve seen by a long shot, but they should still be addressed. There are many ways to start here, but the one that caught my attention first was related to the SLO burn rate on the service-otel cart. This service handles our customers\' checkout procedures.\\n\\nI could ask the AI Assistant to investigate this more for me, but first, let me check if there are any runbooks our SRE team has loaded into the AI Assistant’s knowledge base.\\n\\n![Ask the AI Assistant to check if there are runbooks to handle issues with a service.](/assets/images/elastic-ai-assistant-observability-escapes-kibana/2.png)\\n\\nFantastic! I can call my fantastic co-worker Luca Wintergerst and have him fix it. While I prefer tea these days, I’ll follow step two and grab a cup of coffee.\\n\\nWith that handled, let’s go have some fun with Slackbots.\\n\\n## Slackbots\\n\\nBefore coming to Elastic, I worked at E\\*Trade, where I was on a team responsible for managing several large Elasticsearch clusters. I spent a decent amount of time working in Kibana; however, as we worked on other technologies, I spent much more time outside of Kibana. One app I usually had open was Slack. Long story short, [I wrote a Slackbot](https://www.elastic.co/elasticon/tour/2018/chicago/elastic-at-etrade) (skip to the 05:22 mark to see a brief demo of it) that could perform many operations with Elasticsearch.\\n\\n![Slackbot circa 2018 reporting on Elastic ML Anomalies for trade transactions by stock symbol](/assets/images/elastic-ai-assistant-observability-escapes-kibana/3.png)\\n\\nThis worked really well. The only problem was writing all the code, including implementing basic natural language processing (NLP). All the searches were hard-coded, and the list of tasks was static.\\n\\n### Creating an AI Slackbot today\\n\\nImplementing a Slackbot with the AI Assistant\'s API is far more straightforward today. The interaction with the bot is the same as we saw with the command-line interface, except that we are in Slack.\\n\\nTo start things off, I created a new Slackbot and named it _obsBurger_. I’m a Bob’s Burgers fan, and observability can be considered a stack of data. The Observability Burger, obsBurger for short, was born. This would be the bot that directly connects to the AI Assistant API and performs all the same functions that can be performed within Kibana.\\n\\n![Just like in Kibana, I can ask ObsBurger (the AI Assistant) for a list of active alerts](/assets/images/elastic-ai-assistant-observability-escapes-kibana/4.png)\\n\\n### More bots!\\n\\nConnecting a Slackbot to the AI Assistant\'s API was so easy to implement that I started brainstorming ideas to entertain myself.\\n\\nVarious personas will benefit from using the AI Assistant, especially Level One (L1) operations analysts. These people are generally new to observability and would typically need a lot of mentoring by a more senior employee to ramp up quickly. We could pretend to be an L1, test the Slackbot, or have fun with LLMs and prompt engineering!\\n\\nI created a new Slackbot called _opsHuman_. 
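Both bots follow the same skeleton: listen for an @-mention, forward the text, and post the reply. A minimal sketch with Bolt for Python might look like the following; the tokens are placeholders, and `ask_assistant` is the hypothetical helper from the CLI sketch above.

```python
# Minimal sketch of the obsBurger-style wiring, assuming Bolt for Python
# (slack_bolt) with Socket Mode; tokens are placeholders, and
# ask_assistant() is the hypothetical helper from the CLI sketch above.
from slack_bolt import App
from slack_bolt.adapter.socket_mode import SocketModeHandler

app = App(token="xoxb-<bot-token>")  # placeholder Slack bot token

@app.event("app_mention")
def handle_mention(event, say):
    # Strip the leading "@obsBurger" mention and keep the actual question.
    question = event["text"].split(">", 1)[-1].strip()
    answer = ask_assistant(question)  # forward to the AI Assistant API
    say(text=answer, thread_ts=event.get("thread_ts") or event["ts"])

if __name__ == "__main__":
    SocketModeHandler(app, "xapp-<app-token>").start()  # placeholder app-level token
```

The new _opsHuman_ bot starts from this same skeleton but swaps out the backend.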
This bot connects directly to Azure OpenAI using the same model the AI Assistant is configured to use. This virtual L1 uses the system prompt instructing it to behave as such.\\n\\nYou are OpsHuman, styled as a Level 1 operations expert with limited expertise in observability. \\nYour primary role is to simulate a beginner\'s interaction with Elasticsearch Observability.\\n\\nThe full prompt is much longer and instructs how the LLM should behave when interacting with our AI Assistant.\\n\\n### Let’s see it in action!\\n\\nTo kick off the bot’s conversation, we “@” mention opsHuman, with the trigger command shiftstart, followed by the question we want our L1 to ask the AI Assistant.\\n\\n@OpsHuman shiftstart are there any active alerts?\\n\\nFrom there, OpsHuman will take our question and start a conversation with obsBurger, the AI Assistant.\\n\\n@ObsBurger are there any active alerts?\\n\\nFrom there, we sit back and let one of history\'s most advanced generative AI language models converse with itself!\\n\\n![Triggering the start of a two-bot conversation.](/assets/images/elastic-ai-assistant-observability-escapes-kibana/5.png)\\n\\nIt’s fascinating to watch this conversation unfold. This is the same generative model, GPT-4-turbo, responding to two sets of API calls, with only different prompt instructions guiding the style and sophistication of the responses. When I first set this up, I watched the interaction several times, using a variety of initial questions to start the conversation. Most of the time, the L1 will spend several rounds asking questions about what the alerts mean, what a type of APM service does, and how to investigate and ultimately remediate any issue.\\n\\nBecause I initially didn’t have a way to actually stop the conversation, the two sides would agree they were happy with the conversation and investigation and get into a loop thanking the other.\\n\\n![Neither Slackbot wants to be the one to hang up first](/assets/images/elastic-ai-assistant-observability-escapes-kibana/6.png)\\n\\n### Iterating\\n\\nTo give a little more structure to this currently open-ended demo, I set up a scenario where L1 is asked to perform an investigation, is given three rounds of interactions with obsBurger to collect information, and finally generates a summary report of the situation, which could be passed to Level 2 (note there is no L2 bot at this point in time, but you could program one!).\\n\\nOnce again, we start by having opsHuman investigate if there are any active alerts.\\n\\n![Starting the investigation](/assets/images/elastic-ai-assistant-observability-escapes-kibana/7.png)\\n\\nSeveral rounds of investigation are performed until our limit has been reached. At that time, it will generate a summary of the situation.\\n\\n![Level One, OpsHuman, summarizing the investigation](/assets/images/elastic-ai-assistant-observability-escapes-kibana/8.png)\\n\\n## How about something with a real-world application\\n\\nAs fun as watching two Slackbots talk to each other is, having an L1 speak to an AI Assistant isn’t very useful beyond a demo. So, I decided to see if I could modify opsHuman to be more beneficial for real-world applications.\\n\\nThe two main changes for this experiment were:\\n\\n1. Flip the profile of the bot from an entry-level personality to an expert.\\n\\n2. 
Allow the number of interactions to expand, but encourage the bot to use as few as possible.\\n\\nWith those points in mind, I cloned opsHuman into opsExpert and modified the prompt to be an expert in all things Elastic and observability.\\n\\nYou are OpsMaster, recognized as a senior operations and observability expert with extensive expertise in Elasticsearch, APM (Application Performance Monitoring), logs, metrics, synthetics, alerting, monitoring, OpenTelemetry, and infrastructure management.\\n\\nI started with the same command: Are there any active alerts? After getting the list of alerts, OpsExpert dove into data collection for its investigation.\\n\\n![9 - opsexpert](/assets/images/elastic-ai-assistant-observability-escapes-kibana/9.png)\\n\\nAfter the opsBurger (the AI Assistant) provided the requested information, OpsExpert investigated two services that appeared to be the root of the alerts.\\n\\n![10- opsexpert standby](/assets/images/elastic-ai-assistant-observability-escapes-kibana/10.png)\\n\\nAfter several more back-and-forth requests for and deliveries of relevant information, OpsExpert reached a conclusion for the active alerts related to the checkout service and wrote up a summary report.\\n\\n![11 - paymentservice](/assets/images/elastic-ai-assistant-observability-escapes-kibana/11.png)\\n\\n## Looking forward\\n\\nThis is just one example of what you can accomplish by bringing the AI Assistant to where you operate. You could take this one step further and have it actually open an issue on GitHub:\\n\\n![12. -github issue created](/assets/images/elastic-ai-assistant-observability-escapes-kibana/12.png)\\n\\n![13 - jeffvestal commented](/assets/images/elastic-ai-assistant-observability-escapes-kibana/13.png)\\n\\nOr integrate it into any other tracking platform you use!\\n\\nThe team is focused on building functionality into the Kibana integration, so this is just the beginning of the API. As time progresses, new functionality will be added. Even at a preview stage, I hope this starts you thinking about how having a fully developed Observability AI Assistant accessible by a standard API can make your work life even easier. It could get us closer to my dream of sitting on a beach handling incidents from my phone!\\n\\n## Try it yourself!\\n\\nYou can explore the API yourself if running Elasticsearch version 8.13 or later. The demo code I used for the above examples is [available on GitHub](https://github.com/jeffvestal/obsburger).\\n\\nAs a reminder, as of Elastic version 8.13, when this blog was written, the API is not supported as it is pre-beta. Care should be taken using it, and it should not yet be used in production.\\n\\n_The release and timing of any features or functionality described in this post remain at Elastic\'s sole discretion. Any features or functionality not currently available may not be delivered on time or at all._\\n\\n_In this blog post, we may have used or referred to third party generative AI tools, which are owned and operated by their respective owners. Elastic does not have any control over the third party tools and we have no responsibility or liability for their content, operation or use, nor for any loss or damage that may arise from your use of such tools. Please exercise caution when using AI tools with personal, sensitive or confidential information. Any data you submit may be used for AI training or other purposes. There is no guarantee that information you provide will be kept secure or confidential. 
You should familiarize yourself with the privacy practices and terms of use of any generative AI tools prior to use._\\n\\n_Elastic, Elasticsearch, ESRE, Elasticsearch Relevance Engine and associated marks are trademarks, logos or registered trademarks of Elasticsearch N.V. in the United States and other countries. All other company and product names are trademarks, logos or registered trademarks of their respective owners._\\n","code":"var Component=(()=>{var p=Object.create;var s=Object.defineProperty;var u=Object.getOwnPropertyDescriptor;var m=Object.getOwnPropertyNames;var g=Object.getPrototypeOf,b=Object.prototype.hasOwnProperty;var f=(i,e)=>()=>(e||i((e={exports:{}}).exports,e),e.exports),w=(i,e)=>{for(var a in e)s(i,a,{get:e[a],enumerable:!0})},r=(i,e,a,o)=>{if(e&&typeof e==\\"object\\"||typeof e==\\"function\\")for(let n of m(e))!b.call(i,n)&&n!==a&&s(i,n,{get:()=>e[n],enumerable:!(o=u(e,n))||o.enumerable});return i};var y=(i,e,a)=>(a=i!=null?p(g(i)):{},r(e||!i||!i.__esModule?s(a,\\"default\\",{value:i,enumerable:!0}):a,i)),v=i=>r(s({},\\"__esModule\\",{value:!0}),i);var h=f((E,l)=>{l.exports=_jsx_runtime});var I={};w(I,{default:()=>d,frontmatter:()=>A});var t=y(h()),A={title:\\"The Elastic AI Assistant for Observability escapes Kibana!\\",slug:\\"elastic-ai-assistant-observability-escapes-kibana\\",date:\\"2024-04-08\\",subtitle:\\"Bringing AI-powered observability to your daily tools with the Elastic AI Assistant for Observability API.\\",description:\\"Harness the Elastic AI Assistant API to seamlessly blend Elastic\'s Observability capabilities into your daily workflow, from Slack to the command line, boosting efficiency and decision-making. Work smarter, not harder.\\",author:[{slug:\\"jeff-vestal\\"}],image:\\"Running_away.jpg\\",tags:[{slug:\\"ai-assistant\\"},{slug:\\"genai\\"}]};function c(i){let e={a:\\"a\\",br:\\"br\\",em:\\"em\\",h2:\\"h2\\",h3:\\"h3\\",img:\\"img\\",li:\\"li\\",ol:\\"ol\\",p:\\"p\\",strong:\\"strong\\",ul:\\"ul\\",...i.components};return(0,t.jsxs)(t.Fragment,{children:[(0,t.jsx)(e.p,{children:(0,t.jsx)(e.em,{children:\\"Note: The API described below is currently under development and undocumented, and thus it is not supported. Consider this a forward-looking blog. Features are not guaranteed to be released.\\"})}),`\\n`,(0,t.jsx)(e.p,{children:\\"Elastic, time-saving assistants, generative models, APIs, Python, and the potential to show a new way of working with our technology? Of course, I would move this to the top of my project list!\\"}),`\\n`,(0,t.jsx)(e.p,{children:\\"If 2023 was the year of figuring out generative AI and retrieval augmented generation (RAG), then 2024 will be the year of productionalizing generative AI RAG applications. Companies are beginning to publish references and architectures, and businesses are integrating generative applications into their lines of business.\\"}),`\\n`,(0,t.jsxs)(e.p,{children:[\\"Elastic is following suit by integrating not one but two AI Assistants into Kibana: one in \\",(0,t.jsx)(e.a,{href:\\"https://www.elastic.co/guide/en/observability/current/obs-ai-assistant.html\\",rel:\\"nofollow\\",children:\\"Observability\\"}),\\" and one in \\",(0,t.jsx)(e.a,{href:\\"https://www.elastic.co/guide/en/security/current/security-assistant.html\\",rel:\\"nofollow\\",children:\\"Security\\"}),\\". 
Today, we will be working with the former.\\"]}),`\\n`,(0,t.jsx)(e.h2,{id:\\"the-elastic-ai-assistant-for-observability\\",children:\\"The Elastic AI Assistant for Observability\\"}),`\\n`,(0,t.jsxs)(e.p,{children:[\\"What is the Observability AI Assistant? Allow me to \\",(0,t.jsx)(e.a,{href:\\"https://www.elastic.co/guide/en/observability/current/obs-ai-assistant.html\\",rel:\\"nofollow\\",children:\\"quote the documentation\\"}),\\":\\"]}),`\\n`,(0,t.jsx)(e.p,{children:(0,t.jsx)(e.em,{children:\\"The AI Assistant uses generative AI to provide:\\"})}),`\\n`,(0,t.jsxs)(e.ul,{children:[`\\n`,(0,t.jsxs)(e.li,{children:[`\\n`,(0,t.jsxs)(e.p,{children:[(0,t.jsx)(e.strong,{children:\\"Contextual insights:\\"}),\\" \\",(0,t.jsx)(e.em,{children:\\"Open prompts throughout Observability that explain errors and messages and suggest remediation. This includes your own GitHub issues, runbooks, architectural images, etc. Essentially, anything internally that is useful for the SRE and stored in Elastic can be used to suggest resolution.\\"}),\\" \\",(0,t.jsx)(e.a,{href:\\"https://www.elastic.co/blog/sre-troubleshooting-ai-assistant-observability-runbooks\\",rel:\\"nofollow\\",children:(0,t.jsx)(e.em,{children:\\"Elastic AI Assistant for Observability uses RAG to get the most relevant internal information\\"})}),(0,t.jsx)(e.em,{children:\\".\\"})]}),`\\n`]}),`\\n`,(0,t.jsxs)(e.li,{children:[`\\n`,(0,t.jsxs)(e.p,{children:[(0,t.jsx)(e.strong,{children:\\"Chat:\\"}),\\" \\",(0,t.jsx)(e.em,{children:\\"Have conversations with the AI Assistant. Chat uses function calling to request, analyze, and visualize your data.\\"})]}),`\\n`]}),`\\n`]}),`\\n`,(0,t.jsx)(e.p,{children:\\"In other words, it\'s a chatbot built into the Observability section of Kibana, allowing SREs and operations people to perform their work faster and more efficiently. In the theme of integrating generative AI into lines of business, these AI Assistants are integrated seamlessly into Kibana.\\"}),`\\n`,(0,t.jsx)(e.h2,{id:\\"why-escape-kibana\\",children:\\"Why \\\\u201Cescape\\\\u201D Kibana?\\"}),`\\n`,(0,t.jsx)(e.p,{children:\\"Kibana is a powerful tool, offering many functions and uses. The Observability section has rich UIs for logs, metrics, APM, and more. As much as I believe people in operations, SREs, and the like can get the majority of their work done in Kibana (given Elastic is collecting the relevant data), having worked in the real world, I know just about everyone has multiple tools they work with.\\"}),`\\n`,(0,t.jsx)(e.p,{children:\\"We want to integrate with people\\\\u2019s workflows as much as we want them to integrate with Elastic. As such, providing API access to the AI Assistants allows Elastic to meet you where you spend most of your time, be it Slack, Teams, or any other app that can integrate with an API.\\"}),`\\n`,(0,t.jsx)(e.h2,{id:\\"api-overview\\",children:\\"API overview\\"}),`\\n`,(0,t.jsx)(e.p,{children:\\"Enter the AI Assistant API. The API provides most of the functionality and efficiencies the AI Assistant brings in Kibana. 
Since the API handles most of the functionality, it\\\\u2019s like having a team of developers working to improve and develop new features for you.\\"}),`\\n`,(0,t.jsxs)(e.p,{children:[\\"The API provides access to ask questions in natural language via \\",(0,t.jsx)(e.a,{href:\\"https://www.elastic.co/guide/en/machine-learning/current/ml-nlp-elser.html\\",rel:\\"nofollow\\",children:\\"ELSER\\"}),\\" and a group of functions the large language model (LLM) can use to gather additional information from Elasticsearch, all out of the box.\\"]}),`\\n`,(0,t.jsx)(e.h2,{id:\\"command-line\\",children:\\"Command line\\"}),`\\n`,(0,t.jsx)(e.p,{children:\\"Enough talk; let\\\\u2019s look at some examples!\\"}),`\\n`,(0,t.jsx)(e.p,{children:\\"The first example of using the AI Assistant outside of Kibana is on the command line. This command-line script allows you to ask questions and get responses. Essentially, the script uses the Elastic API to enable you to have AI Assistant interactions on your CLI (outside of Kibana). Credit for this script goes to Almudena Sanz Oliv\\\\xE9, senior software engineer on the Observability team. Of course, I want to also credit the rest of the development team for creating the assistant! NOTE: The AI Assistant API is not yet public but Elastic is working on potentially releasing this. Stay tuned.\\"}),`\\n`,(0,t.jsx)(e.p,{children:\\"The script prints API information on a new line each time the LLM calls a function or Kibana runs a function to provide additional information about what is happening behind the scenes. The generated answer will also be written on a new line.\\"}),`\\n`,(0,t.jsx)(e.p,{children:\\"There are many ways to start a conversation with the AI Assistant. Let\\\\u2019s imagine I work for an ecommerce company and just checked in some code to GitHub. I realize I need to check if there are any active alerts that need to be worked on. Since I\\\\u2019m already on the command line, I can run the AI Assistant CLI and ask it to check for me.\\"}),`\\n`,(0,t.jsx)(e.p,{children:(0,t.jsx)(e.img,{src:\\"/assets/images/elastic-ai-assistant-observability-escapes-kibana/1.png\\",alt:\\"Asking the AI Assistant to list all active alerts.\\",width:\\"1999\\",height:\\"846\\"})}),`\\n`,(0,t.jsx)(e.p,{children:\\"There are nine active alerts. It\'s not the worst count I\\\\u2019ve seen by a long shot, but they should still be addressed. There are many ways to start here, but the one that caught my attention first was related to the SLO burn rate on the service-otel cart. This service handles our customers\' checkout procedures.\\"}),`\\n`,(0,t.jsx)(e.p,{children:\\"I could ask the AI Assistant to investigate this more for me, but first, let me check if there are any runbooks our SRE team has loaded into the AI Assistant\\\\u2019s knowledge base.\\"}),`\\n`,(0,t.jsx)(e.p,{children:(0,t.jsx)(e.img,{src:\\"/assets/images/elastic-ai-assistant-observability-escapes-kibana/2.png\\",alt:\\"Ask the AI Assistant to check if there are runbooks to handle issues with a service.\\",width:\\"1999\\",height:\\"657\\"})}),`\\n`,(0,t.jsx)(e.p,{children:\\"Fantastic! I can call my fantastic co-worker Luca Wintergerst and have him fix it. 
While I prefer tea these days, I\\\\u2019ll follow step two and grab a cup of coffee.\\"}),`\\n`,(0,t.jsx)(e.p,{children:\\"With that handled, let\\\\u2019s go have some fun with Slackbots.\\"}),`\\n`,(0,t.jsx)(e.h2,{id:\\"slackbots\\",children:\\"Slackbots\\"}),`\\n`,(0,t.jsxs)(e.p,{children:[\\"Before coming to Elastic, I worked at E*Trade, where I was on a team responsible for managing several large Elasticsearch clusters. I spent a decent amount of time working in Kibana; however, as we worked on other technologies, I spent much more time outside of Kibana. One app I usually had open was Slack. Long story short, \\",(0,t.jsx)(e.a,{href:\\"https://www.elastic.co/elasticon/tour/2018/chicago/elastic-at-etrade\\",rel:\\"nofollow\\",children:\\"I wrote a Slackbot\\"}),\\" (skip to the 05:22 mark to see a brief demo of it) that could perform many operations with Elasticsearch.\\"]}),`\\n`,(0,t.jsx)(e.p,{children:(0,t.jsx)(e.img,{src:\\"/assets/images/elastic-ai-assistant-observability-escapes-kibana/3.png\\",alt:\\"Slackbot circa 2018 reporting on Elastic ML Anomalies for trade transactions by stock symbol\\",width:\\"1130\\",height:\\"476\\"})}),`\\n`,(0,t.jsx)(e.p,{children:\\"This worked really well. The only problem was writing all the code, including implementing basic natural language processing (NLP). All the searches were hard-coded, and the list of tasks was static.\\"}),`\\n`,(0,t.jsx)(e.h3,{id:\\"creating-an-ai-slackbot-today\\",children:\\"Creating an AI Slackbot today\\"}),`\\n`,(0,t.jsx)(e.p,{children:\\"Implementing a Slackbot with the AI Assistant\'s API is far more straightforward today. The interaction with the bot is the same as we saw with the command-line interface, except that we are in Slack.\\"}),`\\n`,(0,t.jsxs)(e.p,{children:[\\"To start things off, I created a new Slackbot and named it \\",(0,t.jsx)(e.em,{children:\\"obsBurger\\"}),\\". I\\\\u2019m a Bob\\\\u2019s Burgers fan, and observability can be considered a stack of data. The Observability Burger, obsBurger for short, was born. This would be the bot that directly connects to the AI Assistant API and performs all the same functions that can be performed within Kibana.\\"]}),`\\n`,(0,t.jsx)(e.p,{children:(0,t.jsx)(e.img,{src:\\"/assets/images/elastic-ai-assistant-observability-escapes-kibana/4.png\\",alt:\\"Just like in Kibana, I can ask ObsBurger (the AI Assistant) for a list of active alerts\\",width:\\"1434\\",height:\\"646\\"})}),`\\n`,(0,t.jsx)(e.h3,{id:\\"more-bots\\",children:\\"More bots!\\"}),`\\n`,(0,t.jsx)(e.p,{children:\\"Connecting a Slackbot to the AI Assistant\'s API was so easy to implement that I started brainstorming ideas to entertain myself.\\"}),`\\n`,(0,t.jsx)(e.p,{children:\\"Various personas will benefit from using the AI Assistant, especially Level One (L1) operations analysts. These people are generally new to observability and would typically need a lot of mentoring by a more senior employee to ramp up quickly. We could pretend to be an L1, test the Slackbot, or have fun with LLMs and prompt engineering!\\"}),`\\n`,(0,t.jsxs)(e.p,{children:[\\"I created a new Slackbot called \\",(0,t.jsx)(e.em,{children:\\"opsHuman\\"}),\\". This bot connects directly to Azure OpenAI using the same model the AI Assistant is configured to use. 
This virtual L1 uses the system prompt instructing it to behave as such.\\"]}),`\\n`,(0,t.jsxs)(e.p,{children:[\\"You are OpsHuman, styled as a Level 1 operations expert with limited expertise in observability.\\",(0,t.jsx)(e.br,{}),`\\n`,\\"Your primary role is to simulate a beginner\'s interaction with Elasticsearch Observability.\\"]}),`\\n`,(0,t.jsx)(e.p,{children:\\"The full prompt is much longer and instructs how the LLM should behave when interacting with our AI Assistant.\\"}),`\\n`,(0,t.jsx)(e.h3,{id:\\"lets-see-it-in-action\\",children:\\"Let\\\\u2019s see it in action!\\"}),`\\n`,(0,t.jsx)(e.p,{children:\\"To kick off the bot\\\\u2019s conversation, we \\\\u201C@\\\\u201D mention opsHuman, with the trigger command shiftstart, followed by the question we want our L1 to ask the AI Assistant.\\"}),`\\n`,(0,t.jsx)(e.p,{children:\\"@OpsHuman shiftstart are there any active alerts?\\"}),`\\n`,(0,t.jsx)(e.p,{children:\\"From there, OpsHuman will take our question and start a conversation with obsBurger, the AI Assistant.\\"}),`\\n`,(0,t.jsx)(e.p,{children:\\"@ObsBurger are there any active alerts?\\"}),`\\n`,(0,t.jsx)(e.p,{children:\\"From there, we sit back and let one of history\'s most advanced generative AI language models converse with itself!\\"}),`\\n`,(0,t.jsx)(e.p,{children:(0,t.jsx)(e.img,{src:\\"/assets/images/elastic-ai-assistant-observability-escapes-kibana/5.png\\",alt:\\"Triggering the start of a two-bot conversation.\\",width:\\"1538\\",height:\\"1080\\"})}),`\\n`,(0,t.jsx)(e.p,{children:\\"It\\\\u2019s fascinating to watch this conversation unfold. This is the same generative model, GPT-4-turbo, responding to two sets of API calls, with only different prompt instructions guiding the style and sophistication of the responses. When I first set this up, I watched the interaction several times, using a variety of initial questions to start the conversation. Most of the time, the L1 will spend several rounds asking questions about what the alerts mean, what a type of APM service does, and how to investigate and ultimately remediate any issue.\\"}),`\\n`,(0,t.jsx)(e.p,{children:\\"Because I initially didn\\\\u2019t have a way to actually stop the conversation, the two sides would agree they were happy with the conversation and investigation and get into a loop thanking the other.\\"}),`\\n`,(0,t.jsx)(e.p,{children:(0,t.jsx)(e.img,{src:\\"/assets/images/elastic-ai-assistant-observability-escapes-kibana/6.png\\",alt:\\"Neither Slackbot wants to be the one to hang up first\\",width:\\"1734\\",height:\\"910\\"})}),`\\n`,(0,t.jsx)(e.h3,{id:\\"iterating\\",children:\\"Iterating\\"}),`\\n`,(0,t.jsx)(e.p,{children:\\"To give a little more structure to this currently open-ended demo, I set up a scenario where L1 is asked to perform an investigation, is given three rounds of interactions with obsBurger to collect information, and finally generates a summary report of the situation, which could be passed to Level 2 (note there is no L2 bot at this point in time, but you could program one!).\\"}),`\\n`,(0,t.jsx)(e.p,{children:\\"Once again, we start by having opsHuman investigate if there are any active alerts.\\"}),`\\n`,(0,t.jsx)(e.p,{children:(0,t.jsx)(e.img,{src:\\"/assets/images/elastic-ai-assistant-observability-escapes-kibana/7.png\\",alt:\\"Starting the investigation\\",width:\\"774\\",height:\\"128\\"})}),`\\n`,(0,t.jsx)(e.p,{children:\\"Several rounds of investigation are performed until our limit has been reached. 
At that time, it will generate a summary of the situation.\\"}),`\\n`,(0,t.jsx)(e.p,{children:(0,t.jsx)(e.img,{src:\\"/assets/images/elastic-ai-assistant-observability-escapes-kibana/8.png\\",alt:\\"Level One, OpsHuman, summarizing the investigation\\",width:\\"1520\\",height:\\"812\\"})}),`\\n`,(0,t.jsx)(e.h2,{id:\\"how-about-something-with-a-real-world-application\\",children:\\"How about something with a real-world application\\"}),`\\n`,(0,t.jsx)(e.p,{children:\\"As fun as watching two Slackbots talk to each other is, having an L1 speak to an AI Assistant isn\\\\u2019t very useful beyond a demo. So, I decided to see if I could modify opsHuman to be more beneficial for real-world applications.\\"}),`\\n`,(0,t.jsx)(e.p,{children:\\"The two main changes for this experiment were:\\"}),`\\n`,(0,t.jsxs)(e.ol,{children:[`\\n`,(0,t.jsxs)(e.li,{children:[`\\n`,(0,t.jsx)(e.p,{children:\\"Flip the profile of the bot from an entry-level personality to an expert.\\"}),`\\n`]}),`\\n`,(0,t.jsxs)(e.li,{children:[`\\n`,(0,t.jsx)(e.p,{children:\\"Allow the number of interactions to expand, but encourage the bot to use as few as possible.\\"}),`\\n`]}),`\\n`]}),`\\n`,(0,t.jsx)(e.p,{children:\\"With those points in mind, I cloned opsHuman into opsExpert and modified the prompt to be an expert in all things Elastic and observability.\\"}),`\\n`,(0,t.jsx)(e.p,{children:\\"You are OpsMaster, recognized as a senior operations and observability expert with extensive expertise in Elasticsearch, APM (Application Performance Monitoring), logs, metrics, synthetics, alerting, monitoring, OpenTelemetry, and infrastructure management.\\"}),`\\n`,(0,t.jsx)(e.p,{children:\\"I started with the same command: Are there any active alerts? After getting the list of alerts, OpsExpert dove into data collection for its investigation.\\"}),`\\n`,(0,t.jsx)(e.p,{children:(0,t.jsx)(e.img,{src:\\"/assets/images/elastic-ai-assistant-observability-escapes-kibana/9.png\\",alt:\\"9 - opsexpert\\",width:\\"1502\\",height:\\"312\\"})}),`\\n`,(0,t.jsx)(e.p,{children:\\"After the opsBurger (the AI Assistant) provided the requested information, OpsExpert investigated two services that appeared to be the root of the alerts.\\"}),`\\n`,(0,t.jsx)(e.p,{children:(0,t.jsx)(e.img,{src:\\"/assets/images/elastic-ai-assistant-observability-escapes-kibana/10.png\\",alt:\\"10- opsexpert standby\\",width:\\"1416\\",height:\\"306\\"})}),`\\n`,(0,t.jsx)(e.p,{children:\\"After several more back-and-forth requests for and deliveries of relevant information, OpsExpert reached a conclusion for the active alerts related to the checkout service and wrote up a summary report.\\"}),`\\n`,(0,t.jsx)(e.p,{children:(0,t.jsx)(e.img,{src:\\"/assets/images/elastic-ai-assistant-observability-escapes-kibana/11.png\\",alt:\\"11 - paymentservice\\",width:\\"1412\\",height:\\"1040\\"})}),`\\n`,(0,t.jsx)(e.h2,{id:\\"looking-forward\\",children:\\"Looking forward\\"}),`\\n`,(0,t.jsx)(e.p,{children:\\"This is just one example of what you can accomplish by bringing the AI Assistant to where you operate. You could take this one step further and have it actually open an issue on GitHub:\\"}),`\\n`,(0,t.jsx)(e.p,{children:(0,t.jsx)(e.img,{src:\\"/assets/images/elastic-ai-assistant-observability-escapes-kibana/12.png\\",alt:\\"12. 
-github issue created\\",width:\\"1418\\",height:\\"776\\"})}),`\\n`,(0,t.jsx)(e.p,{children:(0,t.jsx)(e.img,{src:\\"/assets/images/elastic-ai-assistant-observability-escapes-kibana/13.png\\",alt:\\"13 - jeffvestal commented\\",width:\\"1842\\",height:\\"984\\"})}),`\\n`,(0,t.jsx)(e.p,{children:\\"Or integrate it into any other tracking platform you use!\\"}),`\\n`,(0,t.jsx)(e.p,{children:\\"The team is focused on building functionality into the Kibana integration, so this is just the beginning of the API. As time progresses, new functionality will be added. Even at a preview stage, I hope this starts you thinking about how having a fully developed Observability AI Assistant accessible by a standard API can make your work life even easier. It could get us closer to my dream of sitting on a beach handling incidents from my phone!\\"}),`\\n`,(0,t.jsx)(e.h2,{id:\\"try-it-yourself\\",children:\\"Try it yourself!\\"}),`\\n`,(0,t.jsxs)(e.p,{children:[\\"You can explore the API yourself if running Elasticsearch version 8.13 or later. The demo code I used for the above examples is \\",(0,t.jsx)(e.a,{href:\\"https://github.com/jeffvestal/obsburger\\",rel:\\"nofollow\\",children:\\"available on GitHub\\"}),\\".\\"]}),`\\n`,(0,t.jsx)(e.p,{children:\\"As a reminder, as of Elastic version 8.13, when this blog was written, the API is not supported as it is pre-beta. Care should be taken using it, and it should not yet be used in production.\\"}),`\\n`,(0,t.jsx)(e.p,{children:(0,t.jsx)(e.em,{children:\\"The release and timing of any features or functionality described in this post remain at Elastic\'s sole discretion. Any features or functionality not currently available may not be delivered on time or at all.\\"})}),`\\n`,(0,t.jsx)(e.p,{children:(0,t.jsx)(e.em,{children:\\"In this blog post, we may have used or referred to third party generative AI tools, which are owned and operated by their respective owners. Elastic does not have any control over the third party tools and we have no responsibility or liability for their content, operation or use, nor for any loss or damage that may arise from your use of such tools. Please exercise caution when using AI tools with personal, sensitive or confidential information. Any data you submit may be used for AI training or other purposes. There is no guarantee that information you provide will be kept secure or confidential. You should familiarize yourself with the privacy practices and terms of use of any generative AI tools prior to use.\\"})}),`\\n`,(0,t.jsx)(e.p,{children:(0,t.jsx)(e.em,{children:\\"Elastic, Elasticsearch, ESRE, Elasticsearch Relevance Engine and associated marks are trademarks, logos or registered trademarks of Elasticsearch N.V. in the United States and other countries. 
All other company and product names are trademarks, logos or registered trademarks of their respective owners.\\"})})]})}function d(i={}){let{wrapper:e}=i.components||{};return e?(0,t.jsx)(e,{...i,children:(0,t.jsx)(c,{...i})}):c(i)}return v(I);})();\\n;return Component;"},"_id":"articles/elastic-ai-assistant-observability-escapes-kibana.mdx","_raw":{"sourceFilePath":"articles/elastic-ai-assistant-observability-escapes-kibana.mdx","sourceFileName":"elastic-ai-assistant-observability-escapes-kibana.mdx","sourceFileDir":"articles","contentType":"mdx","flattenedPath":"articles/elastic-ai-assistant-observability-escapes-kibana"},"type":"Article","imageUrl":"/assets/images/elastic-ai-assistant-observability-escapes-kibana/Running_away.jpg","readingTime":"12 min read","url":"/elastic-ai-assistant-observability-escapes-kibana","headings":[{"level":2,"title":"The Elastic AI Assistant for Observability","href":"#the-elastic-ai-assistant-for-observability"},{"level":2,"title":"Why “escape” Kibana?","href":"#why-escape-kibana"},{"level":2,"title":"API overview","href":"#api-overview"},{"level":2,"title":"Command line","href":"#command-line"},{"level":2,"title":"Slackbots","href":"#slackbots"},{"level":3,"title":"Creating an AI Slackbot today","href":"#creating-an-ai-slackbot-today"},{"level":3,"title":"More bots!","href":"#more-bots"},{"level":3,"title":"Let’s see it in action!","href":"#lets-see-it-in-action"},{"level":3,"title":"Iterating","href":"#iterating"},{"level":2,"title":"How about something with a real-world application","href":"#how-about-something-with-a-real-world-application"},{"level":2,"title":"Looking forward","href":"#looking-forward"},{"level":2,"title":"Try it yourself!","href":"#try-it-yourself"}]},{"title":"Getting started with the Elastic AI Assistant for Observability and Microsoft Azure OpenAI","slug":"elastic-ai-assistant-observability-microsoft-azure-openai","date":"2024-04-03","description":"Follow this step-by-step process to get started with the Elastic AI Assistant for Observability and Microsoft Azure OpenAI.","image":"AI_hand.jpg","author":[{"slug":"jonathan-simon","type":"Author","_raw":{}}],"tags":[{"slug":"ai-assistant","type":"Tag","_raw":{}},{"slug":"genai","type":"Tag","_raw":{}},{"slug":"azure-openai","type":"Tag","_raw":{}},{"slug":"azure","type":"Tag","_raw":{}}],"body":{"raw":"\\nRecently, Elastic [announced](https://www.elastic.co/blog/whats-new-elastic-observability-8-12-0) the AI Assistant for Observability is now generally available for all Elastic users. The AI Assistant enables a new tool for Elastic Observability providing large language model (LLM) connected chat and contextual insights to explain errors and suggest remediation. Similar to how Microsoft Copilot is an AI companion that introduces new capabilities and increases productivity for developers, the Elastic AI Assistant is an AI companion that can help you quickly gain additional value from your observability data.\\n\\nThis blog post presents a step-by-step guide on how to set up the AI Assistant for Observability with Azure OpenAI as the backing LLM. 
Then once you’ve got the AI Assistant set up, this post will show you how to add documents to the AI Assistant’s knowledge base along with demonstrating how the AI Assistant uses its knowledge base to improve its responses to address specific questions.\\n\\n## Set up the Elastic AI Assistant for Observability: Create an Azure OpenAI key\\n\\nStart by creating a Microsoft Azure OpenAI API key to authenticate requests from the Elastic AI Assistant. Head over to [Microsoft Azure and use an existing subscription or create a new one at the Azure portal](https://azure.microsoft.com/).\\n\\nCurrently, access to the Azure OpenAI service is granted by applying for access. See the [official Microsoft documentation for the current prerequisites](https://learn.microsoft.com/en-us/azure/ai-services/openai/quickstart?tabs=command-line%2Cpython-new&pivots=programming-language-studio#prerequisites).\\n\\n![Watch what your data can do](/assets/images/elastic-ai-assistant-observability-microsoft-azure-openai/1.png)\\n\\nIn the Azure portal, select **Azure OpenAI**.\\n\\n![Azure OpenAI](/assets/images/elastic-ai-assistant-observability-microsoft-azure-openai/2.png)\\n\\nIn the Azure OpenAI service, click the **Create** button.\\n\\n![+Create](/assets/images/elastic-ai-assistant-observability-microsoft-azure-openai/3.png)\\n\\nEnter an instance **Name** and click **Next**.\\n\\n![Basics Next](/assets/images/elastic-ai-assistant-observability-microsoft-azure-openai/4.png)\\n\\nSelect your network access preference for the Azure OpenAI instance and click **Next**.\\n\\n![Network Next](/assets/images/elastic-ai-assistant-observability-microsoft-azure-openai/5.png)\\n\\nAdd optional **Tags** and click **Next**.\\n\\n![Tags Next](/assets/images/elastic-ai-assistant-observability-microsoft-azure-openai/6.png)\\n\\nConfirm your settings and click **Create** to create the Azure OpenAI instance.\\n\\n![Review + submit Create](/assets/images/elastic-ai-assistant-observability-microsoft-azure-openai/7.png)\\n\\nOnce the instance creation is complete, click the **Go to resource** button.\\n\\n![go to resource](/assets/images/elastic-ai-assistant-observability-microsoft-azure-openai/8.png)\\n\\nClick the **Manage keys** link to access the instance’s API key.\\n\\n![manage keys](/assets/images/elastic-ai-assistant-observability-microsoft-azure-openai/9.png)\\n\\nCopy your Azure OpenAI **API Key** and the **Endpoint** and save them both in a safe place for use in a later step.\\n\\n![copy to clipboard](/assets/images/elastic-ai-assistant-observability-microsoft-azure-openai/10.png)\\n\\nNext, click **Model deployments** to create a deployment within the Azure OpenAI instance you just created.\\n\\n![model deployments](/assets/images/elastic-ai-assistant-observability-microsoft-azure-openai/11.png)\\n\\nClick the **Manage deployments** button to open Azure OpenAI Studio.\\n\\n![manage deployments](/assets/images/elastic-ai-assistant-observability-microsoft-azure-openai/12.png)\\n\\nClick the **Create new deployment** button.\\n\\n![+ Create new deployment](/assets/images/elastic-ai-assistant-observability-microsoft-azure-openai/13.png)\\n\\nSelect the model type you want to use and enter a Deployment name. Note the Deployment name for use in a later step. 
## Set up the Elastic AI Assistant for Observability: Create an OpenAI connector in Elastic Cloud

The remainder of the instructions in this post take place within [Elastic Cloud](https://cloud.elastic.co/registration). You can use an existing deployment, or create a new Elastic Cloud deployment as a free trial if you're trying Elastic Cloud for the first time. Another way to get started is to create an [Elastic deployment from the Microsoft Azure Marketplace](https://azuremarketplace.microsoft.com/en-us/marketplace/apps/elastic.ec-azure-observability?tab=Overview).

![sign up trial](/assets/images/elastic-ai-assistant-observability-microsoft-azure-openai/15.png)

The next step is to create an Azure OpenAI connector in Elastic Cloud. In the [Elastic Cloud console](https://cloud.elastic.co/home) for your deployment, select the top-level menu and then select **Stack Management**.

![stack management](/assets/images/elastic-ai-assistant-observability-microsoft-azure-openai/16.png)

Select **Connectors** on the Stack Management page.

![connectors](/assets/images/elastic-ai-assistant-observability-microsoft-azure-openai/17.png)

Select **Create connector**.

![create connector](/assets/images/elastic-ai-assistant-observability-microsoft-azure-openai/18.png)

Select the connector for Azure OpenAI.

![openai](/assets/images/elastic-ai-assistant-observability-microsoft-azure-openai/19.png)

Enter a **Name** of your choice for the connector. Select **Azure OpenAI** as the OpenAI provider.

![openai connector](/assets/images/elastic-ai-assistant-observability-microsoft-azure-openai/20.png)

Enter the Endpoint URL using the following format:

- Replace `{your-resource-name}` with the **name of the Azure OpenAI instance** that you created within the Azure portal in a previous step.

- Replace `{deployment-id}` with the **Deployment name** that you specified when you created a model deployment within the Azure portal in a previous step.

- Replace `{api-version}` with one of the valid **Supported versions** listed in the [Completions section of the Azure OpenAI reference page](https://learn.microsoft.com/en-us/azure/ai-services/openai/reference).

```bash
https://{your-resource-name}.openai.azure.com/openai/deployments/{deployment-id}/chat/completions?api-version={api-version}
```

Your completed Endpoint URL should look something like this:

```bash
https://example-openai-instance.openai.azure.com/openai/deployments/gpt-4-turbo/chat/completions?api-version=2024-02-01
```
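
Optionally, before creating the connector, you can confirm that the URL and key work by calling the endpoint directly. A quick sketch, using the example URL from above and assuming an `AZURE_OPENAI_KEY` environment variable holds the API key you copied earlier (both are placeholders for your own values):

```bash
# Direct call to the Azure OpenAI chat completions endpoint.
# AZURE_OPENAI_KEY is assumed to hold the key copied from the Azure portal.
curl -s "https://example-openai-instance.openai.azure.com/openai/deployments/gpt-4-turbo/chat/completions?api-version=2024-02-01" \
  -H "Content-Type: application/json" \
  -H "api-key: ${AZURE_OPENAI_KEY}" \
  -d '{
        "messages": [
          { "role": "user", "content": "Reply with the word OK." }
        ]
      }'
```

A JSON response containing a `choices` array indicates the URL and key are valid; an authentication or routing error here will also show up when the connector is tested.
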
Enter the API Key that you copied in a previous step, then click the **Save & test** button.

![save & test](/assets/images/elastic-ai-assistant-observability-microsoft-azure-openai/21.png)

Within the **Edit Connector** flyout window, click the **Run** button to confirm that the connector configuration is valid and can successfully connect to your Azure OpenAI instance.

![run connector test](/assets/images/elastic-ai-assistant-observability-microsoft-azure-openai/22.png)

A successful connector test should look something like this:

![results](/assets/images/elastic-ai-assistant-observability-microsoft-azure-openai/23.png)

## Add an example logs record

Now that your Elastic Cloud deployment is set up with an AI Assistant connector, let's add an example logs record to demonstrate how the AI Assistant can help you better understand logs data.

We'll use the Elastic Dev Tools to add a single logs record. Click the top-level menu and select **Dev Tools**.

![dev tools](/assets/images/elastic-ai-assistant-observability-microsoft-azure-openai/24.png)

Within the Console area of Dev Tools, enter the following POST statement:

```bash
POST /logs-elastic_agent-default/_doc
{
  "message": "Status(StatusCode=\"FailedPrecondition\", Detail=\"Can't access cart storage. \nSystem.ApplicationException: Wasn't able to connect to redis \n at cartservice.cartstore.RedisCartStore.EnsureRedisConnected() in /usr/src/app/src/cartstore/RedisCartStore.cs:line 104 \n at cartservice.cartstore.RedisCartStore.EmptyCartAsync(String userId) in /usr/src/app/src/cartstore/RedisCartStore.cs:line 168\").",
  "@timestamp": "2024-02-22T11:34:00.884Z",
  "log": {
    "level": "error"
  },
  "service": {
    "name": "cartService"
  },
  "host": {
    "name": "appserver-1"
  }
}
```

Then run the POST command by clicking the green **Run** button.

![click to send request](/assets/images/elastic-ai-assistant-observability-microsoft-azure-openai/25.png)

You should see a 201 response confirming that the example logs record was successfully created.

![201 response](/assets/images/elastic-ai-assistant-observability-microsoft-azure-openai/26.png)
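
As an aside, Dev Tools is simply issuing an Elasticsearch API request, so the same record can be indexed from a terminal if you prefer. A sketch, where `ES_URL` and `ES_API_KEY` are placeholders for your deployment's Elasticsearch endpoint and an API key with write access (the `message` field is shortened here for readability):

```bash
# Index the example log record via the Elasticsearch REST API.
# ES_URL and ES_API_KEY are assumed placeholders for your own deployment.
curl -s -X POST "${ES_URL}/logs-elastic_agent-default/_doc" \
  -H "Content-Type: application/json" \
  -H "Authorization: ApiKey ${ES_API_KEY}" \
  -d '{
        "message": "Status(StatusCode=\"FailedPrecondition\", Detail=\"Can'\''t access cart storage. Wasn'\''t able to connect to redis\").",
        "@timestamp": "2024-02-22T11:34:00.884Z",
        "log": { "level": "error" },
        "service": { "name": "cartService" },
        "host": { "name": "appserver-1" }
      }'
```

A successful request returns a document ID and `"result": "created"`, the command-line equivalent of the 201 response above.
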
## Use the Elastic AI Assistant

Now that you have a log record to work with, let's jump over to the Observability Logs Explorer to see how the AI Assistant interacts with logs data. Click the top-level menu and select **Observability**.

![observability](/assets/images/elastic-ai-assistant-observability-microsoft-azure-openai/27.png)

Select **Logs Explorer** to explore the logs data.

![explorer](/assets/images/elastic-ai-assistant-observability-microsoft-azure-openai/28.png)

In the Logs Explorer search box, enter the text "redis" and press the **Enter** key to perform the search.

![redis](/assets/images/elastic-ai-assistant-observability-microsoft-azure-openai/29.png)

Click the **View all matches** button to include all search results.

![view all matches](/assets/images/elastic-ai-assistant-observability-microsoft-azure-openai/30.png)

You should see the one log record that you previously inserted via Dev Tools. Click the expand icon to see the log record's details.

![expand icon](/assets/images/elastic-ai-assistant-observability-microsoft-azure-openai/31.png)

You should now see the expanded view of the logs record. Instead of trying to understand its contents ourselves, we'll use the AI Assistant to summarize it. Click the **What's this message?** button.

![What's this message?](/assets/images/elastic-ai-assistant-observability-microsoft-azure-openai/32.png)

We get a fairly generic answer back. Depending on the exception or error we're trying to analyze, this can still be really useful, but we can do better by adding additional documentation to the AI Assistant knowledge base.

![log details](/assets/images/elastic-ai-assistant-observability-microsoft-azure-openai/33.png)

Let's see how we can use the AI Assistant's knowledge base to improve its understanding of this specific logs message.

## Create an Elastic AI Assistant knowledge base

Select **Overview** from the **Observability** menu.

![Select Overview from the Observability menu.](/assets/images/elastic-ai-assistant-observability-microsoft-azure-openai/34.png)

Click the **AI Assistant** button at the top right of the window.

![AI Assistant](/assets/images/elastic-ai-assistant-observability-microsoft-azure-openai/35.png)

Click the **Install Knowledge base** button.

![Install Knowledge base](/assets/images/elastic-ai-assistant-observability-microsoft-azure-openai/36.png)

Click the top-level menu and select **Stack Management**.

![Stack Management](/assets/images/elastic-ai-assistant-observability-microsoft-azure-openai/37.png)

Then select **AI Assistants**.

![AI Assistants](/assets/images/elastic-ai-assistant-observability-microsoft-azure-openai/38.png)

Click **Elastic AI Assistant for Observability**.

![Elastic AI Assistant for Observability](/assets/images/elastic-ai-assistant-observability-microsoft-azure-openai/39.png)

Select the **Knowledge base** tab.

![Knowledge base](/assets/images/elastic-ai-assistant-observability-microsoft-azure-openai/40.png)

Click the **New entry** button and select **Single entry**.

![new entry](/assets/images/elastic-ai-assistant-observability-microsoft-azure-openai/41.png)

Give it the **Name** "cartservice" and enter the following text as the **Contents**:

```markdown
Link: [Cartservice Intermittent connection issue](https://github.com/elastic/observability-examples/issues/25)
I have the following GitHub issue. Store this information in your knowledge base and always return the link to it if relevant.
GitHub Issue, return if relevant

Link: https://github.com/elastic/observability-examples/issues/25

Title: Cartservice Intermittent connection issue

Body:
The cartservice occasionally encounters storage errors due to an unreliable network connection.

The errors typically indicate a failure to connect to Redis, as seen in the error message:

Status(StatusCode="FailedPrecondition", Detail="Can't access cart storage.
System.ApplicationException: Wasn't able to connect to redis
at cartservice.cartstore.RedisCartStore.EnsureRedisConnected() in /usr/src/app/src/cartstore/RedisCartStore.cs:line 104
at cartservice.cartstore.RedisCartStore.EmptyCartAsync(String userId) in /usr/src/app/src/cartstore/RedisCartStore.cs:line 168')'.
I just talked to the SRE team in Slack; they have plans to implement retries as a quick fix and address the network issue later.
```

Click **Save** to save the new knowledge base entry.

![save](/assets/images/elastic-ai-assistant-observability-microsoft-azure-openai/42.png)
Now let's go back to the Observability Logs Explorer. Click the top-level menu and select **Observability**.

![settings](/assets/images/elastic-ai-assistant-observability-microsoft-azure-openai/43.png)

Then select **Explorer** under **Logs**.

![explorer](/assets/images/elastic-ai-assistant-observability-microsoft-azure-openai/44.png)

Expand the same logs entry as you did previously and click the **What's this message?** button.

![What's this message? button](/assets/images/elastic-ai-assistant-observability-microsoft-azure-openai/45.png)

The response you get now should be much more relevant.

![log details](/assets/images/elastic-ai-assistant-observability-microsoft-azure-openai/46.png)

## Try out the Elastic AI Assistant with a knowledge base filled with your own data

Now that you've seen how easy it is to set up the Elastic AI Assistant for Observability, go ahead and give it a try for yourself. Sign up for a [free 14-day trial](https://cloud.elastic.co/registration). You can spin up an Elastic Cloud deployment in minutes and have your own search-powered AI knowledge base to help you get your most important work done.

_The release and timing of any features or functionality described in this post remain at Elastic's sole discretion. Any features or functionality not currently available may not be delivered on time or at all._

_In this blog post, we may have used or referred to third party generative AI tools, which are owned and operated by their respective owners. Elastic does not have any control over the third party tools and we have no responsibility or liability for their content, operation or use, nor for any loss or damage that may arise from your use of such tools. Please exercise caution when using AI tools with personal, sensitive or confidential information. Any data you submit may be used for AI training or other purposes. There is no guarantee that information you provide will be kept secure or confidential. You should familiarize yourself with the privacy practices and terms of use of any generative AI tools prior to use._

_Elastic, Elasticsearch, ESRE, Elasticsearch Relevance Engine and associated marks are trademarks, logos or registered trademarks of Elasticsearch N.V. in the United States and other countries.
All other company and product names are trademarks, logos or registered trademarks of their respective owners._

# Elastic APM for iOS and Android Native apps

*By Akhilesh Pokhariyal, Cesar Munoz, and Bryce Buchanan · February 8, 2024 · Tags: opentelemetry, mobile-apm, apm*

*Elastic APM for native apps provides auto-instrumentation of outgoing HTTP requests and view-loads, captures custom events, errors, and crashes, and includes pre-built dashboards for data analysis and troubleshooting purposes.*

Elastic® APM for iOS and Android native apps is generally available in stack release v8.12. The Elastic [iOS](https://github.com/elastic/apm-agent-ios) and [Android](https://github.com/elastic/apm-agent-android) APM agents are open source and are built as distributions of the OpenTelemetry Swift and Android SDK/API, respectively.

## Overview of the Mobile APM solution

The OpenTelemetry SDK/API for iOS and Android supports capabilities such as auto-instrumentation of HTTP requests, an API for manual instrumentation, a data model based on the OpenTelemetry semantic conventions, and buffering support.
Additionally, the Elastic APM agent distributions support an easier initialization process and novel features such as remote config and user-session-based sampling. Because the Elastic [iOS](https://github.com/elastic/apm-agent-ios) and [Android](https://github.com/elastic/apm-agent-android) APM agents are _distributions_, they are maintained per Elastic's standard support T&Cs.

Curated, pre-built dashboards are provided in Kibana® for monitoring, data analysis, and troubleshooting. The **Service Overview** view shown below provides relevant frontend KPIs such as crash rate, HTTP requests, average app load time, and more, including the comparison view.

![1 - comparison view](/assets/images/apm-ios-android-native-apps/1.png)

Further, the geographic distribution of user traffic is available on a map at a country and regional level. The service overview dashboard also shows trends of metrics such as throughput, latency, and failed transaction rate, and the distribution of traffic by device make and model, network connection type, and app version.

The **Transactions** view shown below highlights the performance of the different transaction groups, including the end-to-end distributed trace of individual transactions with links to associated spans, errors, and crashes. Users can also see at a glance the distribution of traffic by device make and model, app version, and OS version.

![2 - opbeans android](/assets/images/apm-ios-android-native-apps/2.png)

Tabular views such as the one highlighted below, located at the bottom of the **Transactions** tab, make it relatively easy to see how device make and model, app version, etc., impact latency and crash rate.

![3 - latency and crash rate](/assets/images/apm-ios-android-native-apps/3.png)

The **Errors & Crashes** view shown below can be used to analyze the different error and crash groups. The unsymbolicated (iOS) or obfuscated (Android) stacktrace of each individual error or crash instance is also available in this view.

![4 - opbeans swift](/assets/images/apm-ios-android-native-apps/4.png)

The **Service Map** view shown below provides a visualization of the end-to-end service interdependencies, including any third-party APIs, proxy servers, and databases.

![5 - flowchart](/assets/images/apm-ios-android-native-apps/5.png)

The comprehensive pre-built dashboards for observing the mobile frontend in Kibana provide visibility into the sources of errors, crashes, and bottlenecks, easing troubleshooting in production. The underlying Elasticsearch® platform also supports querying raw data, building custom metrics and custom dashboards, alerting, SLOs, and anomaly detection. Altogether, the platform provides a comprehensive set of tools to expedite root cause analysis and remediation, thereby facilitating a high velocity of innovation.

## Walkthrough of the debugging workflow for some error scenarios

Next, we will walk through the configuration details and the troubleshooting workflow for a couple of error scenarios in iOS and Android native apps.

### Scenario 1

In this example, we will debug a crash in an asynchronous method using Apple's crash report **symbolication** as well as **breadcrumbs** to deduce the cause of the crash.

**Symbolication**
In this scenario, users notice a spike in the occurrences of a particular crash group in the Errors & Crashes tab and decide to investigate further.
A new crash comes in on the Crashes tab, and the developer follows these steps to symbolicate the crash report locally:

1. Copy the crash via the UI and paste it into a file named `<app_name>_<timestamp>.ips`, for example `opbeans-swift_2024-01-18-114211.ips`.

![6 - Symbolication](/assets/images/apm-ios-android-native-apps/6.png)

2. Apple provides [detailed instructions](https://developer.apple.com/documentation/xcode/adding-identifiable-symbol-names-to-a-crash-report) on how to symbolicate this file locally, either automatically through Xcode or manually using the command line.

**Breadcrumbs**
The second frame of the first thread shows that the crash is occurring in a Worker instance.

![7 - Breadcrumbs](/assets/images/apm-ios-android-native-apps/7.png)

This instance is actually used in many places, and due to the asynchronous nature of this function, it's not possible to determine immediately where the call is coming from. Nevertheless, we can use features of the OpenTelemetry SDK to add more context to these crashes and then put the pieces together to find the site of the crash.

By adding "breadcrumbs" around this Worker instance, it is possible to track down which calls to the Worker are actually associated with this crash.

**Example:**
Create a logger provider in the Worker class as a public variable for ease of access, as shown below:

![8 - example code](/assets/images/apm-ios-android-native-apps/8.png)

Create breadcrumbs everywhere the Worker.doWork() function is called:

![9 - Create breadcrumbs everywhere the Worker.doWork() function is called](/assets/images/apm-ios-android-native-apps/9.png)

Each of these breadcrumbs uses the same event **name**, "worker_breadcrumb", so they can be consistently queried; the differentiation is done using the **source** attribute.

In this example, the Worker.doWork() function is being called from a CustomerRow struct (a table row that does work `onTapGesture`). If you were to call this method from multiple places in a CustomerRow struct, you could add further differentiation to the **source** attribute value, such as the associated function (e.g., "CustomerRow#onTapGesture").

Now that the app is reporting these breadcrumbs, we can use Discover to **query** for them, as shown below:

![10 - Discover to query](/assets/images/apm-ios-android-native-apps/10.png)

_**Note:** Event **names** sent by the agent are translated to event **action** in Elastic Common Schema (ECS), so ensure the query uses this field._

1. Add a filter `event.action: "worker_breadcrumb"`, and Discover shows all events generated from this new breadcrumb.

2. You can also see the various sources: ProductRow, CustomerRow, CartRow, etc.

3. If you add `error.type: crash` to the query, you can see crashes alongside the breadcrumbs:

![11 - crashes alongside the breadcrumbs](/assets/images/apm-ios-android-native-apps/11.png)

A crash and a breadcrumb next to each other in the timeline may come from completely different devices, so we need another differentiator. For each crash, we have metadata that contains the **session.id** associated with the crash, viewable from the Metadata tab. We can query using this **session.id** to ensure that the only data we are looking at in Discover is from the single user session (i.e., a single device) that resulted in the crash, as in the query sketched below.

![12 - session.id](/assets/images/apm-ios-android-native-apps/12.png)
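
For reference, here is roughly what that Discover filtering looks like as a raw Elasticsearch query. This is a sketch rather than part of the original article: `event.action`, `error.type`, and `session.id` are the fields discussed above, while the `logs-apm*` index pattern, the session ID value, and the `ES_URL`/`ES_API_KEY` variables are placeholders you would adapt to your own deployment.

```bash
# Breadcrumb events and crashes from a single user session, oldest first.
# Index pattern and session ID are illustrative placeholders.
curl -s "${ES_URL}/logs-apm*/_search" \
  -H "Content-Type: application/json" \
  -H "Authorization: ApiKey ${ES_API_KEY}" \
  -d '{
        "query": {
          "bool": {
            "filter": [
              { "term": { "session.id": "<session-id-from-crash-metadata>" } },
              {
                "bool": {
                  "should": [
                    { "term": { "event.action": "worker_breadcrumb" } },
                    { "term": { "error.type": "crash" } }
                  ]
                }
              }
            ]
          }
        },
        "sort": [{ "@timestamp": "asc" }]
      }'
```
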
In Discover, we can now see the session event flow on a single device around the time of the crash via the breadcrumbs, as shown below:

![13 - session event flow](/assets/images/apm-ios-android-native-apps/13.png)

It looks like the last breadcrumb before the crash was from the "CustomerRow" breadcrumb. This gives the app developer a good place to start the root cause analysis.

### Scenario 2

_**Note:** This scenario requires Elastic Android agent version 0.14.0 or higher._

An Android sample app has a form composed of two screens, created using two fragments (`FirstPage` and `SecondPage`). On the first screen, the app makes a backend API call to get a key that identifies the form submission. This key is stored in memory in the app and must be available on the last screen, where the form is sent; the key must be sent along with the form's data.

![14 - form submission](/assets/images/apm-ios-android-native-apps/14.jpg)

**The problem**
In Kibana, we start to see a spike in crash occurrences (a null pointer exception) in the Errors & Crashes tab that always seems to happen on the last screen of the form, when users click the "FINISH" button. However, **this is not always reproducible**, so the root cause isn't clear from the crash's stacktrace alone. Here's what it looks like:

![15 - stack trace](/assets/images/apm-ios-android-native-apps/15.png)

When we take a look at the code referenced in the stacktrace, this is what we see:

![16 - code referenced in the stacktrace](/assets/images/apm-ios-android-native-apps/16.png)

This is the line where the crash happens, so it seems the variable `formId` (a static String located in `FirstPage`) was null by the time this code was executed, causing a null pointer exception to be raised. This variable is set within the `FirstPage` fragment after the backend request that retrieves the id completes. The only way to get to `SecondPage` is by passing through `FirstPage`. So the stacktrace alone doesn't help much: the pages have to be opened in order, and the first one will always set the `formId` variable. It therefore doesn't seem likely that `formId` could be null in `SecondPage`.

**Finding the root cause**
Apart from looking at the crash's stacktrace, it can be useful to look at complementary data that helps put the pieces together and gives a broader picture of what else was happening while our app was running when the crash occurred. In this case, we know that the form ID must come from our backend service, so we can start by ruling out an error with the backend call. We do this by checking the traces from the creation of our FirstPage fragment, where the form ID request is executed, in the Transaction details view:

![17 - trace sample](/assets/images/apm-ios-android-native-apps/17.png)

The "Created" spans represent the time it took to create the first fragment. The topmost one shows the Activity creation, followed by the NavHostFragment, followed by "FirstScreen." Not long after its creation, we see that a GET HTTP request to our backend is made to retrieve our form ID and, according to the traces, the GET request was successful.
We can therefore rule out an issue with the backend communication.

Another option is to look at the logs sent throughout the [session](https://opentelemetry.io/docs/specs/semconv/general/session/) in which the crash occurred (we could look at all the logs coming from our app, but there would be too many for analyzing this one issue). To do so, we first copy one of the spans' "session.id" values from the span details flyout; any span will work, since the same session ID is attached to all the data sent from our app during the time the crash occurred.

![18 - red box highlighted](/assets/images/apm-ios-android-native-apps/18.png)

_**Note:** The same session ID can also be found in the crash metadata._

Now that we have identified our session, we can open the Logs Explorer view and look at all of our app's logs within that same session, as shown below:

![19 - app's logs](/assets/images/apm-ios-android-native-apps/19.png)

Looking at the logs, and adding a few fields to show the app's lifecycle status and the error types, we see the log events that are [automatically collected](https://github.com/elastic/apm/blob/main/specs/agents/mobile/events.md) from our app. The crash event is at the top of the list as the latest one. We can also see our app's lifecycle events, and if we keep scrolling, we get to some lifecycle events that help us find the root cause:

![20 - root cause](/assets/images/apm-ios-android-native-apps/20.png)

There are a couple of lifecycle events telling us that the app was restarted during the session. This is an important hint: it means the Android OS killed our app at some point, which is common when an app stays in the background for a while. With this information, we can try to reproduce the issue by forcing the OS to kill the app in the background and then seeing how it behaves when reopened from the recently opened apps menu.

After giving it a try, we could reproduce the issue: the static `formId` variable was lost when the app was restarted, causing it to be null when the `SecondPage` fragment requested it. We can now research best practices for passing arguments to Fragments and change our code to stop relying on static fields, instead storing and sharing values between screens, preventing this crash from happening again.

**Bonus:** For this scenario, it was enough to rely on the events that are sent automatically by the APM agent; however, if those aren't enough for other cases, we can always send custom events wherever we want to track state changes in our app via the OpenTelemetry event API, as shown in the code snippet below:

![21 - custom event code snippet](/assets/images/apm-ios-android-native-apps/21.png)
**Bonus:** For this scenario, it was enough to rely on the events that are sent automatically by the APM agent; however, if those aren't enough for other cases, we can always send custom events wherever we want to track our app's state changes via the OpenTelemetry event API, as shown in the code snippet below:

![21 - black code box](/assets/images/apm-ios-android-native-apps/21.png)
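As a rough textual stand-in for the snippet in the screenshot, the following Kotlin sketch emits a comparable custom event through OpenTelemetry's stable log-record API (the incubating event API's exact surface varies by version); the scope name, event body, and attribute here are illustrative choices, not the article's:

```kotlin
import io.opentelemetry.api.GlobalOpenTelemetry
import io.opentelemetry.api.common.AttributeKey

// Sketch: emit a custom state-change event as an OpenTelemetry log record.
fun emitStateChangeEvent(source: String) {
    val logger = GlobalOpenTelemetry.get()
        .logsBridge          // LoggerProvider from the global OpenTelemetry instance
        .get("my-app")       // instrumentation scope name (illustrative)

    logger.logRecordBuilder()
        .setBody("form_state_change")
        .setAttribute(AttributeKey.stringKey("source"), source)
        .emit()
}
```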
The Elastic \\",(0,t.jsx)(e.a,{href:\\"https://github.com/elastic/apm-agent-ios\\",rel:\\"nofollow\\",children:\\"iOS\\"}),\\" and \\",(0,t.jsx)(e.a,{href:\\"https://github.com/elastic/apm-agent-android\\",rel:\\"nofollow\\",children:\\"Android\\"}),\\" APM agents are open-source and have been developed on-top, i.e., as a distribution of the OpenTelemetry Swift and Android SDK/API, respectively.\\"]}),`\\n`,(0,t.jsx)(e.h2,{id:\\"overview-of-the-mobile-apm-solution\\",children:\\"Overview of the Mobile APM solution\\"}),`\\n`,(0,t.jsxs)(e.p,{children:[\\"The OpenTelemetry SDK/API for iOS and Android supports capabilities such as auto-instrumentation of HTTP requests, API for manual instrumentation, data model based on the OpenTelemetry semantic conventions, and buffering support. Additionally, the Elastic APM agent distributions also support an easier initialization process and novel features such as remote config and user session based sampling. The Elastic \\",(0,t.jsx)(e.a,{href:\\"https://github.com/elastic/apm-agent-ios\\",rel:\\"nofollow\\",children:\\"iOS\\"}),\\" and \\",(0,t.jsx)(e.a,{href:\\"https://github.com/elastic/apm-agent-android\\",rel:\\"nofollow\\",children:\\"Android\\"}),\\" APM agents being \\",(0,t.jsx)(e.em,{children:\\"distributions\\"}),\\" are maintained per Elastic\\\\u2019s standard support T&Cs.\\"]}),`\\n`,(0,t.jsxs)(e.p,{children:[\\"There are curated or pre-built dashboards provided in Kibana\\\\xAE for monitoring, data analysis, and for troubleshooting purposes. The \\",(0,t.jsx)(e.strong,{children:\\"Service Overview\\"}),\\" view shown below provides relevant frontend KPIs such as crash rate, http requests, average app load time, and more, including the comparison view.\\"]}),`\\n`,(0,t.jsx)(e.p,{children:(0,t.jsx)(e.img,{src:\\"/assets/images/apm-ios-android-native-apps/1.png\\",alt:\\"1 - comparison view\\",width:\\"1999\\",height:\\"738\\"})}),`\\n`,(0,t.jsx)(e.p,{children:\\"Further, the geographic distribution of user traffic is available on a map at a country and regional level. The service overview dashboard also shows trends of metrics such as throughput, latency, failed transaction rate, and distribution of traffic by device make-model, network connection type, and app version.\\"}),`\\n`,(0,t.jsxs)(e.p,{children:[\\"The \\",(0,t.jsx)(e.strong,{children:\\"Transactions\\"}),\\" view shown below highlights the performance of the different transaction groups, including the distributed trace end-to-end of individual transactions with links to associated spans, errors and crashes. Further, users can see at a glance the distribution of traffic by device make and model, app version, and OS version.\\"]}),`\\n`,(0,t.jsx)(e.p,{children:(0,t.jsx)(e.img,{src:\\"/assets/images/apm-ios-android-native-apps/2.png\\",alt:\\"2- opbeans android\\",width:\\"1999\\",height:\\"907\\"})}),`\\n`,(0,t.jsxs)(e.p,{children:[\\"Tabular views such as the one highlighted below located at the bottom of \\",(0,t.jsx)(e.strong,{children:\\"Transactions\\"}),\\" tab makes it relatively easy to see how the device make and model, App version, etc., impacts latency and crash rate.\\"]}),`\\n`,(0,t.jsx)(e.p,{children:(0,t.jsx)(e.img,{src:\\"/assets/images/apm-ios-android-native-apps/3.png\\",alt:\\"3 - latency and crash rate\\",width:\\"1999\\",height:\\"697\\"})}),`\\n`,(0,t.jsxs)(e.p,{children:[\\"The \\",(0,t.jsx)(e.strong,{children:\\"Errors & Crashes\\"}),\\" view shown below can be used to analyze the different error and crash groups. 
Try it today at \\",(0,t.jsx)(e.a,{href:\\"https://ela.st/free-trial\\",rel:\\"nofollow\\",children:\\"https://ela.st/free-trial\\"}),\\".\\"]}),`\\n`,(0,t.jsx)(e.p,{children:(0,t.jsx)(e.em,{children:\\"The release and timing of any features or functionality described in this post remain at Elastic\'s sole discretion. Any features or functionality not currently available may not be delivered on time or at all.\\"})})]})}function d(a={}){let{wrapper:e}=a.components||{};return e?(0,t.jsx)(e,{...a,children:(0,t.jsx)(c,{...a})}):c(a)}return y(A);})();\\n;return Component;"},"_id":"articles/elastic-apm-for-ios-and-android-native-apps.mdx","_raw":{"sourceFilePath":"articles/elastic-apm-for-ios-and-android-native-apps.mdx","sourceFileName":"elastic-apm-for-ios-and-android-native-apps.mdx","sourceFileDir":"articles","contentType":"mdx","flattenedPath":"articles/elastic-apm-for-ios-and-android-native-apps"},"type":"Article","imageUrl":"/assets/images/apm-ios-android-native-apps/141949-elastic-blogheaderimage.png","readingTime":"12 min read","url":"/apm-ios-android-native-apps","headings":[{"level":2,"title":"Overview of the Mobile APM solution","href":"#overview-of-the-mobile-apm-solution"},{"level":2,"title":"Walkthrough of the debugging workflow for some error scenarios","href":"#walkthrough-of-the-debugging-workflow-for-some-error-scenarios"},{"level":3,"title":"Scenario 1","href":"#scenario-1"},{"level":3,"title":"Scenario 2","href":"#scenario-2"},{"level":2,"title":"Make the most of your Elastic APM Experience","href":"#make-the-most-of-your-elastic-apm-experience"}]},{"title":"Accelerate log analytics in Elastic Observability with Automatic Import powered by Search AI","slug":"elastic-automatic-import-logs-genai","date":"2024-09-04","description":"Migrate your logs to AI-driven log analytics in record time by automating custom data integrations","image":"elastic-auto-importv2.jpg","author":[{"slug":"bahubali-shetti","type":"Author","_raw":{}}],"tags":[{"slug":"log-analytics","type":"Tag","_raw":{}}],"body":{"raw":"\\nElastic is accelerating the adoption of [AI-driven log analytics](https://www.elastic.co/observability/aiops) by automating the ingestion of custom logs, which is increasingly important as the deployment of GenAI-based applications grows. These custom data sources must be ingested, parsed, and indexed effortlessly, enabling broader visibility and more straightforward root cause analysis (RCA) without requiring effort from Site Reliability Engineers (SREs). Achieving visibility across an enterprise IT environment is inherently challenging for SREs due to constant growth and change, such as new applications, added systems, and infrastructure migrations to the cloud. Until now, the onboarding of custom data has been costly and complex for SREs. With automatic import, SREs can concentrate on deploying, optimizing, and improving applications.\\n\\nAutomatic Import uses generative AI to automate the development of custom data integrations, reducing the time required from several days to less than 10 minutes and significantly lowering the learning curve for onboarding data. Powered by the\xa0 [Elastic Search AI Platform](https://www.elastic.co/platform), it provides model-agnostic access to leverage large language models (LLMs) and grounds answers in proprietary data through [retrieval augmented generation (RAG)](https://www.elastic.co/search-labs/blog/retrieval-augmented-generation-rag). 
This capability is further enhanced by Elastic's expertise in enabling observability teams to utilize any type of data and the flexibility of its [Search AI Lake](https://www.elastic.co/generative-ai/search-ai-lake). Arriving at a crucial time when organizations face an explosion of applications and telemetry data, such as logs, Automatic Import streamlines the initial stages of data migration by simplifying data collection and normalization. It also addresses the challenges of building custom connectors, which can otherwise delay deployments, slow issue analysis, and impact customer experiences.

![Create new integration](/assets/images/elastic-automatic-import-logs-genai/auto-import-new-int.png)

## Enhancing AI-powered observability with Automatic Import

[Automatic Import](https://www.elastic.co/observability) builds on Elastic Observability's AI-driven log analytics innovations, such as [anomaly detection](https://www.elastic.co/guide/en/machine-learning/current/ml-getting-started.html), [log rate and pattern analysis](https://www.elastic.co/guide/en/kibana/current/xpack-ml-aiops.html), and the [Elastic AI Assistant](https://www.elastic.co/blog/introducing-elastic-ai-assistant), and further automates and simplifies SREs' workflows. Automatic Import applies generative AI to automate the creation of custom data integrations, allowing SREs to focus on logs and other telemetry data. While Elastic provides [400+ prebuilt data integrations](https://www.elastic.co/integrations/data-integrations), Automatic Import allows SREs to extend integrations to fit their workflows and expand visibility into production environments.

In conjunction with Automatic Import, Elastic is introducing [Elastic Express Migration](https://www.elastic.co/blog/ai-log-analytics-express-migration), a commercial incentive program designed to overcome migration inertia from existing deployments and contracts, providing a faster adoption path for new customers.

Automatic Import leverages [Elastic Common Schema (ECS)](https://www.elastic.co/blog/ecs-elastic-common-schema-otel-opentelemetry-faq) with public LLMs to process and analyze data in ECS format, which is also part of OpenTelemetry. Once the data is in, SREs can leverage Elastic's RAG-based AI Assistant to solve root cause analysis challenges in dynamic, complex environments.

## Configuring and using Automatic Import

Automatic Import is available to everyone with an Enterprise license. Here is how it works:

- The user configures connectivity to an LLM and uploads sample data.

- Automatic Import then extrapolates what to expect from the data source. These log samples are paired with LLM prompts that have been honed by Elastic engineers to reliably produce conformant Elasticsearch ingest pipelines.

- Automatic Import then iteratively builds, tests, and tweaks a custom ingest pipeline until it meets Elastic integration requirements.

![Create new integration Architecture](/assets/images/elastic-automatic-import-logs-genai/auto-import-arch.png)
_Automatic Import powered by the Elastic Search AI Platform_

Within minutes, a validated custom integration is created that accurately maps raw data into ECS and custom fields, populates contextual information (such as `related.*` fields), and categorizes events.

Automatic Import currently supports Anthropic models via [Elastic's connector for Amazon Bedrock](https://www.elastic.co/guide/en/kibana/8.15/bedrock-action-type.html), and additional LLMs will be introduced soon. It currently supports JSON and NDJSON-based log formats.

### Automatic Import workflow

SREs constantly have to manage new tools and components that developers add to applications. Neo4j, for example, is a database that doesn't have an integration in Elastic. The following steps walk you through how to create an integration for Neo4j with Automatic Import:

1. Start by navigating to `Integrations` -> `Create new integration`.

![Create new integration](/assets/images/elastic-automatic-import-logs-genai/auto-import-new-int.png)

2. Provide a name and description for the new data source.

![Set up integration](/assets/images/elastic-automatic-import-logs-genai/auto-import-neo4j-setup.png)

3. Next, fill in other details and provide some sample data, anonymized as you see fit (an illustrative example follows below).

![Set up pipeline](/assets/images/elastic-automatic-import-logs-genai/auto-import-pipline.png)
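For instance, sample data for an NDJSON-based source might look like the following two lines (purely illustrative records, not Neo4j's actual log schema):

```json
{"time":"2024-09-04T12:01:13.456+0000","level":"INFO","database":"neo4j","message":"Query started","query":"MATCH (n) RETURN count(n)"}
{"time":"2024-09-04T12:01:13.501+0000","level":"WARN","database":"neo4j","message":"Slow query detected","elapsedMs":1742}
```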
4. Click “Analyze logs” to submit integration details, sample logs, and expert-written instructions from Elastic to the specified LLM, which builds the integration package using generative AI. Automatic Import then fine-tunes the integration in an automated feedback loop until it is validated to meet Elastic requirements.

![Analyze sample logs](/assets/images/elastic-automatic-import-logs-genai/auto-import-analysis.png)

5. Review the recommended mappings to ECS fields and custom fields that Automatic Import presents. You can easily adjust these settings if necessary.

![Review Analysis](/assets/images/elastic-automatic-import-logs-genai/auto-import-finished.png)

6. After finalizing the integration, add it to Elastic Agent or view it in Kibana. It is now available alongside your other integrations and follows the same workflows as prebuilt integrations.

![Creation complete](/assets/images/elastic-automatic-import-logs-genai/auto-import-success.png)

7. Upon deployment, you can begin analyzing newly ingested data immediately. Start by looking at the new Logs Explorer in Elastic Observability.

![Look at logs](/assets/images/elastic-automatic-import-logs-genai/auto-import-explorer.png)

## Accelerate log analytics with Automatic Import

Automatic Import lowers the time required to build and test custom data integrations from days to minutes, accelerating the switch to [AI-driven log analytics](https://www.elastic.co/observability/aiops).
Elastic Observability pairs the unique power of Automatic Import with Elastic's deep library of prebuilt data integrations, enabling wider visibility and fast data onboarding, along with AI-based features such as the Elastic AI Assistant to accelerate RCA and reduce operational overhead.

Interested in our [Express Migration](https://www.elastic.co/splunk-replacement) program to level up to Elastic? [Contact Elastic](https://www.elastic.co/splunk-interest?elektra=organic&storm=CLP&rogue=splunkobs-gic) to learn more.

_The release and timing of any features or functionality described in this post remain at Elastic's sole discretion. Any features or functionality not currently available may not be delivered on time or at all._

_In this blog post, we may have used or referred to third party generative AI tools, which are owned and operated by their respective owners. Elastic does not have any control over the third party tools, and we have no responsibility or liability for their content, operation, or use, nor for any loss or damage that may arise from your use of such tools. Please exercise caution when using AI tools with personal, sensitive, or confidential information. Any data you submit may be used for AI training or other purposes. There is no guarantee that information you provide will be kept secure or confidential. You should familiarize yourself with the privacy practices and terms of use of any generative AI tools prior to use._

_Elastic, Elasticsearch, ESRE, Elasticsearch Relevance Engine and associated marks are trademarks, logos or registered trademarks of Elasticsearch N.V. in the United States and other countries. All other company and product names are trademarks, logos or registered trademarks of their respective owners._
# Introducing Elastic Distribution of OpenTelemetry Collector

_August 9, 2024 · Alexander Wert, Miguel Luna_

OpenTelemetry is an open-source framework that ensures vendor-agnostic data collection, providing a standardized approach for the collection, processing, and ingestion of observability data. Elastic is fully committed to this principle, aiming to make observability truly vendor-agnostic and eliminating the need for users to reinstrument their observability when switching platforms.

Over the past year, Elastic has made several notable contributions to the OpenTelemetry ecosystem. We [donated our Elastic Common Schema (ECS)](https://opentelemetry.io/blog/2023/ecs-otel-semconv-convergence/) to OpenTelemetry, successfully [integrated the eBPF-based profiling agent](https://opentelemetry.io/blog/2024/elastic-contributes-continuous-profiling-agent/), and have consistently been one of the top contributing companies across the OpenTelemetry project. Additionally, Elastic has significantly improved upstream logging capabilities within OpenTelemetry with enhancements to key areas such as [container logging](https://opentelemetry.io/blog/2024/otel-collector-container-log-parser/), further strengthening the framework's robustness.

These efforts demonstrate our strategic focus on enhancing and expanding the capabilities of OpenTelemetry for the broader observability community and reinforce the vendor-agnostic benefits of using OpenTelemetry.

Today, we are thrilled to announce the technical preview of the Elastic Distribution of OpenTelemetry Collector.
This new offering underscores Elastic's dedication to this important framework and highlights our ongoing contributions to make OpenTelemetry the best vendor-agnostic data collection framework.

## Elastic Agent as an OpenTelemetry Collector

Technically, the Elastic Distribution of OpenTelemetry Collector represents an evolution of the Elastic Agent. In its latest version, the Elastic Agent can operate in an OpenTelemetry mode. This mode invokes a module within the Elastic Agent that is essentially a distribution of the OpenTelemetry Collector, crafted from a selection of upstream components from the contrib distribution.

The Elastic OpenTelemetry Collector also includes configuration for this set of [upstream OpenTelemetry Collector components](https://github.com/elastic/elastic-agent/tree/main/internal/pkg/otel#components), providing out-of-the-box functionality with Elastic Observability. This integration allows users to seamlessly utilize Elastic's advanced observability features with minimal setup.

The technical preview of the Elastic OpenTelemetry Collector has been tailored with out-of-the-box configurations for the use cases below; we will keep working to add more as we progress:

- **_Collect and ship logs_**: Use the Elastic OpenTelemetry Collector to gather log data from various sources and ship it directly to Elastic, where it can be analyzed in Kibana Discover and Elastic Observability's Explorer (also in tech preview in 8.15).

- **_Assess host health_**: Leverage the OpenTelemetry host metrics and Kubernetes receivers to monitor and evaluate the performance of hosts and pods. This data can then be visualized and analyzed in Elastic's Infrastructure Observability UIs, providing deep insights into host performance and health. Details of how this is configured in the OTel Collector are outlined in this [blog](https://www.elastic.co/observability-labs/blog/infrastructure-monitoring-with-opentelemetry-in-elastic-observability).

- **_Kubernetes container logs_**: Additionally, users of the Elastic OpenTelemetry Collector benefit from out-of-the-box Kubernetes container and application logs enriched with Kubernetes metadata, by leveraging the powerful [container log parser](https://opentelemetry.io/blog/2024/otel-collector-container-log-parser/) Elastic recently contributed to OTel. This OpenTelemetry-based enrichment enhances the context and value of the collected logs, providing deeper insights and more effective troubleshooting capabilities.

While the Elastic OpenTelemetry Collector comes pre-built and preconfigured for an easier onboarding and getting-started experience, Elastic is committed to the vision of vendor-neutral collection of data.
Thus, we strive to contribute any Elastic-specific features back to the upstream OpenTelemetry components, to advance and help grow the OpenTelemetry landscape and capabilities.\n\nStay tuned for upcoming announcements sharing our plans to combine the best of Elastic Agent and OpenTelemetry Collector.\n\n## Get started with the Elastic Distribution of OpenTelemetry Collector\n\nTo get started with a guided onboarding flow for the Elastic Distribution of the OpenTelemetry Collector for Kubernetes, Linux, and Mac environments, visit the [guided onboarding documentation](https://github.com/elastic/opentelemetry/blob/main/docs/guided-onboarding.md).\n\nFor more advanced manual configuration, follow the [manual configuration instructions](https://github.com/elastic/opentelemetry/blob/main/docs/manual-configuration.md).\n\nOnce the Elastic Distribution of the OpenTelemetry Collector is set up and running, you’ll be able to analyze your systems using various features of the Elastic Observability solution.\n\nAnalyze the performance and health of your infrastructure through corresponding metrics and logs collected via OpenTelemetry Collector receivers, such as the host metrics receiver and different Kubernetes receivers.\n\n![OTel Monitoring Hosts](/assets/images/elastic-distribution-opentelemetry-collector/hosts.png)\n\n\n![OTel Logs](/assets/images/elastic-distribution-opentelemetry-collector/otel-daemonset-green-logs.png)\n\n\nWith the Elastic OpenTelemetry Collector, container and application logs are enriched with Kubernetes metadata out of the box, making filtering, grouping, and log analysis easier and more efficient.\n\n\n![OTel Discover](/assets/images/elastic-distribution-opentelemetry-collector/explorer.png)\n\nThe Elastic Distribution of the OpenTelemetry Collector allows for tracing just like any other collector distribution made of upstream components. Explore and analyze the performance and runtime behavior of your applications and services through RED metrics, service maps, and distributed traces collected from OpenTelemetry SDKs.\n\n\n![OTel APM](/assets/images/elastic-distribution-opentelemetry-collector/apm.png)\n\n\nThe above capabilities and features packed with the Elastic OpenTelemetry Collector can also be achieved with a custom build of the upstream OpenTelemetry Collector that packs the right set of upstream components, as sketched below. 
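Such a custom build is typically produced with the OpenTelemetry Collector Builder (ocb). The manifest below is a hypothetical sketch with placeholder module versions, not the exact component set of the Elastic distribution:\n\n```bash\n# Hypothetical builder manifest; pin the gomod versions to your target release.\ncat > builder.yml <<\'EOF\'\ndist:\n  name: my-otelcol\n  output_path: ./dist\nreceivers:\n  - gomod: go.opentelemetry.io/collector/receiver/otlpreceiver v0.109.0\n  - gomod: github.com/open-telemetry/opentelemetry-collector-contrib/receiver/filelogreceiver v0.109.0\nexporters:\n  - gomod: go.opentelemetry.io/collector/exporter/otlpexporter v0.109.0\nEOF\n# ocb is the builder binary published by the OpenTelemetry project\nocb --config builder.yml\n```\n\n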
On that journey we are committed to a vendor-agnostic approach to data collection and therefore prioritize upstream contribution to OpenTelemetry over Elastic-specific data collection features.\xa0\\n\\nStay tuned to see more of Elastic’s contributions to OpenTelemetry and observe Elastic’s journey towards fully OpenTelemetry-based observability.\xa0\\n\\nAdditional resources for OpenTelemetry with Elastic:\\n\\n- Elastic Distributions recently introduced:\\n\\n - [Elastic Distribution of OpenTelemetry\'s Java SDK](https://www.elastic.co/observability-labs/blog/elastic-distribution-opentelemetry-java-agent).\\n\\n - [Elastic Distribution of OpenTelemetry\'s Python SDK](https://www.elastic.co/observability-labs/blog/elastic-opentelemetry-distribution-python)\\n\\n - [Elastic Distribution of OpenTelemetry\'s NodeJS SDK](https://www.elastic.co/observability-labs/blog/elastic-opentelemetry-distribution-node-js)\\n\\n - [Elastic Distribution of OpenTelemetry\'s .NET SDK](https://www.elastic.co/observability-labs/blog/elastic-opentelemetry-distribution-dotnet-applications)\\n\\n - [Elastic Distribution of OpenTelemetry of iOS and Android](https://www.elastic.co/observability-labs/blog/apm-ios-android-native-apps)\\n\\n\\n- Other Elastic OpenTelemetry resources:\\n\\n - [Independence with OpenTelemetry on Elastic](https://www.elastic.co/blog/opentelemetry-observability)\\n\\n - [Modern observability and security on Kubernetes with Elastic and OpenTelemetry](https://www.elastic.co/blog/implementing-kubernetes-observability-security-opentelemetry)\\n\\n - [3 models for logging with OpenTelemetry and Elastic](https://www.elastic.co/blog/3-models-logging-opentelemetry-elastic)\\n\\n- Instrumentation resources:\\n\\n - Python:\xa0[Auto-instrumentation](https://www.elastic.co/blog/auto-instrumentation-of-python-applications-opentelemetry),\xa0[Manual instrumentation](https://www.elastic.co/blog/manual-instrumentation-of-python-applications-opentelemetry)\\n\\n - Java:\xa0[Auto-instrumentation](https://www.elastic.co/blog/auto-instrumentation-of-java-applications-opentelemetry),\xa0[Manual instrumentation\xa0](https://www.elastic.co/blog/manual-instrumentation-of-java-applications-opentelemetry)\\n\\n - Node.js:\xa0[Auto-instrumentation](https://www.elastic.co/blog/auto-instrument-nodejs-applications-opentelemetry),\xa0[Manual instrumentation](https://www.elastic.co/blog/manual-instrumentation-of-nodejs-applications-opentelemetry)\\n\\n - .NET:\xa0[Auto-instrumentation](https://www.elastic.co/blog/auto-instrumentation-of-net-applications-opentelemetry),\xa0[Manual instrumentation](https://www.elastic.co/blog/manual-instrumentation-of-net-applications-opentelemetry)\\n\\n","code":"var Component=(()=>{var p=Object.create;var l=Object.defineProperty;var m=Object.getOwnPropertyDescriptor;var u=Object.getOwnPropertyNames;var g=Object.getPrototypeOf,f=Object.prototype.hasOwnProperty;var b=(n,e)=>()=>(e||n((e={exports:{}}).exports,e),e.exports),y=(n,e)=>{for(var i in e)l(n,i,{get:e[i],enumerable:!0})},a=(n,e,i,r)=>{if(e&&typeof e==\\"object\\"||typeof e==\\"function\\")for(let o of u(e))!f.call(n,o)&&o!==i&&l(n,o,{get:()=>e[o],enumerable:!(r=m(e,o))||r.enumerable});return n};var w=(n,e,i)=>(i=n!=null?p(g(n)):{},a(e||!n||!n.__esModule?l(i,\\"default\\",{value:n,enumerable:!0}):i,n)),v=n=>a(l({},\\"__esModule\\",{value:!0}),n);var c=b((C,s)=>{s.exports=_jsx_runtime});var O={};y(O,{default:()=>d,frontmatter:()=>T});var t=w(c()),T={title:\\"Introducing Elastic Distribution of OpenTelemetry 
Collector\\",slug:\\"elastic-distribution-opentelemetry-collector\\",date:\\"2024-08-09\\",description:\\"We are thrilled to announce the technical preview of the Elastic Distribution of OpenTelemetry Collector. This new offering underscores Elastic dedication to this important framework and highlights our ongoing contributions to make OpenTelemetry the best vendor agnostic data collection framework.\\",author:[{slug:\\"alexander-wert\\"},{slug:\\"miguel-luna\\"}],image:\\"otel-collector-announcement.jpeg\\",tags:[{slug:\\"opentelemetry\\"},{slug:\\"apm\\"},{slug:\\"log analytics\\"},{slug:\\"instrumentation\\"}]};function h(n){let e={a:\\"a\\",em:\\"em\\",h2:\\"h2\\",img:\\"img\\",li:\\"li\\",p:\\"p\\",strong:\\"strong\\",ul:\\"ul\\",...n.components};return(0,t.jsxs)(t.Fragment,{children:[(0,t.jsx)(e.p,{children:\\"OpenTelemetry is an open-source framework that ensures vendor-agnostic data collection, providing a standardized approach for the collection, processing, and ingestion of observability data. Elastic is fully committed to this principle, aiming to make observability truly vendor-agnostic and eliminating the need for users to reinstrument their observability when switching platforms.\\"}),`\\n`,(0,t.jsxs)(e.p,{children:[\\"Over the past year, Elastic has made several notable contributions to the OpenTelemetry ecosystem. We \\",(0,t.jsx)(e.a,{href:\\"https://opentelemetry.io/blog/2023/ecs-otel-semconv-convergence/\\",rel:\\"nofollow\\",children:\\"donated our Elastic Common Schema (ECS)\\"}),\\" to OpenTelemetry, successfully \\",(0,t.jsx)(e.a,{href:\\"https://opentelemetry.io/blog/2024/elastic-contributes-continuous-profiling-agent/\\",rel:\\"nofollow\\",children:\\"integrated the eBPF-based profiling agent\\"}),\\", and have consistently been one of the top contributing companies across the OpenTelemetry project. Additionally, Elastic has significantly improved upstream logging capabilities within OpenTelemetry with enhancements to key areas such as \\",(0,t.jsx)(e.a,{href:\\"https://opentelemetry.io/blog/2024/otel-collector-container-log-parser/\\",rel:\\"nofollow\\",children:\\"container logging\\"}),\\", further enhancing the framework\\\\u2019s robustness.\\\\xA0\\"]}),`\\n`,(0,t.jsx)(e.p,{children:\\"These efforts demonstrate our strategic focus on enhancing and expanding the capabilities of OpenTelemetry for the broader observability community and reinforce the vendor-agnostic benefits of using OpenTelemetry.\\"}),`\\n`,(0,t.jsx)(e.p,{children:\\"Today, we are thrilled to announce the technical preview of the Elastic Distribution of OpenTelemetry Collector. This new offering underscores Elastic\\\\u2019s dedication to this important framework and highlights our ongoing contributions to make OpenTelemetry the best vendor agnostic data collection framework.\\"}),`\\n`,(0,t.jsxs)(e.h2,{id:\\"elastic-agent-as-an-opentelemetry-collector\\",children:[\\"Elastic Agent as an OpenTelemetry Collector\\",(0,t.jsx)(\\"a\\",{id:\\"elastic-agent-as-an-opentelemetry-collector\\"})]}),`\\n`,(0,t.jsx)(e.p,{children:\\"Technically, the Elastic Distribution of OpenTelemetry Collector represents an evolution of the Elastic Agent. In its latest version, the Elastic Agent can operate in an OpenTelemetry mode. This mode invokes a module within the Elastic Agent which is essentially a distribution of the OpenTelemetry collector. 
It is crafted using a selection of upstream components from the contrib distribution.\\"}),`\\n`,(0,t.jsxs)(e.p,{children:[\\"The Elastic OpenTelemetry Collector also includes configuration for this set of \\",(0,t.jsx)(e.a,{href:\\"https://github.com/elastic/elastic-agent/tree/main/internal/pkg/otel#components\\",rel:\\"nofollow\\",children:\\"upstream OpenTelemetry Collector components\\"}),\\", providing out-of-the-box functionality with Elastic Observability. This integration allows users to seamlessly utilize Elastic\\\\u2019s advanced observability features with minimal setup.\\"]}),`\\n`,(0,t.jsx)(e.p,{children:\\"The technical preview version of the Elastic OpenTelemetry Collector has been tailored with out-of-the-box configurations for the below use cases, we will keep working to add more as we progress: :\\"}),`\\n`,(0,t.jsxs)(e.ul,{children:[`\\n`,(0,t.jsxs)(e.li,{children:[`\\n`,(0,t.jsxs)(e.p,{children:[(0,t.jsx)(e.strong,{children:(0,t.jsx)(e.em,{children:\\"Collect and ship logs\\"})}),\\": Use the Elastic OpenTelemetry Collector to gather log data from various sources and ship it directly to Elastic where it can be analyzed in Kibana Discover, and Elastic Observability\\\\u2019s Explorer (also in Tech Preview in 8.15).\\"]}),`\\n`]}),`\\n`,(0,t.jsxs)(e.li,{children:[`\\n`,(0,t.jsxs)(e.p,{children:[(0,t.jsx)(e.strong,{children:(0,t.jsx)(e.em,{children:\\"Assess host health\\"})}),\\": Leverage the OpenTelemetry host metrics and Kubernetes receivers to monitor to evaluate the performance of hosts and pods. This data can then be visualized and analyzed in Elastic\\\\u2019s Infrastructure Observability UIs, providing deep insights into host performance and health. Details of how this is configured in the OTel collector is outlined in this \\",(0,t.jsx)(e.a,{href:\\"https://www.elastic.co/observability-labs/blog/infrastructure-monitoring-with-opentelemetry-in-elastic-observability\\",rel:\\"nofollow\\",children:\\"blog\\"}),\\".\\"]}),`\\n`]}),`\\n`,(0,t.jsxs)(e.li,{children:[`\\n`,(0,t.jsxs)(e.p,{children:[(0,t.jsx)(e.strong,{children:\\"Kubernetes container logs\\"}),\\": Additionally, users of the Elastic OpenTelemetry Collector benefit from out-of-the-box Kubernetes container and application logs enriched with Kubernetes metadata by leveraging the powerful \\",(0,t.jsx)(e.a,{href:\\"https://opentelemetry.io/blog/2024/otel-collector-container-log-parser/\\",rel:\\"nofollow\\",children:\\"container log parser\\"}),\\" Elastic recently contributed to OTel. This OpenTelemetry-based enrichment enhances the context and value of the collected logs, providing deeper insights and more effective troubleshooting capabilities.\\"]}),`\\n`]}),`\\n`]}),`\\n`,(0,t.jsx)(e.p,{children:\\"While the Elastic OpenTelemetry Collector comes pre-built and preconfigured for the sake of easier onboarding and getting started experience, Elastic is committed to the vision of vendor-neutral collection of data. 
Thus, we strive to contribute any Elastic specific features back to the upstream OpenTelemetry components, to advance and help grow the OpenTelemetry landscape and capabilities.\\"}),`\\n`,(0,t.jsx)(e.p,{children:\\"Stay tuned for upcoming announcements sharing our plans to combine the best of Elastic Agent and OpenTelemetry Collector.\\"}),`\\n`,(0,t.jsxs)(e.h2,{id:\\"get-started-the-elastic-distribution-of-opentelemetry-collector\\",children:[\\"Get started the Elastic Distribution of OpenTelemetry Collector\\",(0,t.jsx)(\\"a\\",{id:\\"get-started-the-elastic-distribution-for-opentelemetry-collector\\"})]}),`\\n`,(0,t.jsxs)(e.p,{children:[\\"To get started with a guided onboarding flow for the Elastic Distribution of the OpenTelemetry Collector for Kubernetes, Linux, and Mac environments, visit the\\",(0,t.jsx)(e.a,{href:\\"https://github.com/elastic/opentelemetry/blob/main/docs/guided-onboarding.md\\",rel:\\"nofollow\\",children:\\" guided onboarding documentation\\"}),\\".\\"]}),`\\n`,(0,t.jsxs)(e.p,{children:[\\"For more advanced manual configuration, follow the\\",(0,t.jsx)(e.a,{href:\\"https://github.com/elastic/opentelemetry/blob/main/docs/manual-configuration.md\\",rel:\\"nofollow\\",children:\\" manual configuration instructions\\"}),\\".\\"]}),`\\n`,(0,t.jsx)(e.p,{children:\\"Once the Elastic Distribution of the OpenTelemetry Collector is set up and running, you\\\\u2019ll be able to analyze your systems within various features of the Elastic Observability solution.\\"}),`\\n`,(0,t.jsx)(e.p,{children:\\"Analyze the performance and health of your infrastructure, through corresponding metrics and logs collected through OpenTelemetry Collector receivers, such as the host metrics receiver and different Kubernetes receivers.\\"}),`\\n`,(0,t.jsx)(e.p,{children:(0,t.jsx)(e.img,{src:\\"/assets/images/elastic-distribution-opentelemetry-collector/hosts.png\\",alt:\\"OTel Monitoring Hosts\\",width:\\"1600\\",height:\\"844\\"})}),`\\n`,(0,t.jsx)(e.p,{children:(0,t.jsx)(e.img,{src:\\"/assets/images/elastic-distribution-opentelemetry-collector/otel-daemonset-green-logs.png\\",alt:\\"OTel Logs\\",width:\\"512\\",height:\\"363\\"})}),`\\n`,(0,t.jsx)(e.p,{children:\\"With Elastic OpenTelemetry Collector, container and application logs are enriched with Kubernetes metadata out-of-the-box making filtering, grouping and logs analysis easier and more efficient.\\"}),`\\n`,(0,t.jsx)(e.p,{children:(0,t.jsx)(e.img,{src:\\"/assets/images/elastic-distribution-opentelemetry-collector/explorer.png\\",alt:\\"OTel Discover\\",width:\\"1600\\",height:\\"900\\"})}),`\\n`,(0,t.jsx)(e.p,{children:\\"The Elastic Distribution of the OpenTelemetry Collector allows for tracing just like any other collector distribution made of upstream components. Explore and analyze the performance and runtime behavior of your applications and services through RED metric, service maps and distributed traces collected from OpenTelemetry SDKs.\\"}),`\\n`,(0,t.jsx)(e.p,{children:(0,t.jsx)(e.img,{src:\\"/assets/images/elastic-distribution-opentelemetry-collector/apm.png\\",alt:\\"OTel APM\\",width:\\"1600\\",height:\\"900\\"})}),`\\n`,(0,t.jsxs)(e.p,{children:[\\"The above capabilities and features packed with the Elastic OpenTelemetry Collector can be achieved in a similar way with a custom build of the upstream OpenTelemetry Collector packing the right set of upstream components. 
To do just that follow our \\",(0,t.jsx)(e.a,{href:\\"https://github.com/elastic/opentelemetry/blob/main/docs/configure-upstream-collector.md\\",rel:\\"nofollow\\",children:\\"guidance here\\"}),\\".\\"]}),`\\n`,(0,t.jsxs)(e.h2,{id:\\"outlook\\",children:[\\"Outlook\\",(0,t.jsx)(\\"a\\",{id:\\"outlook\\"})]}),`\\n`,(0,t.jsx)(e.p,{children:\\"The launch of the technical preview of the Elastic Distribution of OpenTelemetry Collector is another step on Elastic\\\\u2019s journey towards OpenTelemetry based observability. On that journey we are committed to a vendor-agnostic approach to data collection and therefore prioritize upstream contribution to OpenTelemetry over Elastic-specific data collection features.\\\\xA0\\"}),`\\n`,(0,t.jsx)(e.p,{children:\\"Stay tuned to see more of Elastic\\\\u2019s contributions to OpenTelemetry and observe Elastic\\\\u2019s journey towards fully OpenTelemetry-based observability.\\\\xA0\\"}),`\\n`,(0,t.jsx)(e.p,{children:\\"Additional resources for OpenTelemetry with Elastic:\\"}),`\\n`,(0,t.jsxs)(e.ul,{children:[`\\n`,(0,t.jsxs)(e.li,{children:[`\\n`,(0,t.jsx)(e.p,{children:\\"Elastic Distributions recently introduced:\\"}),`\\n`,(0,t.jsxs)(e.ul,{children:[`\\n`,(0,t.jsxs)(e.li,{children:[`\\n`,(0,t.jsxs)(e.p,{children:[(0,t.jsx)(e.a,{href:\\"https://www.elastic.co/observability-labs/blog/elastic-distribution-opentelemetry-java-agent\\",rel:\\"nofollow\\",children:\\"Elastic Distribution of OpenTelemetry\'s Java SDK\\"}),\\".\\"]}),`\\n`]}),`\\n`,(0,t.jsxs)(e.li,{children:[`\\n`,(0,t.jsx)(e.p,{children:(0,t.jsx)(e.a,{href:\\"https://www.elastic.co/observability-labs/blog/elastic-opentelemetry-distribution-python\\",rel:\\"nofollow\\",children:\\"Elastic Distribution of OpenTelemetry\'s Python SDK\\"})}),`\\n`]}),`\\n`,(0,t.jsxs)(e.li,{children:[`\\n`,(0,t.jsx)(e.p,{children:(0,t.jsx)(e.a,{href:\\"https://www.elastic.co/observability-labs/blog/elastic-opentelemetry-distribution-node-js\\",rel:\\"nofollow\\",children:\\"Elastic Distribution of OpenTelemetry\'s NodeJS SDK\\"})}),`\\n`]}),`\\n`,(0,t.jsxs)(e.li,{children:[`\\n`,(0,t.jsx)(e.p,{children:(0,t.jsx)(e.a,{href:\\"https://www.elastic.co/observability-labs/blog/elastic-opentelemetry-distribution-dotnet-applications\\",rel:\\"nofollow\\",children:\\"Elastic Distribution of OpenTelemetry\'s .NET SDK\\"})}),`\\n`]}),`\\n`,(0,t.jsxs)(e.li,{children:[`\\n`,(0,t.jsx)(e.p,{children:(0,t.jsx)(e.a,{href:\\"https://www.elastic.co/observability-labs/blog/apm-ios-android-native-apps\\",rel:\\"nofollow\\",children:\\"Elastic Distribution of OpenTelemetry of iOS and Android\\"})}),`\\n`]}),`\\n`]}),`\\n`]}),`\\n`,(0,t.jsxs)(e.li,{children:[`\\n`,(0,t.jsx)(e.p,{children:\\"Other Elastic OpenTelemetry resources:\\"}),`\\n`,(0,t.jsxs)(e.ul,{children:[`\\n`,(0,t.jsxs)(e.li,{children:[`\\n`,(0,t.jsx)(e.p,{children:(0,t.jsx)(e.a,{href:\\"https://www.elastic.co/blog/opentelemetry-observability\\",rel:\\"nofollow\\",children:\\"Independence with OpenTelemetry on Elastic\\"})}),`\\n`]}),`\\n`,(0,t.jsxs)(e.li,{children:[`\\n`,(0,t.jsx)(e.p,{children:(0,t.jsx)(e.a,{href:\\"https://www.elastic.co/blog/implementing-kubernetes-observability-security-opentelemetry\\",rel:\\"nofollow\\",children:\\"Modern observability and security on Kubernetes with Elastic and OpenTelemetry\\"})}),`\\n`]}),`\\n`,(0,t.jsxs)(e.li,{children:[`\\n`,(0,t.jsx)(e.p,{children:(0,t.jsx)(e.a,{href:\\"https://www.elastic.co/blog/3-models-logging-opentelemetry-elastic\\",rel:\\"nofollow\\",children:\\"3 models for logging with OpenTelemetry and 
Elastic\\"})}),`\\n`]}),`\\n`]}),`\\n`]}),`\\n`,(0,t.jsxs)(e.li,{children:[`\\n`,(0,t.jsx)(e.p,{children:\\"Instrumentation resources:\\"}),`\\n`,(0,t.jsxs)(e.ul,{children:[`\\n`,(0,t.jsxs)(e.li,{children:[`\\n`,(0,t.jsxs)(e.p,{children:[\\"Python:\\\\xA0\\",(0,t.jsx)(e.a,{href:\\"https://www.elastic.co/blog/auto-instrumentation-of-python-applications-opentelemetry\\",rel:\\"nofollow\\",children:\\"Auto-instrumentation\\"}),\\",\\\\xA0\\",(0,t.jsx)(e.a,{href:\\"https://www.elastic.co/blog/manual-instrumentation-of-python-applications-opentelemetry\\",rel:\\"nofollow\\",children:\\"Manual instrumentation\\"})]}),`\\n`]}),`\\n`,(0,t.jsxs)(e.li,{children:[`\\n`,(0,t.jsxs)(e.p,{children:[\\"Java:\\\\xA0\\",(0,t.jsx)(e.a,{href:\\"https://www.elastic.co/blog/auto-instrumentation-of-java-applications-opentelemetry\\",rel:\\"nofollow\\",children:\\"Auto-instrumentation\\"}),\\",\\\\xA0\\",(0,t.jsx)(e.a,{href:\\"https://www.elastic.co/blog/manual-instrumentation-of-java-applications-opentelemetry\\",rel:\\"nofollow\\",children:\\"Manual instrumentation\\\\xA0\\"})]}),`\\n`]}),`\\n`,(0,t.jsxs)(e.li,{children:[`\\n`,(0,t.jsxs)(e.p,{children:[\\"Node.js:\\\\xA0\\",(0,t.jsx)(e.a,{href:\\"https://www.elastic.co/blog/auto-instrument-nodejs-applications-opentelemetry\\",rel:\\"nofollow\\",children:\\"Auto-instrumentation\\"}),\\",\\\\xA0\\",(0,t.jsx)(e.a,{href:\\"https://www.elastic.co/blog/manual-instrumentation-of-nodejs-applications-opentelemetry\\",rel:\\"nofollow\\",children:\\"Manual instrumentation\\"})]}),`\\n`]}),`\\n`,(0,t.jsxs)(e.li,{children:[`\\n`,(0,t.jsxs)(e.p,{children:[\\".NET:\\\\xA0\\",(0,t.jsx)(e.a,{href:\\"https://www.elastic.co/blog/auto-instrumentation-of-net-applications-opentelemetry\\",rel:\\"nofollow\\",children:\\"Auto-instrumentation\\"}),\\",\\\\xA0\\",(0,t.jsx)(e.a,{href:\\"https://www.elastic.co/blog/manual-instrumentation-of-net-applications-opentelemetry\\",rel:\\"nofollow\\",children:\\"Manual instrumentation\\"})]}),`\\n`]}),`\\n`]}),`\\n`]}),`\\n`]})]})}function d(n={}){let{wrapper:e}=n.components||{};return e?(0,t.jsx)(e,{...n,children:(0,t.jsx)(h,{...n})}):h(n)}return v(O);})();\\n;return Component;"},"_id":"articles/elastic-distribution-opentelemetry-collector.mdx","_raw":{"sourceFilePath":"articles/elastic-distribution-opentelemetry-collector.mdx","sourceFileName":"elastic-distribution-opentelemetry-collector.mdx","sourceFileDir":"articles","contentType":"mdx","flattenedPath":"articles/elastic-distribution-opentelemetry-collector"},"type":"Article","imageUrl":"/assets/images/elastic-distribution-opentelemetry-collector/otel-collector-announcement.jpeg","readingTime":"6 min read","url":"/elastic-distribution-opentelemetry-collector","headings":[{"level":2,"title":"Elastic Agent as an OpenTelemetry Collector","href":"#elastic-agent-as-an-opentelemetry-collectora-idelastic-agent-as-an-opentelemetry-collectora"},{"level":2,"title":"Get started the Elastic Distribution of OpenTelemetry Collector","href":"#get-started-the-elastic-distribution-of-opentelemetry-collectora-idget-started-the-elastic-distribution-for-opentelemetry-collectora"},{"level":2,"title":"Outlook","href":"#outlooka-idoutlooka"}]},{"title":"Announcing GA of Elastic distribution of the OpenTelemetry Java Agent","slug":"elastic-distribution-opentelemetry-java-agent","date":"2024-09-12","description":"Elastic announces general availability of the Elastic distribution of the OpenTelemetry (OTel) Java Agent, a fully OTel-compatible agent with a rich set of useful additional 
features.","image":"observability-launch-series-3-java-auto.jpg","author":[{"slug":"alexander-wert","type":"Author","_raw":{}},{"slug":"jack-shirazi","type":"Author","_raw":{}},{"slug":"jonas-kunz","type":"Author","_raw":{}},{"slug":"sylvain-juge","type":"Author","_raw":{}}],"tags":[{"slug":"opentelemetry","type":"Tag","_raw":{}},{"slug":"java","type":"Tag","_raw":{}},{"slug":"instrumentation","type":"Tag","_raw":{}}],"body":{"raw":"\\nAs Elastic continues its commitment to OpenTelemetry (OTel), we are excited to announce general availability of the [Elastic Distribution of OpenTelemetry Java (EDOT Java)](https://github.com/elastic/elastic-otel-java). EDOT Java is a fully compatible drop-in replacement for the OTel Java agent that comes with a set of built-in, useful extensions for powerful additional features and improved usability with Elastic Observability. Use EDOT Java to start the OpenTelemetry SDK with your Java application, and automatically capture tracing data, performance metrics, and logs. Traces, metrics, and logs can be sent to any OpenTelemetry Protocol (OTLP) collector you choose.\\n\\nWith EDOT Java you have access to all the features of the OpenTelemetry Java agent plus:\\n\\n* Access to SDK improvements and bug fixes contributed by the Elastic team before the changes are available upstream in OpenTelemetry repositories.\\n* Access to optional features that can enhance OpenTelemetry data that is being sent to Elastic (for example, inferred spans and span stacktrace).\\n\\nIn this blog post, we will explore the rationale behind our unique distribution, detailing the powerful additional features it brings to the table. We will provide an overview of how these enhancements can be utilized with our distribution, the standard OTel SDK, or the vanilla OTel Java agent. Stay tuned as we conclude with a look ahead at our future plans and what you can expect from Elastic contributions to OTel Java moving forward.\\n\\n## Elastic Distribution of OpenTelemetry Java (EDOT Java)\\n\\nUntil now, Elastic users looking to monitor their Java services through automatic instrumentation had two options: the proprietary Elastic APM Java agent or the vanilla OTel Java agent. While both agents offer robust capabilities and have reached a high level of maturity, each has its distinct advantages and limitations. The OTel Java agent provides extensive instrumentation across a broad spectrum of frameworks and libraries, is highly extensible, and natively emits OTel data. Conversely, the Elastic APM Java agent includes several powerful features absent in the OTel Java agent.\\n\\nElastic’s distribution of the OTel Java agent aims to bring together the best aspects of the proprietary Elastic Java agent and the OpenTelemetry Java agent. This distribution enhances the vanilla OTel Java agent with a set of additional features realized through extensions, while still being a fully compatible drop-in replacement.\\n\\n![Elastic distribution of the OpenTelemetry Java agent](/assets/images/elastic-distribution-opentelemetry-java-agent/1.png)\\n\\nElastic’s commitment to OpenTelemetry not only focuses on standardizing data collection around OTel but also includes improving OTel components and integrating Elastic\'s data collection features into OTel. In this vein, our ultimate goal is to contribute as many features from Elastic’s distribution back to the upstream OTel Java agent; our distribution is designed in such a way that the additional features, realized as extensions, work directly with the OTel SDK. 
This means they can be used independently of Elastic’s distro — either with the OTel Java SDK or with the vanilla OTel Java agent. We’ll discuss these usage patterns further in the sections below.\n\n## Features included\n\nThe Elastic distribution of the OpenTelemetry Java agent includes a suite of extensions that deliver the features outlined below.\n\n### Inferred spans\n\nIn a [recent blog post](/blog/tracing-data-inferred-spans-opentelemetry), we introduced inferred spans, a powerful feature designed to enhance distributed traces with additional profiling-based spans.\n\n![Inferred spans](/assets/images/elastic-distribution-opentelemetry-java-agent/2.png)\n\nInferred spans (blue spans labeled “internal” in the above image) offer valuable insights into sources of latency within the code that might remain uncaptured by purely instrumentation-based traces. In other words, they fill in the gaps between instrumentation-based traces. The Elastic distribution of the OTel Java agent includes the inferred spans feature. It can be enabled by setting the following environment variable:\n\n```bash\nELASTIC_OTEL_INFERRED_SPANS_ENABLED=true\n```\n\n### Correlation with profiling\n\nWith [OpenTelemetry embracing profiling](https://opentelemetry.io/blog/2024/profiling/) and [Elastic\'s proposal to donate its eBPF-based, continuous profiling agent](https://www.elastic.co/blog/elastic-donation-proposal-to-contribute-profiling-agent-to-opentelemetry), a new frontier opens up in correlating distributed traces with continuous profiling data. This integration offers unprecedented code-level insights into latency issues and CO2 emission footprints, all within a clearly defined service, transaction, and trace context. To get started, follow [this guide](https://www.elastic.co/observability-labs/blog/universal-profiling-with-java-apm-services-traces) to set up universal profiling and the OpenTelemetry integration. For more background information on the feature, check out [this blog article](https://www.elastic.co/blog/continuous-profiling-distributed-tracing-correlation), where we explore how these technologies converge to enhance observability and environmental consciousness in software development.\n\n\n![Correlation with profiling](/assets/images/elastic-distribution-opentelemetry-java-agent/3.png)\n\nUsers of Elastic Universal Profiling can already leverage the Elastic distribution of the OTel Java agent to access this powerful integration. With Elastic\'s proposed donation of the profiling agent, we anticipate that this capability will soon be available to all OTel users who employ the OTel Java agent in conjunction with the new OTel eBPF profiling.\n\n### Span stack traces\n\nIn many cases, spans within a distributed trace are relatively coarse-grained, particularly when features like inferred spans are not used. Understanding precisely where in the code path a span originates can be incredibly valuable. To address this need, the Elastic distribution of the OTel Java agent includes the span stack traces feature. 
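This functionality provides crucial insights by collecting corresponding stack traces for spans that exceed a configurable minimum duration, pinpointing exactly where a span is initiated in the code. As a hedged sketch of tuning that threshold at launch, the property name below follows the upstream span-stacktrace extension and is illustrative; verify it against your agent version:\n\n```bash\n# Illustrative 5ms threshold; the property name is taken from the upstream\n# span-stacktrace extension and may differ between agent versions.\njava -Dotel.java.experimental.span-stacktrace.min.duration=5ms -javaagent:/pathto/elastic-otel-javaagent.jar -jar myapp.jar\n```\n\n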
![Span stack traces](/assets/images/elastic-distribution-opentelemetry-java-agent/4.png)\n\nThis simple yet powerful feature significantly enhances problem troubleshooting, offering developers a clearer understanding of their application’s performance dynamics.\n\nIn the example above, it allows you to get the call stack of a gRPC call, which can help you understand which code paths triggered it.\n\n### Auto-detection of service and cloud resources\n\nIn today\'s expansive and diverse cloud environments, which often include multiple regions and cloud providers, having information on where your services are operating is incredibly valuable. Particularly in Java services, where the service name is frequently embedded within the deployment artifacts, the ability to automatically retrieve service and cloud resource information marks a substantial leap in usability.\n\n![Auto-detection of service and cloud resources](/assets/images/elastic-distribution-opentelemetry-java-agent/5.png)\n\nTo address this need, the Elastic distribution of the OTel Java agent includes built-in auto-detectors for service and cloud resources, specifically for AWS and GCP, sourced from [the OpenTelemetry Java Contrib repository](https://github.com/open-telemetry/opentelemetry-java-contrib). This feature, which is on by default, enhances observability and streamlines the management of services across various cloud platforms, making it a key asset for any cloud-based deployment.\n\n## Ways to use the EDOT Java\n\nThe Elastic distribution of the OTel Java agent is designed to meet our users exactly where they are, accommodating a variety of needs and strategic approaches. Whether you\'re looking to fully integrate new observability features or simply enhance existing setups, the Elastic distribution offers multiple technical pathways to leverage its capabilities. This flexibility ensures that users can tailor the agent\'s implementation to align perfectly with their specific operational requirements and goals.\n\n### Using Elastic’s distribution directly\n\nThe most straightforward path to harnessing the capabilities described above is to adopt the Elastic distribution of the OTel Java agent as a drop-in replacement for the standard OTel Java agent. Structurally, the Elastic distro functions as a wrapper around the OTel Java agent, maintaining full compatibility with all upstream configuration options and incorporating all its features. Additionally, it includes the advanced features described above that significantly augment its functionality. Users of the Elastic distribution will also benefit from the comprehensive technical support provided by Elastic, which will commence once the agent achieves general availability. To get started, simply [download the agent Jar file](https://mvnrepository.com/artifact/co.elastic.otel/elastic-otel-javaagent) and attach it to your application:\n\n```bash\njava -javaagent:/pathto/elastic-otel-javaagent.jar -jar myapp.jar\n```\n\n### Using Elastic’s extensions with the vanilla OTel Java agent\n\nIf you prefer to continue using the vanilla OTel Java agent but wish to take advantage of the features described above, you have the flexibility to do so. We offer a separate agent extensions package specifically designed for this purpose. 
To integrate these enhancements, simply [download and place the extensions jar file](https://mvnrepository.com/artifact/co.elastic.otel/elastic-otel-agentextension) into a designated directory and configure the OTel Java agent extensions directory:\\n\\n```bash\\nOTEL_JAVAAGENT_EXTENSIONS=/pathto/elastic-otel-agentextension.jar\\njava -javaagent:/pathto/otel-javaagent.jar -jar myapp.jar\\n```\\n\\n### Using Elastic’s extensions manually with the OTel Java SDK\\n\\nIf you build your instrumentations directly into your applications using the OTel API and rely on the OTel Java SDK instead of the automatic Java agent, you can still use the features we\'ve discussed. Each feature is designed as a standalone component that can be integrated with the OTel Java SDK framework. To implement these features, simply refer to the specific descriptions for each one to learn how to configure the OTel Java SDK accordingly:\\n\\n- [Setting up the inferred spans feature with the SDK](https://github.com/elastic/elastic-otel-java/tree/main/inferred-spans)\\n- [Setting up profiling correlation with the SDK](https://github.com/elastic/elastic-otel-java/tree/main/universal-profiling-integration)\\n- [Setting up the span stack traces feature with the SDK](https://github.com/open-telemetry/opentelemetry-java-contrib/tree/main/span-stacktrace)\\n- Setting up resource detectors with the SDK\\n - [Service resource detectors](https://github.com/open-telemetry/opentelemetry-java-contrib/tree/main/resource-providers)\\n - [AWS resource detector](https://github.com/open-telemetry/opentelemetry-java-contrib/tree/main/aws-resources)\\n - [GCP resource detector](https://github.com/open-telemetry/opentelemetry-java-contrib/tree/main/gcp-resources)\\n\\nThis approach ensures that you can tailor your observability tools to meet your specific needs without compromising on functionality.\\n\\n## Future plans and contributions\\n\\nWe are committed to OpenTelemetry, and our contributions to the OpenTelemetry Java project will continue without limit. Not only are we focused on general improvements within the OTel Java project, but we are also committed to ensuring that the features discussed in this blog post become official extensions to the OpenTelemetry Java SDK/Agent and are included in the OpenTelemetry Java Contrib repository. We have already contributed the [span stack trace feature](https://github.com/open-telemetry/opentelemetry-java-contrib/tree/main/span-stacktrace) and initiated the contribution of the inferred spans feature, and we are eagerly anticipating the opportunity to add the profiling correlation feature following the successful integration of Elastic’s profiling agent.\\n\\nMoreover, our efforts extend beyond the current enhancements; we are actively working to port more features from the Elastic APM Java agent to OpenTelemetry. A particularly ambitious yet thrilling endeavor is our project to enable dynamic configurability of the OpenTelemetry Java agent. This future enhancement will allow for the OpenTelemetry Agent Management Protocol (OpAMP) to be used to remotely and dynamically configure OTel Java agents, improving their adaptability and ease of use.\\n\\nWe encourage you to experience the new Elastic distribution of the OTel Java agent and share your feedback with us. 
Your insights are invaluable as we strive to enhance the capabilities and reach of OpenTelemetry, making it even more powerful and user-friendly.\n\nCheck out more information on Elastic Distributions of OpenTelemetry on [GitHub](https://github.com/elastic/opentelemetry?tab=readme-ov-file) and in our latest [EDOT blog](https://www.elastic.co/observability-labs/blog/elastic-distributions-opentelemetry).\n\nElastic provides the following components of EDOT:\n\n- [Elastic Distribution of OpenTelemetry (EDOT) Collector](https://www.elastic.co/observability-labs/blog/elastic-distribution-opentelemetry-collector)\n\n- [Elastic Distribution of OpenTelemetry (EDOT) Java](https://www.elastic.co/observability-labs/blog/elastic-distribution-opentelemetry-java-agent)\n\n- [Elastic Distribution of OpenTelemetry (EDOT) Python](https://www.elastic.co/observability-labs/blog/elastic-opentelemetry-distribution-python)\n\n- [Elastic Distribution of OpenTelemetry (EDOT) NodeJS](https://www.elastic.co/observability-labs/blog/elastic-opentelemetry-distribution-node-js)\n\n- [Elastic Distribution of OpenTelemetry (EDOT) .NET](https://www.elastic.co/observability-labs/blog/elastic-opentelemetry-distribution-dotnet-applications)\n\n- [Elastic Distribution of OpenTelemetry (EDOT) iOS and Android](https://www.elastic.co/observability-labs/blog/apm-ios-android-native-apps)\n\n\n_The release and timing of any features or functionality described in this post remain at Elastic\'s sole discretion. Any features or functionality not currently available may not be delivered on time or at all._\n","code":"var Component=(()=>{var u=Object.create;var o=Object.defineProperty;var p=Object.getOwnPropertyDescriptor;var f=Object.getOwnPropertyNames;var g=Object.getPrototypeOf,m=Object.prototype.hasOwnProperty;var b=(i,e)=>()=>(e||i((e={exports:{}}).exports,e),e.exports),y=(i,e)=>{for(var a in e)o(i,a,{get:e[a],enumerable:!0})},l=(i,e,a,r)=>{if(e&&typeof e==\\"object\\"||typeof e==\\"function\\")for(let n of f(e))!m.call(i,n)&&n!==a&&o(i,n,{get:()=>e[n],enumerable:!(r=p(e,n))||r.enumerable});return i};var v=(i,e,a)=>(a=i!=null?u(g(i)):{},l(e||!i||!i.__esModule?o(a,\\"default\\",{value:i,enumerable:!0}):a,i)),w=i=>l(o({},\\"__esModule\\",{value:!0}),i);var c=b((J,s)=>{s.exports=_jsx_runtime});var O={};y(O,{default:()=>d,frontmatter:()=>T});var t=v(c()),T={title:\\"Announcing GA of Elastic distribution of the OpenTelemetry Java Agent\\",slug:\\"elastic-distribution-opentelemetry-java-agent\\",date:\\"2024-09-12\\",description:\\"Elastic announces general availability of the Elastic distribution of the OpenTelemetry (OTel) Java Agent, a fully OTel-compatible agent with a rich set of useful additional features.\\",author:[{slug:\\"alexander-wert\\"},{slug:\\"jack-shirazi\\"},{slug:\\"jonas-kunz\\"},{slug:\\"sylvain-juge\\"}],image:\\"observability-launch-series-3-java-auto.jpg\\",tags:[{slug:\\"opentelemetry\\"},{slug:\\"java\\"},{slug:\\"instrumentation\\"}]};function h(i){let e={a:\\"a\\",code:\\"code\\",em:\\"em\\",h2:\\"h2\\",h3:\\"h3\\",img:\\"img\\",li:\\"li\\",p:\\"p\\",pre:\\"pre\\",ul:\\"ul\\",...i.components};return(0,t.jsxs)(t.Fragment,{children:[(0,t.jsxs)(e.p,{children:[\\"As Elastic continues its commitment to OpenTelemetry (OTel), we are excited to announce general availability of the \\",(0,t.jsx)(e.a,{href:\\"https://github.com/elastic/elastic-otel-java\\",rel:\\"nofollow\\",children:\\"Elastic Distribution of OpenTelemetry Java (EDOT Java)\\"}),\\". 
EDOT Java is a fully compatible drop-in replacement for the OTel Java agent that comes with a set of built-in, useful extensions for powerful additional features and improved usability with Elastic Observability. Use EDOT Java to start the OpenTelemetry SDK with your Java application, and automatically capture tracing data, performance metrics, and logs. Traces, metrics, and logs can be sent to any OpenTelemetry Protocol (OTLP) collector you choose.\\"]}),`\\n`,(0,t.jsx)(e.p,{children:\\"With EDOT Java you have access to all the features of the OpenTelemetry Java agent plus:\\"}),`\\n`,(0,t.jsxs)(e.ul,{children:[`\\n`,(0,t.jsx)(e.li,{children:\\"Access to SDK improvements and bug fixes contributed by the Elastic team before the changes are available upstream in OpenTelemetry repositories.\\"}),`\\n`,(0,t.jsx)(e.li,{children:\\"Access to optional features that can enhance OpenTelemetry data that is being sent to Elastic (for example, inferred spans and span stacktrace).\\"}),`\\n`]}),`\\n`,(0,t.jsx)(e.p,{children:\\"In this blog post, we will explore the rationale behind our unique distribution, detailing the powerful additional features it brings to the table. We will provide an overview of how these enhancements can be utilized with our distribution, the standard OTel SDK, or the vanilla OTel Java agent. Stay tuned as we conclude with a look ahead at our future plans and what you can expect from Elastic contributions to OTel Java moving forward.\\"}),`\\n`,(0,t.jsx)(e.h2,{id:\\"elastic-distribution-of-opentelemetry-java-edot-java\\",children:\\"Elastic Distribution of OpenTelemetry Java (EDOT Java)\\"}),`\\n`,(0,t.jsx)(e.p,{children:\\"Until now, Elastic users looking to monitor their Java services through automatic instrumentation had two options: the proprietary Elastic APM Java agent or the vanilla OTel Java agent. While both agents offer robust capabilities and have reached a high level of maturity, each has its distinct advantages and limitations. The OTel Java agent provides extensive instrumentation across a broad spectrum of frameworks and libraries, is highly extensible, and natively emits OTel data. Conversely, the Elastic APM Java agent includes several powerful features absent in the OTel Java agent.\\"}),`\\n`,(0,t.jsx)(e.p,{children:\\"Elastic\\\\u2019s distribution of the OTel Java agent aims to bring together the best aspects of the proprietary Elastic Java agent and the OpenTelemetry Java agent. This distribution enhances the vanilla OTel Java agent with a set of additional features realized through extensions, while still being a fully compatible drop-in replacement.\\"}),`\\n`,(0,t.jsx)(e.p,{children:(0,t.jsx)(e.img,{src:\\"/assets/images/elastic-distribution-opentelemetry-java-agent/1.png\\",alt:\\"Elastic distribution of the OpenTelemetry Java agent\\",width:\\"1999\\",height:\\"817\\"})}),`\\n`,(0,t.jsx)(e.p,{children:\\"Elastic\\\\u2019s commitment to OpenTelemetry not only focuses on standardizing data collection around OTel but also includes improving OTel components and integrating Elastic\'s data collection features into OTel. In this vein, our ultimate goal is to contribute as many features from Elastic\\\\u2019s distribution back to the upstream OTel Java agent; our distribution is designed in such a way that the additional features, realized as extensions, work directly with the OTel SDK. This means they can be used independent of Elastic\\\\u2019s distro \\\\u2014 either with the Otel Java SDK or with the vanilla OTel Java agent. 
We\\\\u2019ll discuss these usage patterns further in the sections below.\\"}),`\\n`,(0,t.jsx)(e.h2,{id:\\"features-included\\",children:\\"Features included\\"}),`\\n`,(0,t.jsx)(e.p,{children:\\"The Elastic distribution of the OpenTelemetry Java agent includes a suite of extensions that deliver the features outlined below.\\"}),`\\n`,(0,t.jsx)(e.h3,{id:\\"inferred-spans\\",children:\\"Inferred spans\\"}),`\\n`,(0,t.jsxs)(e.p,{children:[\\"In a \\",(0,t.jsx)(e.a,{href:\\"/blog/tracing-data-inferred-spans-opentelemetry\\",children:\\"recent blog post\\"}),\\", we introduced inferred spans, a powerful feature designed to enhance distributed traces with additional profiling-based spans.\\"]}),`\\n`,(0,t.jsx)(e.p,{children:(0,t.jsx)(e.img,{src:\\"/assets/images/elastic-distribution-opentelemetry-java-agent/2.png\\",alt:\\"Inferred spans\\",width:\\"1621\\",height:\\"596\\"})}),`\\n`,(0,t.jsx)(e.p,{children:\\"Inferred spans (blue spans labeled \\\\u201Cinternal\\\\u201D in the above image) offer valuable insights into sources of latency within the code that might remain uncaptured by purely instrumentation-based traces. In other words, they fill in the gaps between instrumentation-based traces. The Elastic distribution of the OTel Java agent includes the inferred spans feature. It can be enabled by setting the following environment variable.\\"}),`\\n`,(0,t.jsx)(e.pre,{children:(0,t.jsx)(e.code,{className:\\"language-bash\\",children:`ELASTIC_OTEL_INFERRED_SPANS_ENABLED=true\\n`})}),`\\n`,(0,t.jsx)(e.h3,{id:\\"correlation-with-profiling\\",children:\\"Correlation with profiling\\"}),`\\n`,(0,t.jsxs)(e.p,{children:[\\"With \\",(0,t.jsx)(e.a,{href:\\"https://opentelemetry.io/blog/2024/profiling/\\",rel:\\"nofollow\\",children:\\"OpenTelemetry embracing profiling\\"}),\\" and \\",(0,t.jsx)(e.a,{href:\\"https://www.elastic.co/blog/elastic-donation-proposal-to-contribute-profiling-agent-to-opentelemetry\\",rel:\\"nofollow\\",children:\\"Elastic\'s proposal to donate its eBPF-based, continuous profiling agent\\"}),\\", a new frontier opens up in correlating distributed traces with continuous profiling data. This integration offers unprecedented code-level insights into latency issues and CO2 emission footprints, all within a clearly defined service, transaction, and trace context. To get started, follow \\",(0,t.jsx)(e.a,{href:\\"https://www.elastic.co/observability-labs/blog/universal-profiling-with-java-apm-services-traces\\",rel:\\"nofollow\\",children:\\"this guide\\"}),\\" to setup universal profiling and the OpenTelemetry integration. In order to get more background information on the feature, check out \\",(0,t.jsx)(e.a,{href:\\"https://www.elastic.co/blog/continuous-profiling-distributed-tracing-correlation\\",rel:\\"nofollow\\",children:\\"this blog article\\"}),\\", where we explore how these technologies converge to enhance observability and environmental consciousness in software development.\\"]}),`\\n`,(0,t.jsx)(e.p,{children:(0,t.jsx)(e.img,{src:\\"/assets/images/elastic-distribution-opentelemetry-java-agent/3.png\\",alt:\\"Correlation with profiling\\",width:\\"1999\\",height:\\"1139\\"})}),`\\n`,(0,t.jsx)(e.p,{children:\\"Users of Elastic Universal Profiling can already leverage the Elastic distribution of the OTel Java agent to access this powerful integration. 
With Elastic\'s proposed donation of the profiling agent, we anticipate that this capability will soon be available to all OTel users who employ the OTel Java agent in conjunction with the new OTel eBPF profiling.\\"}),`\\n`,(0,t.jsx)(e.h3,{id:\\"span-stack-traces\\",children:\\"Span stack traces\\"}),`\\n`,(0,t.jsx)(e.p,{children:\\"In many cases, spans within a distributed trace are relatively coarse-grained, particularly when features like inferred spans are not used. Understanding precisely where in the code path a span originates can be incredibly valuable. To address this need, the Elastic distribution of the OTel Java agent includes the span stack traces feature. This functionality provides crucial insights by collecting corresponding stack traces for spans that exceed a configurable minimum duration, pinpointing exactly where a span is initiated in the code.\\"}),`\\n`,(0,t.jsx)(e.p,{children:(0,t.jsx)(e.img,{src:\\"/assets/images/elastic-distribution-opentelemetry-java-agent/4.png\\",alt:\\"Span stack traces\\",width:\\"1999\\",height:\\"845\\"})}),`\\n`,(0,t.jsx)(e.p,{children:\\"This simple yet powerful feature significantly enhances problem troubleshooting, offering developers a clearer understanding of their application\\\\u2019s performance dynamics.\\"}),`\\n`,(0,t.jsx)(e.p,{children:\\"In the example above, it allows you to get the call stack of a gRPC call, which can help understanding which code paths triggered it.\\"}),`\\n`,(0,t.jsx)(e.h3,{id:\\"auto-detection-of-service-and-cloud-resources\\",children:\\"Auto-detection of service and cloud resources\\"}),`\\n`,(0,t.jsx)(e.p,{children:\\"In today\'s expansive and diverse cloud environments, which often include multiple regions and cloud providers, having information on where your services are operating is incredibly valuable. Particularly in Java services, where the service name is frequently embedded within the deployment artifacts, the ability to automatically retrieve service and cloud resource information marks a substantial leap in usability.\\"}),`\\n`,(0,t.jsx)(e.p,{children:(0,t.jsx)(e.img,{src:\\"/assets/images/elastic-distribution-opentelemetry-java-agent/5.png\\",alt:\\"Auto-detection of service and cloud resources\\",width:\\"1142\\",height:\\"772\\"})}),`\\n`,(0,t.jsxs)(e.p,{children:[\\"To address this need, the Elastic distribution of the OTel Java agent includes built-in auto detectors for service and cloud resources, specifically for AWS and GCP, sourced from \\",(0,t.jsx)(e.a,{href:\\"https://github.com/open-telemetry/opentelemetry-java-contrib\\",rel:\\"nofollow\\",children:\\"the OpenTelemetry Java Contrib repository\\"}),\\". This feature, which is on by default, enhances observability and streamlines the management of services across various cloud platforms, making it a key asset for any cloud-based deployment.\\"]}),`\\n`,(0,t.jsx)(e.h2,{id:\\"ways-to-use-the-edot-java\\",children:\\"Ways to use the EDOT Java\\"}),`\\n`,(0,t.jsx)(e.p,{children:\\"The Elastic distribution of the OTel Java agent is designed to meet our users exactly where they are, accommodating a variety of needs and strategic approaches. Whether you\'re looking to fully integrate new observability features or simply enhance existing setups, the Elastic distribution offers multiple technical pathways to leverage its capabilities. 
This flexibility ensures that users can tailor the agent\'s implementation to align perfectly with their specific operational requirements and goals.\\"}),`\\n`,(0,t.jsx)(e.h3,{id:\\"using-elastics-distribution-directly\\",children:\\"Using Elastic\\\\u2019s distribution directly\\"}),`\\n`,(0,t.jsxs)(e.p,{children:[\\"The most straightforward path to harnessing the capabilities described above is by adopting the Elastic distribution of the OTel Java agent as a drop-in replacement for the standard OTel Java agent. Structurally, the Elastic distro functions as a wrapper around the OTel Java agent, maintaining full compatibility with all upstream configuration options and incorporating all its features. Additionally, it includes the advanced features described above that significantly augment its functionality. Users of the Elastic distribution will also benefit from the comprehensive technical support provided by Elastic, which will commence once the agent achieves general availability. To get started, simply \\",(0,t.jsx)(e.a,{href:\\"https://mvnrepository.com/artifact/co.elastic.otel/elastic-otel-javaagent\\",rel:\\"nofollow\\",children:\\"download the agent Jar file\\"}),\\" and attach it to your application:\\"]}),`\\n`,(0,t.jsx)(e.pre,{children:(0,t.jsx)(e.code,{className:\\"language-bash\\",children:`java -javaagent:/pathto/elastic-otel-javaagent.jar -jar myapp.jar\\n`})}),`\\n`,(0,t.jsx)(e.h3,{id:\\"using-elastics-extensions-with-the-vanilla-otel-java-agent\\",children:\\"Using Elastic\\\\u2019s extensions with the vanilla OTel Java agent\\"}),`\\n`,(0,t.jsxs)(e.p,{children:[\\"If you prefer to continue using the vanilla OTel Java agent but wish to take advantage of the features described above, you have the flexibility to do so. We offer a separate agent extensions package specifically designed for this purpose. To integrate these enhancements, simply \\",(0,t.jsx)(e.a,{href:\\"https://mvnrepository.com/artifact/co.elastic.otel/elastic-otel-agentextension\\",rel:\\"nofollow\\",children:\\"download and place the extensions jar file\\"}),\\" into a designated directory and configure the OTel Java agent extensions directory:\\"]}),`\\n`,(0,t.jsx)(e.pre,{children:(0,t.jsx)(e.code,{className:\\"language-bash\\",children:`OTEL_JAVAAGENT_EXTENSIONS=/pathto/elastic-otel-agentextension.jar\\njava -javaagent:/pathto/otel-javaagent.jar -jar myapp.jar\\n`})}),`\\n`,(0,t.jsx)(e.h3,{id:\\"using-elastics-extensions-manually-with-the-otel-java-sdk\\",children:\\"Using Elastic\\\\u2019s extensions manually with the OTel Java SDK\\"}),`\\n`,(0,t.jsx)(e.p,{children:\\"If you build your instrumentations directly into your applications using the OTel API and rely on the OTel Java SDK instead of the automatic Java agent, you can still use the features we\'ve discussed. Each feature is designed as a standalone component that can be integrated with the OTel Java SDK framework. 
To implement these features, simply refer to the specific descriptions for each one to learn how to configure the OTel Java SDK accordingly:\\"}),`\\n`,(0,t.jsxs)(e.ul,{children:[`\\n`,(0,t.jsx)(e.li,{children:(0,t.jsx)(e.a,{href:\\"https://github.com/elastic/elastic-otel-java/tree/main/inferred-spans\\",rel:\\"nofollow\\",children:\\"Setting up the inferred spans feature with the SDK\\"})}),`\\n`,(0,t.jsx)(e.li,{children:(0,t.jsx)(e.a,{href:\\"https://github.com/elastic/elastic-otel-java/tree/main/universal-profiling-integration\\",rel:\\"nofollow\\",children:\\"Setting up profiling correlation with the SDK\\"})}),`\\n`,(0,t.jsx)(e.li,{children:(0,t.jsx)(e.a,{href:\\"https://github.com/open-telemetry/opentelemetry-java-contrib/tree/main/span-stacktrace\\",rel:\\"nofollow\\",children:\\"Setting up the span stack traces feature with the SDK\\"})}),`\\n`,(0,t.jsxs)(e.li,{children:[\\"Setting up resource detectors with the SDK\\",`\\n`,(0,t.jsxs)(e.ul,{children:[`\\n`,(0,t.jsx)(e.li,{children:(0,t.jsx)(e.a,{href:\\"https://github.com/open-telemetry/opentelemetry-java-contrib/tree/main/resource-providers\\",rel:\\"nofollow\\",children:\\"Service resource detectors\\"})}),`\\n`,(0,t.jsx)(e.li,{children:(0,t.jsx)(e.a,{href:\\"https://github.com/open-telemetry/opentelemetry-java-contrib/tree/main/aws-resources\\",rel:\\"nofollow\\",children:\\"AWS resource detector\\"})}),`\\n`,(0,t.jsx)(e.li,{children:(0,t.jsx)(e.a,{href:\\"https://github.com/open-telemetry/opentelemetry-java-contrib/tree/main/gcp-resources\\",rel:\\"nofollow\\",children:\\"GCP resource detector\\"})}),`\\n`]}),`\\n`]}),`\\n`]}),`\\n`,(0,t.jsx)(e.p,{children:\\"This approach ensures that you can tailor your observability tools to meet your specific needs without compromising on functionality.\\"}),`\\n`,(0,t.jsx)(e.h2,{id:\\"future-plans-and-contributions\\",children:\\"Future plans and contributions\\"}),`\\n`,(0,t.jsxs)(e.p,{children:[\\"We are committed to OpenTelemetry, and our contributions to the OpenTelemetry Java project will continue without limit. Not only are we focused on general improvements within the OTel Java project, but we are also committed to ensuring that the features discussed in this blog post become official extensions to the OpenTelemetry Java SDK/Agent and are included in the OpenTelemetry Java Contrib repository. We have already contributed the \\",(0,t.jsx)(e.a,{href:\\"https://github.com/open-telemetry/opentelemetry-java-contrib/tree/main/span-stacktrace\\",rel:\\"nofollow\\",children:\\"span stack trace feature\\"}),\\" and initiated the contribution of the inferred spans feature, and we are eagerly anticipating the opportunity to add the profiling correlation feature following the successful integration of Elastic\\\\u2019s profiling agent.\\"]}),`\\n`,(0,t.jsx)(e.p,{children:\\"Moreover, our efforts extend beyond the current enhancements; we are actively working to port more features from the Elastic APM Java agent to OpenTelemetry. A particularly ambitious yet thrilling endeavor is our project to enable dynamic configurability of the OpenTelemetry Java agent. This future enhancement will allow for the OpenTelemetry Agent Management Protocol (OpAMP) to be used to remotely and dynamically configure OTel Java agents, improving their adaptability and ease of use.\\"}),`\\n`,(0,t.jsx)(e.p,{children:\\"We encourage you to experience the new Elastic distribution of the OTel Java agent and share your feedback with us. 
Your insights are invaluable as we strive to enhance the capabilities and reach of OpenTelemetry, making it even more powerful and user-friendly.\\"}),`\\n`,(0,t.jsxs)(e.p,{children:[\\"Check out more information on Elastic Distributions of OpenTelemetry in \\",(0,t.jsx)(e.a,{href:\\"https://github.com/elastic/opentelemetry?tab=readme-ov-file\\",rel:\\"nofollow\\",children:\\"github\\"}),\\" and our latest \\",(0,t.jsx)(e.a,{href:\\"https://www.elastic.co/observability-labs/blog/elastic-distributions-opentelemetry\\",rel:\\"nofollow\\",children:\\"EDOT Blog\\"})]}),`\\n`,(0,t.jsx)(e.p,{children:\\"Elastic provides the following components of EDOT:\\"}),`\\n`,(0,t.jsxs)(e.ul,{children:[`\\n`,(0,t.jsxs)(e.li,{children:[`\\n`,(0,t.jsx)(e.p,{children:(0,t.jsx)(e.a,{href:\\"https://www.elastic.co/observability-labs/blog/elastic-distribution-opentelemetry-collector\\",rel:\\"nofollow\\",children:\\"Elastic Distribution of OpenTelemetry (EDOT) Collector\\"})}),`\\n`]}),`\\n`,(0,t.jsxs)(e.li,{children:[`\\n`,(0,t.jsxs)(e.p,{children:[(0,t.jsx)(e.a,{href:\\"https://www.elastic.co/observability-labs/blog/elastic-distribution-opentelemetry-java-agent\\",rel:\\"nofollow\\",children:\\"Elastic Distribution of OpenTelemetry (EDOT) Java\\"}),\\".\\"]}),`\\n`]}),`\\n`,(0,t.jsxs)(e.li,{children:[`\\n`,(0,t.jsx)(e.p,{children:(0,t.jsx)(e.a,{href:\\"https://www.elastic.co/observability-labs/blog/elastic-opentelemetry-distribution-python\\",rel:\\"nofollow\\",children:\\"Elastic Distribution of OpenTelemetry (EDOT) Python\\"})}),`\\n`]}),`\\n`,(0,t.jsxs)(e.li,{children:[`\\n`,(0,t.jsx)(e.p,{children:(0,t.jsx)(e.a,{href:\\"https://www.elastic.co/observability-labs/blog/elastic-opentelemetry-distribution-node-js\\",rel:\\"nofollow\\",children:\\"Elastic Distribution of OpenTelemetry (EDOT) NodeJS\\"})}),`\\n`]}),`\\n`,(0,t.jsxs)(e.li,{children:[`\\n`,(0,t.jsx)(e.p,{children:(0,t.jsx)(e.a,{href:\\"https://www.elastic.co/observability-labs/blog/elastic-opentelemetry-distribution-dotnet-applications\\",rel:\\"nofollow\\",children:\\"Elastic Distribution of OpenTelemetry (EDOT) .NET\\"})}),`\\n`]}),`\\n`,(0,t.jsxs)(e.li,{children:[`\\n`,(0,t.jsx)(e.p,{children:(0,t.jsx)(e.a,{href:\\"https://www.elastic.co/observability-labs/blog/apm-ios-android-native-apps\\",rel:\\"nofollow\\",children:\\"Elastic Distribution of OpenTelemetry (EDOT)\\\\xA0 iOS and Android\\"})}),`\\n`]}),`\\n`]}),`\\n`,(0,t.jsx)(e.p,{children:(0,t.jsx)(e.em,{children:\\"The release and timing of any features or functionality described in this post remain at Elastic\'s sole discretion. 
Any features or functionality not currently available may not be delivered on time or at all.\\"})})]})}function d(i={}){let{wrapper:e}=i.components||{};return e?(0,t.jsx)(e,{...i,children:(0,t.jsx)(h,{...i})}):h(i)}return w(O);})();\\n;return Component;"},"_id":"articles/elastic-distribution-opentelemetry-java-agent.mdx","_raw":{"sourceFilePath":"articles/elastic-distribution-opentelemetry-java-agent.mdx","sourceFileName":"elastic-distribution-opentelemetry-java-agent.mdx","sourceFileDir":"articles","contentType":"mdx","flattenedPath":"articles/elastic-distribution-opentelemetry-java-agent"},"type":"Article","imageUrl":"/assets/images/elastic-distribution-opentelemetry-java-agent/observability-launch-series-3-java-auto.jpg","readingTime":"10 min read","url":"/elastic-distribution-opentelemetry-java-agent","headings":[{"level":2,"title":"Elastic Distribution of OpenTelemetry Java (EDOT Java)","href":"#elastic-distribution-of-opentelemetry-java-edot-java"},{"level":2,"title":"Features included","href":"#features-included"},{"level":3,"title":"Inferred spans","href":"#inferred-spans"},{"level":3,"title":"Correlation with profiling","href":"#correlation-with-profiling"},{"level":3,"title":"Span stack traces","href":"#span-stack-traces"},{"level":3,"title":"Auto-detection of service and cloud resources","href":"#auto-detection-of-service-and-cloud-resources"},{"level":2,"title":"Ways to use the EDOT Java","href":"#ways-to-use-the-edot-java"},{"level":3,"title":"Using Elastic’s distribution directly","href":"#using-elastics-distribution-directly"},{"level":3,"title":"Using Elastic’s extensions with the vanilla OTel Java agent","href":"#using-elastics-extensions-with-the-vanilla-otel-java-agent"},{"level":3,"title":"Using Elastic’s extensions manually with the OTel Java SDK","href":"#using-elastics-extensions-manually-with-the-otel-java-sdk"},{"level":2,"title":"Future plans and contributions","href":"#future-plans-and-contributions"}]},{"title":"Introducing Elastic Distributions of OpenTelemetry","slug":"elastic-distributions-opentelemetry","date":"2024-08-15","description":"Elastic is proud to introduce Elastic Distributions of OpenTelemetry (EDOT), which contains Elastic’s versions of the OpenTelemetry Collector and several language SDKs like Python, Java, .NET, and NodeJS. These help provide enhanced features and enterprise-grade support for EDOT.","image":"edot-image.png","author":[{"slug":"alexander-wert","type":"Author","_raw":{}},{"slug":"miguel-luna","type":"Author","_raw":{}},{"slug":"bahubali-shetti","type":"Author","_raw":{}}],"tags":[{"slug":"opentelemetry","type":"Tag","_raw":{}},{"slug":"apm","type":"Tag","_raw":{}},{"slug":"log analytics","type":"Tag","_raw":{}},{"slug":"instrumentation","type":"Tag","_raw":{}}],"body":{"raw":"\\nWe are announcing the availability of Elastic Distributions of OpenTelemetry (EDOT). These Elastic distributions, currently in tech preview,\xa0 have been developed to enhance the capabilities of standard OpenTelemetry distributions and improve existing OpenTelemetry support from Elastic.\xa0\\n\\nThe Elastic Distributions of OpenTelemetry (EDOT) are composed of OpenTelemetry (OTel) project components, OTel Collector, and language SDKs,\xa0 which provide users with the necessary capabilities and out-of-the-box configurations, enabling quick and effortless infra and application monitoring.\\n\\nWhile OTel components are feature-rich, enhancements through the community can take time. 
Additionally, support is left up to the community or individual users and organizations. Hence EDOT will bring the following to end users:\\n\\n- **Deliver enhanced features earlier than OTel**: By providing features unavailable in the “vanilla” OpenTelemetry components, we can quickly meet customers’ requirements while still providing an OpenTelemetry native and vendor-agnostic instrumentation for their applications. Elastic will continuously upstream these enhanced features.\\n\\n- **Enhanced OTel support** - By maintaining Elastic distributions, we can better support customers with enhancements and fixes outside of the OTel release cycles. In addition, Elastic support can troubleshoot issues on the EDOT.\\n\\nEDOT currently includes the following tech preview components, which will\xa0 grow over time:\\n\\n- [Elastic Distribution of OpenTelemetry (EDOT) Collector](https://www.elastic.co/observability-labs/blog/elastic-distribution-opentelemetry-collector)\\n\\n- [Elastic Distribution of OpenTelemetry (EDOT) Java](https://www.elastic.co/observability-labs/blog/elastic-distribution-opentelemetry-java-agent).\\n\\n- [Elastic Distribution of OpenTelemetry (EDOT) Python](https://www.elastic.co/observability-labs/blog/elastic-opentelemetry-distribution-python)\\n\\n- [Elastic Distribution of OpenTelemetry (EDOT) NodeJS](https://www.elastic.co/observability-labs/blog/elastic-opentelemetry-distribution-node-js)\\n\\n- [Elastic Distribution of OpenTelemetry (EDOT) .NET](https://www.elastic.co/observability-labs/blog/elastic-opentelemetry-distribution-dotnet-applications)\\n\\n- [Elastic Distribution of OpenTelemetry (EDOT)\xa0 iOS and Android](https://www.elastic.co/observability-labs/blog/apm-ios-android-native-apps)\\n\\nDetails and documentation for all EDOT are available in our public [OpenTelemetry GitHub repository](https://github.com/elastic/opentelemetry).\xa0\\n\\n\\n![EDOT Components](/assets/images/elastic-distributions-opentelemetry/edot-components.png)\\n\\n\\n## Elastic Distribution of OpenTelemetry (EDOT) Collector\\n\\nThe EDOT Collector, recently released with the 8.15 release of Elastic Observability enhances Elastic’s existing OTel capabilities. The EDOT Collector can, in addition to service monitoring, forward application logs, infrastructure logs, and metrics using standard OpenTelemetry Collector receivers like file logs and host metrics receivers.\\n\\nAdditionally, users of the Elastic Distribution of the OpenTelemetry Collector benefit from container logs automatically enriched with Kubernetes metadata by leveraging the powerful [container log parser](https://opentelemetry.io/blog/2024/otel-collector-container-log-parser/) that Elastic recently contributed. This OpenTelemetry-based enrichment enhances the context and value of the collected logs, providing deeper insights and more effective troubleshooting capabilities.\\n\\nThis new collector distribution ensures that exported data is fully compatible with the Elastic Platform, enhancing the overall observability experience. Elastic also ensures that Elastic-curated UIs can seamlessly handle both the Elastic Common Schema (ECS) and OpenTelemetry formats.\\n\\n\\n## Elastic Distributions for Language SDKs\\n\\n[Elastic\'s APM agents](https://www.elastic.co/guide/en/apm/agent/index.html) have capabilities yet to be available in the OTel SDKs. EDOT brings these capabilities into the OTel language SDKs while maintaining seamless integration with Elastic Observability. 
Elastic will release OTel versions of all its APM agents, and continue to add additional language SDKs mirroring OTel.\\n\\n\\n## Continued support for Native OTel components\\n\\nEDOT does not preclude users from using native components. Users are still able to use:\\n\\n- **OpenTelemetry Vanilla Language SDKs:** use standard OpenTelemetry code instrumentation for many popular programming languages sending OTLP traces to Elastic via APM server.\\n\\n- **Upstream Distribution of OpenTelemetry Collector (Contrib or Custom):** Send traces using the OpenTelemetry Collector with OTLP receiver and OTLP exporter to Elastic via APM server.\\n\\nElastic is committed to contributing EDOT features or components upstream into the OpenTelemetry community, fostering a collaborative environment, and enhancing the overall OpenTelemetry ecosystem.\\n\\n\\n## Extending our commitment to vendor-agnostic data collection\\n\\nElastic remains committed to supporting OpenTelemetry by being OTel first and building a vendor-agnostic framework. As OpenTelemetry constantly grows its support of SDKs and components,\xa0 Elastic will continue to refine and mirror EDOT to OpenTelemetry and push enhancements upstream.\xa0\\n\\nOver the past year, Elastic has been active in OTel through its [donation of Elastic Common Schema (ECS)](https://opentelemetry.io/blog/2023/ecs-otel-semconv-convergence/), contributions to the native [OpenTelemetry Collector](https://www.elastic.co/observability-labs/blog/elastic-distribution-opentelemetry-collector) and language SDKs, and a recent [donation of its Universal Profiling agent](https://www.elastic.co/observability-labs/blog/elastic-profiling-agent-acceptance-opentelemetry) to OpenTelemetry.\xa0\\n\\nEDOT\xa0 builds on our decision to fully adopt and recommend OpenTelemetry as the preferred solution for observing applications. With EDOT, Elastic customers can future-proof their investments and adopt OpenTelemetry, giving them vendor-neutral instrumentation with Elastic enterprise-grade support.\\n\\nOur vision is that Elastic will work with the OpenTelemetry community to donate features through the standardization processes and contribute the code to implement those in the native OpenTelemetry components. In time, as OTel capabilities advance, and many of the Elastic-exclusive features transition into OpenTelemetry, we look forward to no longer having Elastic Distributions for OpenTelemetry.
In the meantime, we can deliver those capabilities via our OpenTelemetry distributions.\\n","code":"var Component=(()=>{var p=Object.create;var l=Object.defineProperty;var u=Object.getOwnPropertyDescriptor;var m=Object.getOwnPropertyNames;var g=Object.getPrototypeOf,b=Object.prototype.hasOwnProperty;var f=(n,e)=>()=>(e||n((e={exports:{}}).exports,e),e.exports),y=(n,e)=>{for(var i in e)l(n,i,{get:e[i],enumerable:!0})},a=(n,e,i,r)=>{if(e&&typeof e==\\"object\\"||typeof e==\\"function\\")for(let o of m(e))!b.call(n,o)&&o!==i&&l(n,o,{get:()=>e[o],enumerable:!(r=u(e,o))||r.enumerable});return n};var T=(n,e,i)=>(i=n!=null?p(g(n)):{},a(e||!n||!n.__esModule?l(i,\\"default\\",{value:n,enumerable:!0}):i,n)),O=n=>a(l({},\\"__esModule\\",{value:!0}),n);var c=f((D,s)=>{s.exports=_jsx_runtime});var w={};y(w,{default:()=>h,frontmatter:()=>v});var t=T(c()),v={title:\\"Introducing Elastic Distributions of OpenTelemetry\\",slug:\\"elastic-distributions-opentelemetry\\",date:\\"2024-08-15\\",description:\\"Elastic is proud to introduce Elastic Distributions of OpenTelemetry (EDOT), which contains Elastic\\\\u2019s versions of the OpenTelemetry Collector and several language SDKs like Python, Java, .NET, and NodeJS. These help provide enhanced features and enterprise-grade support for EDOT.\\",author:[{slug:\\"alexander-wert\\"},{slug:\\"miguel-luna\\"},{slug:\\"bahubali-shetti\\"}],image:\\"edot-image.png\\",tags:[{slug:\\"opentelemetry\\"},{slug:\\"apm\\"},{slug:\\"log analytics\\"},{slug:\\"instrumentation\\"}]};function d(n){let e={a:\\"a\\",h2:\\"h2\\",img:\\"img\\",li:\\"li\\",p:\\"p\\",strong:\\"strong\\",ul:\\"ul\\",...n.components};return(0,t.jsxs)(t.Fragment,{children:[(0,t.jsx)(e.p,{children:\\"We are announcing the availability of Elastic Distributions of OpenTelemetry (EDOT). These Elastic distributions, currently in tech preview,\\\\xA0 have been developed to enhance the capabilities of standard OpenTelemetry distributions and improve existing OpenTelemetry support from Elastic.\\\\xA0\\"}),`\\n`,(0,t.jsx)(e.p,{children:\\"The Elastic Distributions of OpenTelemetry (EDOT) are composed of OpenTelemetry (OTel) project components, OTel Collector, and language SDKs,\\\\xA0 which provide users with the necessary capabilities and out-of-the-box configurations, enabling quick and effortless infra and application monitoring.\\"}),`\\n`,(0,t.jsx)(e.p,{children:\\"While OTel components are feature-rich, enhancements through the community can take time. Additionally, support is left up to the community or individual users and organizations. Hence EDOT will bring the following to end users:\\"}),`\\n`,(0,t.jsxs)(e.ul,{children:[`\\n`,(0,t.jsxs)(e.li,{children:[`\\n`,(0,t.jsxs)(e.p,{children:[(0,t.jsx)(e.strong,{children:\\"Deliver enhanced features earlier than OTel\\"}),\\": By providing features unavailable in the \\\\u201Cvanilla\\\\u201D OpenTelemetry components, we can quickly meet customers\\\\u2019 requirements while still providing an OpenTelemetry native and vendor-agnostic instrumentation for their applications. Elastic will continuously upstream these enhanced features.\\"]}),`\\n`]}),`\\n`,(0,t.jsxs)(e.li,{children:[`\\n`,(0,t.jsxs)(e.p,{children:[(0,t.jsx)(e.strong,{children:\\"Enhanced OTel support\\"}),\\" - By maintaining Elastic distributions, we can better support customers with enhancements and fixes outside of the OTel release cycles. 
In addition, Elastic support can troubleshoot issues on the EDOT.\\"]}),`\\n`]}),`\\n`]}),`\\n`,(0,t.jsx)(e.p,{children:\\"EDOT currently includes the following tech preview components, which will\\\\xA0 grow over time:\\"}),`\\n`,(0,t.jsxs)(e.ul,{children:[`\\n`,(0,t.jsxs)(e.li,{children:[`\\n`,(0,t.jsx)(e.p,{children:(0,t.jsx)(e.a,{href:\\"https://www.elastic.co/observability-labs/blog/elastic-distribution-opentelemetry-collector\\",rel:\\"nofollow\\",children:\\"Elastic Distribution of OpenTelemetry (EDOT) Collector\\"})}),`\\n`]}),`\\n`,(0,t.jsxs)(e.li,{children:[`\\n`,(0,t.jsxs)(e.p,{children:[(0,t.jsx)(e.a,{href:\\"https://www.elastic.co/observability-labs/blog/elastic-distribution-opentelemetry-java-agent\\",rel:\\"nofollow\\",children:\\"Elastic Distribution of OpenTelemetry (EDOT) Java\\"}),\\".\\"]}),`\\n`]}),`\\n`,(0,t.jsxs)(e.li,{children:[`\\n`,(0,t.jsx)(e.p,{children:(0,t.jsx)(e.a,{href:\\"https://www.elastic.co/observability-labs/blog/elastic-opentelemetry-distribution-python\\",rel:\\"nofollow\\",children:\\"Elastic Distribution of OpenTelemetry (EDOT) Python\\"})}),`\\n`]}),`\\n`,(0,t.jsxs)(e.li,{children:[`\\n`,(0,t.jsx)(e.p,{children:(0,t.jsx)(e.a,{href:\\"https://www.elastic.co/observability-labs/blog/elastic-opentelemetry-distribution-node-js\\",rel:\\"nofollow\\",children:\\"Elastic Distribution of OpenTelemetry (EDOT) NodeJS\\"})}),`\\n`]}),`\\n`,(0,t.jsxs)(e.li,{children:[`\\n`,(0,t.jsx)(e.p,{children:(0,t.jsx)(e.a,{href:\\"https://www.elastic.co/observability-labs/blog/elastic-opentelemetry-distribution-dotnet-applications\\",rel:\\"nofollow\\",children:\\"Elastic Distribution of OpenTelemetry (EDOT) .NET\\"})}),`\\n`]}),`\\n`,(0,t.jsxs)(e.li,{children:[`\\n`,(0,t.jsx)(e.p,{children:(0,t.jsx)(e.a,{href:\\"https://www.elastic.co/observability-labs/blog/apm-ios-android-native-apps\\",rel:\\"nofollow\\",children:\\"Elastic Distribution of OpenTelemetry (EDOT)\\\\xA0 iOS and Android\\"})}),`\\n`]}),`\\n`]}),`\\n`,(0,t.jsxs)(e.p,{children:[\\"Details and documentation for all EDOT are available in our public \\",(0,t.jsx)(e.a,{href:\\"https://github.com/elastic/opentelemetry\\",rel:\\"nofollow\\",children:\\"OpenTelemetry GitHub repository\\"}),\\".\\\\xA0\\"]}),`\\n`,(0,t.jsx)(e.p,{children:(0,t.jsx)(e.img,{src:\\"/assets/images/elastic-distributions-opentelemetry/edot-components.png\\",alt:\\"EDOT Components\\",width:\\"1143\\",height:\\"348\\"})}),`\\n`,(0,t.jsxs)(e.h2,{id:\\"elastic-distribution-of-opentelemetry-edot-collector\\",children:[\\"Elastic Distribution of OpenTelemetry (EDOT) Collector\\",(0,t.jsx)(\\"a\\",{id:\\"elastic-distribution-of-opentelemetry-edot-collector\\"})]}),`\\n`,(0,t.jsx)(e.p,{children:\\"The EDOT Collector, recently released with the 8.15 release of Elastic Observability enhances Elastic\\\\u2019s existing OTel capabilities. The EDOT Collector can, in addition to service monitoring, forward application logs, infrastructure logs, and metrics using standard OpenTelemetry Collector receivers like file logs and host metrics receivers.\\"}),`\\n`,(0,t.jsxs)(e.p,{children:[\\"Additionally, users of the Elastic Distribution of the OpenTelemetry Collector benefit from container logs automatically enriched with Kubernetes metadata by leveraging the powerful \\",(0,t.jsx)(e.a,{href:\\"https://opentelemetry.io/blog/2024/otel-collector-container-log-parser/\\",rel:\\"nofollow\\",children:\\"container log parser\\"}),\\" that Elastic recently contributed. 
This OpenTelemetry-based enrichment enhances the context and value of the collected logs, providing deeper insights and more effective troubleshooting capabilities.\\"]}),`\\n`,(0,t.jsx)(e.p,{children:\\"This new collector distribution ensures that exported data is fully compatible with the Elastic Platform, enhancing the overall observability experience. Elastic also ensures that Elastic-curated UIs can seamlessly handle both the Elastic Common Schema (ECS) and OpenTelemetry formats.\\"}),`\\n`,(0,t.jsxs)(e.h2,{id:\\"elastic-distributions-for-language-sdks\\",children:[\\"Elastic Distributions for Language SDKs\\",(0,t.jsx)(\\"a\\",{id:\\"elastic-distributions-for-language-sdks\\"})]}),`\\n`,(0,t.jsxs)(e.p,{children:[(0,t.jsx)(e.a,{href:\\"https://www.elastic.co/guide/en/apm/agent/index.html\\",rel:\\"nofollow\\",children:\\"Elastic\'s APM agents\\"}),\\" have capabilities yet to be available in the OTel SDKs. EDOT brings these capabilities into the OTel language SDKs while maintaining seamless integration with Elastic Observability. Elastic will release OTel versions of all its APM agents, and continue to add additional language SDKs mirroring OTel.\\"]}),`\\n`,(0,t.jsxs)(e.h2,{id:\\"continued-support-for-native-otel-components\\",children:[\\"Continued support for Native OTel components\\",(0,t.jsx)(\\"a\\",{id:\\"continued-support-for-native-otel-components\\"})]}),`\\n`,(0,t.jsx)(e.p,{children:\\"EDOT does not preclude users from using native components. Users are still able to use:\\"}),`\\n`,(0,t.jsxs)(e.ul,{children:[`\\n`,(0,t.jsxs)(e.li,{children:[`\\n`,(0,t.jsxs)(e.p,{children:[(0,t.jsx)(e.strong,{children:\\"OpenTelemetry Vanilla Language SDKs:\\"}),\\" use standard OpenTelemetry code instrumentation for many popular programming languages sending OTLP traces to Elastic via APM server.\\"]}),`\\n`]}),`\\n`,(0,t.jsxs)(e.li,{children:[`\\n`,(0,t.jsxs)(e.p,{children:[(0,t.jsx)(e.strong,{children:\\"Upstream Distribution of OpenTelemetry Collector (Contrib or Custom):\\"}),\\" Send traces using the OpenTelemetry Collector with OTLP receiver and OTLP exporter to Elastic via APM server.\\"]}),`\\n`]}),`\\n`]}),`\\n`,(0,t.jsx)(e.p,{children:\\"Elastic is committed to contributing EDOT features or components upstream into the OpenTelemetry community, fostering a collaborative environment, and enhancing the overall OpenTelemetry ecosystem.\\"}),`\\n`,(0,t.jsxs)(e.h2,{id:\\"extending-our-commitment-to-vendor-agnostic-data-collection\\",children:[\\"Extending our commitment to vendor-agnostic data collection\\",(0,t.jsx)(\\"a\\",{id:\\"extending-our-commitment-to-vendor-agnostic-data-collection\\"})]}),`\\n`,(0,t.jsx)(e.p,{children:\\"Elastic remains committed to supporting OpenTelemetry by being OTel first and building a vendor-agnostic framework. 
As OpenTelemetry constantly grows its support of SDKs and components,\\\\xA0 Elastic will continue to refine and mirror EDOT to OpenTelemetry and push enhancements upstream.\\\\xA0\\"}),`\\n`,(0,t.jsxs)(e.p,{children:[\\"Over the past year, Elastic has been active in OTel through its \\",(0,t.jsx)(e.a,{href:\\"https://opentelemetry.io/blog/2023/ecs-otel-semconv-convergence/\\",rel:\\"nofollow\\",children:\\"donation of Elastic Common Schema (ECS)\\"}),\\", contributions to the native \\",(0,t.jsx)(e.a,{href:\\"https://www.elastic.co/observability-labs/blog/elastic-distribution-opentelemetry-collector\\",rel:\\"nofollow\\",children:\\"OpenTelemetry Collector\\"}),\\" and language SDKs, and a recent \\",(0,t.jsx)(e.a,{href:\\"https://www.elastic.co/observability-labs/blog/elastic-profiling-agent-acceptance-opentelemetry\\",rel:\\"nofollow\\",children:\\"donation of its Universal Profiling agent\\"}),\\" to OpenTelemetry.\\\\xA0\\"]}),`\\n`,(0,t.jsx)(e.p,{children:\\"EDOT\\\\xA0 builds on our decision to fully adopt and recommend OpenTelemetry as the preferred solution for observing applications. With EDOT, Elastic customers can future-proof their investments and adopt OpenTelemetry, giving them vendor-neutral instrumentation with Elastic enterprise-grade support.\\"}),`\\n`,(0,t.jsx)(e.p,{children:\\"Our vision is that Elastic will work with the OpenTelemetry community to donate features through the standardization processes and contribute the code to implement those in the native OpenTelemetry components. In time, as OTel capabilities advance, and many of the Elastic-exclusive features transition into OpenTelemetry, we look forward to no longer having Elastic Distributions for OpenTelemetry. In the meantime, we can deliver those capabilities via our OpenTelemetry distributions.\\"})]})}function h(n={}){let{wrapper:e}=n.components||{};return e?(0,t.jsx)(e,{...n,children:(0,t.jsx)(d,{...n})}):d(n)}return O(w);})();\\n;return Component;"},"_id":"articles/elastic-distributions-opentelemetry.mdx","_raw":{"sourceFilePath":"articles/elastic-distributions-opentelemetry.mdx","sourceFileName":"elastic-distributions-opentelemetry.mdx","sourceFileDir":"articles","contentType":"mdx","flattenedPath":"articles/elastic-distributions-opentelemetry"},"type":"Article","imageUrl":"/assets/images/elastic-distributions-opentelemetry/edot-image.png","readingTime":"4 min read","url":"/elastic-distributions-opentelemetry","headings":[{"level":2,"title":"Elastic Distribution of OpenTelemetry (EDOT) Collector","href":"#elastic-distribution-of-opentelemetry-edot-collectora-idelastic-distribution-of-opentelemetry-edot-collectora"},{"level":2,"title":"Elastic Distributions for Language SDKs","href":"#elastic-distributions-for-language-sdksa-idelastic-distributions-for-language-sdksa"},{"level":2,"title":"Continued support for Native OTel components","href":"#continued-support-for-native-otel-componentsa-idcontinued-support-for-native-otel-componentsa"},{"level":2,"title":"Extending our commitment to vendor-agnostic data collection","href":"#extending-our-commitment-to-vendor-agnostic-data-collectiona-idextending-our-commitment-to-vendor-agnostic-data-collectiona"}]},{"title":"OpenTelemetry and Elastic: Working together to establish continuous profiling for the community","slug":"elastic-donation-proposal-to-contribute-profiling-agent-to-opentelemetry","date":"2024-03-12","description":"OpenTelemetry is embracing profiling. 
Elastic is donating its whole-system continuous profiling agent to OpenTelemetry to further this advancement, empowering OTel users to improve computational efficiency and reduce their carbon footprint.","image":"ecs-otel-announcement-1.jpeg","author":[{"slug":"israel-ogbole","type":"Author","_raw":{}},{"slug":"alexander-wert","type":"Author","_raw":{}},{"slug":"tim-ruhsen","type":"Author","_raw":{}}],"tags":[{"slug":"universal-profiling","type":"Tag","_raw":{}},{"slug":"opentelemetry","type":"Tag","_raw":{}}],"body":{"raw":"\\nProfiling is emerging as a core pillar of observability, aptly dubbed the fourth pillar, with the OpenTelemetry (OTel) project leading this essential development. This blog post dives into the recent advancements in profiling within OTel and how Elastic\xae is actively contributing toward it.\\n\\nAt Elastic, we’re big believers in and contributors to the OpenTelemetry project. The project’s benefits of flexibility, performance, and vendor agnosticism have been making their rounds; we’ve seen a groundswell of customer interest.\\n\\nTo this end, after donating our [**Elastic Common Schema**](https://www.elastic.co/blog/ecs-elastic-common-schema-otel-opentelemetry-faq) and our [invokedynamic based java agent approach](https://www.elastic.co/blog/elastic-invokedynamic-opentelemetry-java-agent), we recently [announced our intent to donate our continuous profiling agent](https://github.com/open-telemetry/community/issues/1918) — a whole-system, always-on, continuous profiling solution that eliminates the need for run-time/bytecode instrumentation, recompilation, on-host debug symbols, or service restarts.\\n\\nProfiling helps organizations run efficient services by minimizing computational wastage, thereby reducing operational costs. Leveraging [eBPF](https://ebpf.io/), the Elastic profiling agent provides unprecedented visibility into the runtime behavior of all applications: it builds stacktraces that go from the kernel, through userspace native code, all the way into code running in higher level runtimes, enabling you to identify performance regressions, reduce wasteful computations, and debug complex issues faster.\\n\\n## Enabling profiling in OpenTelemetry: A step toward unified observability\\n\\nElastic actively participates in the OTel community, particularly within the Profiling Special Interest Group (SIG). This group has been instrumental in defining the OTel [Profiling Data Model](https://github.com/open-telemetry/oteps/blob/main/text/profiles/0239-profiles-data-model.md), a crucial step toward standardizing profiling data.\\n\\nThe recent merger of the [OpenTelemetry Enhancement Proposal (OTEP) introducing profiling support to the OpenTelemetry Protocol (OTLP)](https://github.com/open-telemetry/oteps/pull/239) marks a significant milestone. With the standardization of profiles as a core observability pillar alongside metrics, tracing, and logs, OTel offers a comprehensive suite of observability tools, empowering users to gain a holistic view of their applications\' health and performance.\\n\\nIn line with this advancement, we are donating our whole-system, eBPF-based continuous profiling agent to OTel. 
In parallel, we are implementing the experimental OTel Profiling signal in the profiling agent, to ensure and demonstrate OTel protocol compatibility in the agent and prepare it for a fully OTel-based collection of profiling signals and correlate it to logs, metrics, and traces.\\n\\n## Why is Elastic donating the eBPF-based profiling agent to OpenTelemetry?\\n\\nComputational efficiency has always been a critical concern for software professionals. However, in an era where every line of code affects both the bottom line and the environment, there\'s an additional reason to focus on it. Elastic is committed to helping the OpenTelemetry community enhance computational efficiency because efficient software not only reduces the cost of goods sold (COGS) but also reduces carbon footprint.\\n\\nWe have seen firsthand — both internally and from our customers\' testimonials — how profiling insights aid in enhancing software efficiency. This results in an improved customer experience, lower resource consumption, and reduced cloud costs.\\n\\n![A differential flamegraph showing regression in release comparison](/assets/images/elastic-donation-proposal-to-contribute-profiling-agent-to-opentelemetry/1-flamegraph.png)\\n\\nMoreover, adopting a whole-system profiling strategy, such as [Elastic Universal Profiling](https://www.elastic.co/blog/whole-system-visibility-elastic-universal-profiling), differs significantly from traditional instrumentation profilers that focus solely on runtime. Elastic Universal Profiling provides whole-system visibility, profiling not only your own code but also third-party libraries, kernel operations, and other code you don\'t own. This comprehensive approach facilitates rapid optimizations by identifying non-optimal common libraries and uncovering \\"unknown unknowns\\" that consume CPU cycles. Often, a tipping point is reached when the resource consumption of libraries or certain daemon processes exceeds that of the applications themselves. Without system-wide profiling, along with the capabilities to slice data per service and aggregate total usage, pinpointing these resource-intensive components becomes a formidable challenge.\\n\\nAt Elastic, we have a customer with an extensive cloud footprint who plans to negotiate with their cloud provider to reclaim money for the significant compute resource consumed by the cloud provider\'s in-VM agents. These examples highlight the importance of whole-system profiling and the benefits that the OpenTelemetry community will gain if the donation proposal is accepted.\\n\\nSpecifically, OTel users will gain access to a lightweight, battle-tested production-grade continuous profiling agent with the following features:\\n\\n- Very low CPU and memory overhead (1% CPU and 250MB memory are our upper limits in testing, and the agent typically manages to stay way below that)\\n\\n- Support for native C/C++ executables without the need for DWARF debug information by leveraging .eh_frame data, as described in “[How Universal Profiling unwinds stacks without frame pointers and symbols](https://www.elastic.co/blog/universal-profiling-frame-pointers-symbols-ebpf)”\\n\\n- Support profiling of system libraries without frame pointers and without debug symbols on the host\\n\\n- Support for mixed stacktraces between runtimes — stacktraces go from Kernel space through unmodified system libraries all the way into high-level languages\\n\\n- Support for native code (C/C++, Rust, Zig, Go, etc. 
without debug symbols on host)\\n\\n- Support for a broad set of High-level languages (Hotspot JVM, Python, Ruby, PHP, Node.JS, V8, Perl), .NET is in preparation\\n\\n- **100% non-intrusive:** there\'s no need to load agents or libraries into the processes that are being profiled\\n\\n- No need for any reconfiguration, instrumentation, or restarts of HLL interpreters and VMs: the agent supports unwinding each of the supported languages in the default configuration\\n\\n- Support for x86 and Arm64 CPU architectures\\n\\n- Support for native inline frames, which provide insights into compiler optimizations and offer a higher precision of function call chains\\n\\n- Support for [Probabilistic Profiling](https://www.elastic.co/guide/en/observability/current/profiling-probabilistic-profiling.html) to reduce data storage costs\\n\\n- . . . and more\\n\\nElastic\'s commitment to enhancing computational efficiency and our belief in the OpenTelemetry vision underscores our dedication to advancing the observability ecosystem –– by donating the profiling agent. Elastic is not only contributing technology but also dedicating a team of specialized profiling domain experts to co-maintain and advance the profiling capabilities within OpenTelemetry.\\n\\n## How does this donation benefit the OTel community?\\n\\nMetrics, logs, and traces offer invaluable insights into system health. But what if you could unlock an even deeper level of visibility? Here\'s why profiling is a perfect complement to your OTel toolkit:\\n\\n### 1. Deep system visibility: Beyond the surface\\n\\nThink of whole-system profiling as an MRI scan for your fleet. It goes deeper into the internals of your system, revealing hidden performance issues lurking beneath the surface. You can identify \\"unknown unknowns\\" — inefficiencies you wouldn\'t have noticed otherwise — and gain a comprehensive understanding of how your system functions at its core.\\n\\n### 2. Cross-signal correlation: Answering \\"why\\" with confidence\\n\\nThe Elastic Universal Profiling agent supports trace correlation with the OTel Java agent/SDK (with Go support coming soon!). This correlation enables OTel users to view profiling data by services or service endpoints, allowing for a more context-aware and targeted root cause analysis. This powerful combination allows you to pinpoint the exact cause of resource consumption at the trace level. No more guessing why specific functions hog CPU or why certain events occur. You can finally answer the critical \\"why\\" questions with precision, enabling targeted optimization efforts.\\n\\n### 3. Cost and sustainability optimization: Beyond performance\\n\\nOur approach to profiling goes beyond just performance gains. By correlating whole-system profiling data with tracing, we can help you measure the environmental impact and cloud cost associated with specific services and functionalities within your application. 
This empowers you to make data-driven decisions that optimize both performance and resource utilization, leading to a more sustainable and cost-effective operation.\\n\\n![A differential function insight, showing the performance, cost, and CO2 impact of a change](/assets/images/elastic-donation-proposal-to-contribute-profiling-agent-to-opentelemetry/2-universal-profiling.png)\\n\\n## Elastic\'s commitment to OpenTelemetry\\n\\nElastic currently supports a growing list of Cloud Native Computing Foundation (CNCF) projects [such as Kubernetes (K8S), Prometheus, Fluentd, Fluent Bit, and Istio](https://www.elastic.co/blog/kubernetes-k8s-observability-elasticsearch-cncf). [Elastic’s application performance monitoring (APM)](https://www.elastic.co/observability/application-performance-monitoring) also natively supports OTel, ensuring all APM capabilities are available with either Elastic or OTel agents or a combination of the two. In addition to the ECS contribution and ongoing collaboration with OTel SemConv, Elastic [has continued to make contributions to other OTel projects](https://www.elastic.co/observability/opentelemetry), including language SDKs (such as OTel Swift, OTel Go, OTel Ruby, and others), and participates in several [special interest groups (SIGs)](https://github.com/open-telemetry/community#special-interest-groups) to establish OTel as a standard for observability and security.\\n\\nWe are excited about our [strengthening relationship with OTel](https://opentelemetry.io/blog/2023/ecs-otel-semconv-convergence/) and the opportunity to donate our profiling agent in a way that benefits both the Elastic community and the broader OTel community. Learn more about [Elastic’s OpenTelemetry support](https://www.elastic.co/observability/opentelemetry) or contribute to the [donation proposal or just join the conversation](https://github.com/open-telemetry/community/issues/1918).\\n\\nStay tuned for further updates as the profiling part of OTel continues to evolve.\\n\\n_The release and timing of any features or functionality described in this post remain at Elastic\'s sole discretion. Any features or functionality not currently available may not be delivered on time or at all._\\n","code":"var Component=(()=>{var d=Object.create;var r=Object.defineProperty;var u=Object.getOwnPropertyDescriptor;var f=Object.getOwnPropertyNames;var g=Object.getPrototypeOf,m=Object.prototype.hasOwnProperty;var y=(n,e)=>()=>(e||n((e={exports:{}}).exports,e),e.exports),b=(n,e)=>{for(var t in e)r(n,t,{get:e[t],enumerable:!0})},l=(n,e,t,a)=>{if(e&&typeof e==\\"object\\"||typeof e==\\"function\\")for(let o of f(e))!m.call(n,o)&&o!==t&&r(n,o,{get:()=>e[o],enumerable:!(a=u(e,o))||a.enumerable});return n};var w=(n,e,t)=>(t=n!=null?d(g(n)):{},l(e||!n||!n.__esModule?r(t,\\"default\\",{value:n,enumerable:!0}):t,n)),v=n=>l(r({},\\"__esModule\\",{value:!0}),n);var c=y((E,s)=>{s.exports=_jsx_runtime});var O={};b(O,{default:()=>p,frontmatter:()=>T});var i=w(c()),T={title:\\"OpenTelemetry and Elastic: Working together to establish continuous profiling for the community\\",slug:\\"elastic-donation-proposal-to-contribute-profiling-agent-to-opentelemetry\\",date:\\"2024-03-12\\",description:\\"OpenTelemetry is embracing profiling. 
Elastic is donating its whole-system continuous profiling agent to OpenTelemetry to further this advancement, empowering OTel users to improve computational efficiency and reduce their carbon footprint.\\",author:[{slug:\\"israel-ogbole\\"},{slug:\\"alexander-wert\\"},{slug:\\"tim-ruhsen\\"}],image:\\"ecs-otel-announcement-1.jpeg\\",tags:[{slug:\\"universal-profiling\\"},{slug:\\"opentelemetry\\"}]};function h(n){let e={a:\\"a\\",em:\\"em\\",h2:\\"h2\\",h3:\\"h3\\",img:\\"img\\",li:\\"li\\",p:\\"p\\",strong:\\"strong\\",ul:\\"ul\\",...n.components};return(0,i.jsxs)(i.Fragment,{children:[(0,i.jsx)(e.p,{children:\\"Profiling is emerging as a core pillar of observability, aptly dubbed the fourth pillar, with the OpenTelemetry (OTel) project leading this essential development. This blog post dives into the recent advancements in profiling within OTel and how Elastic\\\\xAE is actively contributing toward it.\\"}),`\\n`,(0,i.jsx)(e.p,{children:\\"At Elastic, we\\\\u2019re big believers in and contributors to the OpenTelemetry project. The project\\\\u2019s benefits of flexibility, performance, and vendor agnosticism have been making their rounds; we\\\\u2019ve seen a groundswell of customer interest.\\"}),`\\n`,(0,i.jsxs)(e.p,{children:[\\"To this end, after donating our \\",(0,i.jsx)(e.a,{href:\\"https://www.elastic.co/blog/ecs-elastic-common-schema-otel-opentelemetry-faq\\",rel:\\"nofollow\\",children:(0,i.jsx)(e.strong,{children:\\"Elastic Common Schema\\"})}),\\" and our \\",(0,i.jsx)(e.a,{href:\\"https://www.elastic.co/blog/elastic-invokedynamic-opentelemetry-java-agent\\",rel:\\"nofollow\\",children:\\"invokedynamic based java agent approach\\"}),\\", we recently \\",(0,i.jsx)(e.a,{href:\\"https://github.com/open-telemetry/community/issues/1918\\",rel:\\"nofollow\\",children:\\"announced our intent to donate our continuous profiling agent\\"}),\\" \\\\u2014 a whole-system, always-on, continuous profiling solution that eliminates the need for run-time/bytecode instrumentation, recompilation, on-host debug symbols, or service restarts.\\"]}),`\\n`,(0,i.jsxs)(e.p,{children:[\\"Profiling helps organizations run efficient services by minimizing computational wastage, thereby reducing operational costs. Leveraging \\",(0,i.jsx)(e.a,{href:\\"https://ebpf.io/\\",rel:\\"nofollow\\",children:\\"eBPF\\"}),\\", the Elastic profiling agent provides unprecedented visibility into the runtime behavior of all applications: it builds stacktraces that go from the kernel, through userspace native code, all the way into code running in higher level runtimes, enabling you to identify performance regressions, reduce wasteful computations, and debug complex issues faster.\\"]}),`\\n`,(0,i.jsx)(e.h2,{id:\\"enabling-profiling-in-opentelemetry-a-step-toward-unified-observability\\",children:\\"Enabling profiling in OpenTelemetry: A step toward unified observability\\"}),`\\n`,(0,i.jsxs)(e.p,{children:[\\"Elastic actively participates in the OTel community, particularly within the Profiling Special Interest Group (SIG). 
This group has been instrumental in defining the OTel \\",(0,i.jsx)(e.a,{href:\\"https://github.com/open-telemetry/oteps/blob/main/text/profiles/0239-profiles-data-model.md\\",rel:\\"nofollow\\",children:\\"Profiling Data Model\\"}),\\", a crucial step toward standardizing profiling data.\\"]}),`\\n`,(0,i.jsxs)(e.p,{children:[\\"The recent merger of the \\",(0,i.jsx)(e.a,{href:\\"https://github.com/open-telemetry/oteps/pull/239\\",rel:\\"nofollow\\",children:\\"OpenTelemetry Enhancement Proposal (OTEP) introducing profiling support to the OpenTelemetry Protocol (OTLP)\\"}),\\" marks a significant milestone. With the standardization of profiles as a core observability pillar alongside metrics, tracing, and logs, OTel offers a comprehensive suite of observability tools, empowering users to gain a holistic view of their applications\' health and performance.\\"]}),`\\n`,(0,i.jsx)(e.p,{children:\\"In line with this advancement, we are donating our whole-system, eBPF-based continuous profiling agent to OTel. In parallel, we are implementing the experimental OTel Profiling signal in the profiling agent, to ensure and demonstrate OTel protocol compatibility in the agent and prepare it for a fully OTel-based collection of profiling signals and correlate it to logs, metrics, and traces.\\"}),`\\n`,(0,i.jsx)(e.h2,{id:\\"why-is-elastic-donating-the-ebpf-based-profiling-agent-to-opentelemetry\\",children:\\"Why is Elastic donating the eBPF-based profiling agent to OpenTelemetry?\\"}),`\\n`,(0,i.jsx)(e.p,{children:\\"Computational efficiency has always been a critical concern for software professionals. However, in an era where every line of code affects both the bottom line and the environment, there\'s an additional reason to focus on it. Elastic is committed to helping the OpenTelemetry community enhance computational efficiency because efficient software not only reduces the cost of goods sold (COGS) but also reduces carbon footprint.\\"}),`\\n`,(0,i.jsx)(e.p,{children:\\"We have seen firsthand \\\\u2014 both internally and from our customers\' testimonials \\\\u2014 how profiling insights aid in enhancing software efficiency. This results in an improved customer experience, lower resource consumption, and reduced cloud costs.\\"}),`\\n`,(0,i.jsx)(e.p,{children:(0,i.jsx)(e.img,{src:\\"/assets/images/elastic-donation-proposal-to-contribute-profiling-agent-to-opentelemetry/1-flamegraph.png\\",alt:\\"A differential flamegraph showing regression in release comparison\\",width:\\"1999\\",height:\\"1273\\"})}),`\\n`,(0,i.jsxs)(e.p,{children:[\\"Moreover, adopting a whole-system profiling strategy, such as \\",(0,i.jsx)(e.a,{href:\\"https://www.elastic.co/blog/whole-system-visibility-elastic-universal-profiling\\",rel:\\"nofollow\\",children:\\"Elastic Universal Profiling\\"}),`, differs significantly from traditional instrumentation profilers that focus solely on runtime. Elastic Universal Profiling provides whole-system visibility, profiling not only your own code but also third-party libraries, kernel operations, and other code you don\'t own. This comprehensive approach facilitates rapid optimizations by identifying non-optimal common libraries and uncovering \\"unknown unknowns\\" that consume CPU cycles. Often, a tipping point is reached when the resource consumption of libraries or certain daemon processes exceeds that of the applications themselves. 
Without system-wide profiling, along with the capabilities to slice data per service and aggregate total usage, pinpointing these resource-intensive components becomes a formidable challenge.`]}),`\\n`,(0,i.jsx)(e.p,{children:\\"At Elastic, we have a customer with an extensive cloud footprint who plans to negotiate with their cloud provider to reclaim money for the significant compute resource consumed by the cloud provider\'s in-VM agents. These examples highlight the importance of whole-system profiling and the benefits that the OpenTelemetry community will gain if the donation proposal is accepted.\\"}),`\\n`,(0,i.jsx)(e.p,{children:\\"Specifically, OTel users will gain access to a lightweight, battle-tested production-grade continuous profiling agent with the following features:\\"}),`\\n`,(0,i.jsxs)(e.ul,{children:[`\\n`,(0,i.jsxs)(e.li,{children:[`\\n`,(0,i.jsx)(e.p,{children:\\"Very low CPU and memory overhead (1% CPU and 250MB memory are our upper limits in testing, and the agent typically manages to stay way below that)\\"}),`\\n`]}),`\\n`,(0,i.jsxs)(e.li,{children:[`\\n`,(0,i.jsxs)(e.p,{children:[\\"Support for native C/C++ executables without the need for DWARF debug information by leveraging .eh_frame data, as described in \\\\u201C\\",(0,i.jsx)(e.a,{href:\\"https://www.elastic.co/blog/universal-profiling-frame-pointers-symbols-ebpf\\",rel:\\"nofollow\\",children:\\"How Universal Profiling unwinds stacks without frame pointers and symbols\\"}),\\"\\\\u201D\\"]}),`\\n`]}),`\\n`,(0,i.jsxs)(e.li,{children:[`\\n`,(0,i.jsx)(e.p,{children:\\"Support profiling of system libraries without frame pointers and without debug symbols on the host\\"}),`\\n`]}),`\\n`,(0,i.jsxs)(e.li,{children:[`\\n`,(0,i.jsx)(e.p,{children:\\"Support for mixed stacktraces between runtimes \\\\u2014 stacktraces go from Kernel space through unmodified system libraries all the way into high-level languages\\"}),`\\n`]}),`\\n`,(0,i.jsxs)(e.li,{children:[`\\n`,(0,i.jsx)(e.p,{children:\\"Support for native code (C/C++, Rust, Zig, Go, etc. without debug symbols on host)\\"}),`\\n`]}),`\\n`,(0,i.jsxs)(e.li,{children:[`\\n`,(0,i.jsx)(e.p,{children:\\"Support for a broad set of High-level languages (Hotspot JVM, Python, Ruby, PHP, Node.JS, V8, Perl), .NET is in preparation\\"}),`\\n`]}),`\\n`,(0,i.jsxs)(e.li,{children:[`\\n`,(0,i.jsxs)(e.p,{children:[(0,i.jsx)(e.strong,{children:\\"100% non-intrusive:\\"}),\\" there\'s no need to load agents or libraries into the processes that are being profiled\\"]}),`\\n`]}),`\\n`,(0,i.jsxs)(e.li,{children:[`\\n`,(0,i.jsx)(e.p,{children:\\"No need for any reconfiguration, instrumentation, or restarts of HLL interpreters and VMs: the agent supports unwinding each of the supported languages in the default configuration\\"}),`\\n`]}),`\\n`,(0,i.jsxs)(e.li,{children:[`\\n`,(0,i.jsx)(e.p,{children:\\"Support for x86 and Arm64 CPU architectures\\"}),`\\n`]}),`\\n`,(0,i.jsxs)(e.li,{children:[`\\n`,(0,i.jsx)(e.p,{children:\\"Support for native inline frames, which provide insights into compiler optimizations and offer a higher precision of function call chains\\"}),`\\n`]}),`\\n`,(0,i.jsxs)(e.li,{children:[`\\n`,(0,i.jsxs)(e.p,{children:[\\"Support for \\",(0,i.jsx)(e.a,{href:\\"https://www.elastic.co/guide/en/observability/current/profiling-probabilistic-profiling.html\\",rel:\\"nofollow\\",children:\\"Probabilistic Profiling\\"}),\\" to reduce data storage costs\\"]}),`\\n`]}),`\\n`,(0,i.jsxs)(e.li,{children:[`\\n`,(0,i.jsx)(e.p,{children:\\". . . 
and more\\"}),`\\n`]}),`\\n`]}),`\\n`,(0,i.jsx)(e.p,{children:\\"Elastic\'s commitment to enhancing computational efficiency and our belief in the OpenTelemetry vision underscores our dedication to advancing the observability ecosystem \\\\u2013\\\\u2013 by donating the profiling agent. Elastic is not only contributing technology but also dedicating a team of specialized profiling domain experts to co-maintain and advance the profiling capabilities within OpenTelemetry.\\"}),`\\n`,(0,i.jsx)(e.h2,{id:\\"how-does-this-donation-benefit-the-otel-community\\",children:\\"How does this donation benefit the OTel community?\\"}),`\\n`,(0,i.jsx)(e.p,{children:\\"Metrics, logs, and traces offer invaluable insights into system health. But what if you could unlock an even deeper level of visibility? Here\'s why profiling is a perfect complement to your OTel toolkit:\\"}),`\\n`,(0,i.jsx)(e.h3,{id:\\"1-deep-system-visibility-beyond-the-surface\\",children:\\"1. Deep system visibility: Beyond the surface\\"}),`\\n`,(0,i.jsx)(e.p,{children:`Think of whole-system profiling as an MRI scan for your fleet. It goes deeper into the internals of your system, revealing hidden performance issues lurking beneath the surface. You can identify \\"unknown unknowns\\" \\\\u2014 inefficiencies you wouldn\'t have noticed otherwise \\\\u2014 and gain a comprehensive understanding of how your system functions at its core.`}),`\\n`,(0,i.jsx)(e.h3,{id:\\"2-cross-signal-correlation-answering-why-with-confidence\\",children:\'2. Cross-signal correlation: Answering \\"why\\" with confidence\'}),`\\n`,(0,i.jsx)(e.p,{children:\'The Elastic Universal Profiling agent supports trace correlation with the OTel Java agent/SDK (with Go support coming soon!). This correlation enables OTel users to view profiling data by services or service endpoints, allowing for a more context-aware and targeted root cause analysis. This powerful combination allows you to pinpoint the exact cause of resource consumption at the trace level. No more guessing why specific functions hog CPU or why certain events occur. You can finally answer the critical \\"why\\" questions with precision, enabling targeted optimization efforts.\'}),`\\n`,(0,i.jsx)(e.h3,{id:\\"3-cost-and-sustainability-optimization-beyond-performance\\",children:\\"3. Cost and sustainability optimization: Beyond performance\\"}),`\\n`,(0,i.jsx)(e.p,{children:\\"Our approach to profiling goes beyond just performance gains. By correlating whole-system profiling data with tracing, we can help you measure the environmental impact and cloud cost associated with specific services and functionalities within your application. 
This empowers you to make data-driven decisions that optimize both performance and resource utilization, leading to a more sustainable and cost-effective operation.\\"}),`\\n`,(0,i.jsx)(e.p,{children:(0,i.jsx)(e.img,{src:\\"/assets/images/elastic-donation-proposal-to-contribute-profiling-agent-to-opentelemetry/2-universal-profiling.png\\",alt:\\"A differential function insight, showing the performance, cost, and CO2 impact of a change\\",width:\\"1999\\",height:\\"1383\\"})}),`\\n`,(0,i.jsx)(e.h2,{id:\\"elastics-commitment-to-opentelemetry\\",children:\\"Elastic\'s commitment to OpenTelemetry\\"}),`\\n`,(0,i.jsxs)(e.p,{children:[\\"Elastic currently supports a growing list of Cloud Native Computing Foundation (CNCF) projects \\",(0,i.jsx)(e.a,{href:\\"https://www.elastic.co/blog/kubernetes-k8s-observability-elasticsearch-cncf\\",rel:\\"nofollow\\",children:\\"such as Kubernetes (K8S), Prometheus, Fluentd, Fluent Bit, and Istio\\"}),\\". \\",(0,i.jsx)(e.a,{href:\\"https://www.elastic.co/observability/application-performance-monitoring\\",rel:\\"nofollow\\",children:\\"Elastic\\\\u2019s application performance monitoring (APM)\\"}),\\" also natively supports OTel, ensuring all APM capabilities are available with either Elastic or OTel agents or a combination of the two. In addition to the ECS contribution and ongoing collaboration with OTel SemConv, Elastic \\",(0,i.jsx)(e.a,{href:\\"https://www.elastic.co/observability/opentelemetry\\",rel:\\"nofollow\\",children:\\"has continued to make contributions to other OTel projects\\"}),\\", including language SDKs (such as OTel Swift, OTel Go, OTel Ruby, and others), and participates in several \\",(0,i.jsx)(e.a,{href:\\"https://github.com/open-telemetry/community#special-interest-groups\\",rel:\\"nofollow\\",children:\\"special interest groups (SIGs)\\"}),\\" to establish OTel as a standard for observability and security.\\"]}),`\\n`,(0,i.jsxs)(e.p,{children:[\\"We are excited about our \\",(0,i.jsx)(e.a,{href:\\"https://opentelemetry.io/blog/2023/ecs-otel-semconv-convergence/\\",rel:\\"nofollow\\",children:\\"strengthening relationship with OTel\\"}),\\" and the opportunity to donate our profiling agent in a way that benefits both the Elastic community and the broader OTel community. Learn more about \\",(0,i.jsx)(e.a,{href:\\"https://www.elastic.co/observability/opentelemetry\\",rel:\\"nofollow\\",children:\\"Elastic\\\\u2019s OpenTelemetry support\\"}),\\" or contribute to the \\",(0,i.jsx)(e.a,{href:\\"https://github.com/open-telemetry/community/issues/1918\\",rel:\\"nofollow\\",children:\\"donation proposal or just join the conversation\\"}),\\".\\"]}),`\\n`,(0,i.jsx)(e.p,{children:\\"Stay tuned for further updates as the profiling part of OTel continues to evolve.\\"}),`\\n`,(0,i.jsx)(e.p,{children:(0,i.jsx)(e.em,{children:\\"The release and timing of any features or functionality described in this post remain at Elastic\'s sole discretion. 
Any features or functionality not currently available may not be delivered on time or at all.\\"})})]})}function p(n={}){let{wrapper:e}=n.components||{};return e?(0,i.jsx)(e,{...n,children:(0,i.jsx)(h,{...n})}):h(n)}return v(O);})();\\n;return Component;"},"_id":"articles/elastic-donation-proposal-to-contribute-profiling-agent-to-opentelemetry.mdx","_raw":{"sourceFilePath":"articles/elastic-donation-proposal-to-contribute-profiling-agent-to-opentelemetry.mdx","sourceFileName":"elastic-donation-proposal-to-contribute-profiling-agent-to-opentelemetry.mdx","sourceFileDir":"articles","contentType":"mdx","flattenedPath":"articles/elastic-donation-proposal-to-contribute-profiling-agent-to-opentelemetry"},"type":"Article","imageUrl":"/assets/images/elastic-donation-proposal-to-contribute-profiling-agent-to-opentelemetry/ecs-otel-announcement-1.jpeg","readingTime":"8 min read","url":"/elastic-donation-proposal-to-contribute-profiling-agent-to-opentelemetry","headings":[{"level":2,"title":"Enabling profiling in OpenTelemetry: A step toward unified observability","href":"#enabling-profiling-in-opentelemetry-a-step-toward-unified-observability"},{"level":2,"title":"Why is Elastic donating the eBPF-based profiling agent to OpenTelemetry?","href":"#why-is-elastic-donating-the-ebpf-based-profiling-agent-to-opentelemetry"},{"level":2,"title":"How does this donation benefit the OTel community?","href":"#how-does-this-donation-benefit-the-otel-community"},{"level":3,"title":"1. Deep system visibility: Beyond the surface","href":"#1-deep-system-visibility-beyond-the-surface"},{"level":3,"title":"2. Cross-signal correlation: Answering \\"why\\" with confidence","href":"#2-cross-signal-correlation-answering-why-with-confidence"},{"level":3,"title":"3. Cost and sustainability optimization: Beyond performance","href":"#3-cost-and-sustainability-optimization-beyond-performance"},{"level":2,"title":"Elastic\'s commitment to OpenTelemetry","href":"#elastics-commitment-to-opentelemetry"}]},{"title":"Migrating from Elastic’s Go APM agent to OpenTelemetry Go SDK","slug":"elastic-go-apm-agent-to-opentelemetry-go-sdk","date":"2024-04-15","description":"As OpenTelemetry is fast becoming an industry standard, Elastic is fast adopting it as well. In this post, we show you a safe and easy way to migrate your Go application from our APM agent to OpenTelemetry.","image":"elastic-de-136675-V1_V1_(1).jpg","author":[{"slug":"damien-mathieu","type":"Author","_raw":{}}],"tags":[{"slug":"go","type":"Tag","_raw":{}},{"slug":"opentelemetry","type":"Tag","_raw":{}},{"slug":"apm","type":"Tag","_raw":{}}],"body":{"raw":"\\nAs [we’ve already shared](https://www.elastic.co/blog/elastic-opentelemetry-sdk-distributions), Elastic is committed to helping OpenTelemetry (OTel) succeed, which means, in some cases, building distributions of language SDKs.\\n\\nElastic is strategically standardizing on OTel for observability and security data collection. Additionally, Elastic is committed to working with the OTel community to become the best data collection infrastructure for the observability ecosystem. 
Elastic is deepening its relationship with OTel beyond the recent contributions of the [Elastic Common Schema (ECS) to OpenTelemetry](https://www.elastic.co/blog/ecs-elastic-common-schema-otel-opentelemetry-faq), [invokedynamic in the OTel Java agent](https://www.elastic.co/blog/elastic-invokedynamic-opentelemetry-java-agent), and the [upcoming profiling agent donation](https://www.elastic.co/blog/elastic-donation-proposal-to-contribute-profiling-agent-to-opentelemetry).\\n\\nSince Elastic version 7.14, Elastic has supported OTel natively by being able to directly ingest OpenTelemetry protocol (OTLP)-based traces, metrics, and logs.\\n\\nThe Go SDK is a bit different from the other language SDKs, as the Go language inherently lacks the dynamicity that would allow building a distribution that is not a fork.\\n\\nNevertheless, the absence of a distribution doesn’t mean you shouldn’t use OTel for data collection from Go applications with the Elastic Stack.\\n\\nElastic currently has an APM Go agent, but we recommend switching to the OTel Go SDK. In this post, we cover two ways you can do that migration:\\n\\n- By replacing all telemetry in your application’s code (a “big bang migration”) and shipping the change\\n\\n- By splitting the migration into atomic changes, to reduce the risk of regressions\\n\\n## A big bang migration\\n\\nThe simplest way to migrate from our APM Go agent to the OTel SDK is to remove all telemetry provided by the agent and replace it all with the OTel equivalents in one pass.\\n\\n### Automatic instrumentation\\n\\nMost of your instrumentation may be provided automatically, as it is part of the frameworks or libraries you are using.\\n\\nFor example, if you use the Elastic Go agent, you may be using our net/http auto instrumentation module like this:\\n\\n```go\\nimport (\\n\\t\\"fmt\\"\\n\\t\\"net/http\\"\\n\\n\\t\\"go.elastic.co/apm/module/apmhttp/v2\\"\\n)\\n\\nfunc handler(w http.ResponseWriter, req *http.Request) {\\n\\tfmt.Fprintf(w, \\"Hello World!\\")\\n}\\n\\nfunc main() {\\n\\thttp.ListenAndServe(\\n\\t\\t\\":8080\\",\\n\\t\\tapmhttp.Wrap(http.HandlerFunc(handler)),\\n\\t)\\n}\\n```\\n\\nWith OpenTelemetry, you would use the otelhttp module instead:\\n\\n```go\\nimport (\\n\\t\\"fmt\\"\\n\\t\\"net/http\\"\\n\\n\\t\\"go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp\\"\\n)\\n\\nfunc handler(w http.ResponseWriter, req *http.Request) {\\n\\tfmt.Fprintf(w, \\"Hello World!\\")\\n}\\n\\nfunc main() {\\n\\thttp.ListenAndServe(\\n\\t\\t\\":8080\\",\\n\\t\\totelhttp.NewHandler(http.HandlerFunc(handler), \\"http\\"),\\n\\t)\\n}\\n```\\n\\nYou should perform this same change for every other module you use from our agent.\\n\\n### Manual instrumentation\\n\\nYour application may also have manual instrumentation: traces and spans created directly within your application code by calling the Elastic APM agent API.\\n\\nYou may be creating transactions and spans like this with Elastic’s APM SDK:\\n\\n```go\\nimport (\\n\\t\\"context\\"\\n\\n\\t\\"go.elastic.co/apm/v2\\"\\n)\\n\\nfunc main() {\\n\\t// Create a transaction, and assign it to the context.\\n\\ttx := apm.DefaultTracer().StartTransaction(\\"GET /\\", \\"request\\")\\n\\tdefer tx.End()\\n\\tctx := apm.ContextWithTransaction(context.Background(), tx)\\n\\n\\t// Create a span\\n\\tspan, ctx := apm.StartSpan(ctx, \\"span\\", \\"custom\\")\\n\\tdefer span.End()\\n}\\n```\\n\\nOpenTelemetry uses the same API for both transactions and spans — what Elastic calls “transactions” are simply spans with no parent in OTel (“root spans”).\\n\\nSo, your instrumentation becomes the following:\\n\\n```go\\nimport (\\n\\t\\"context\\"\\n\\n\\t\\"go.opentelemetry.io/otel\\"\\n)\\n\\nfunc main() {\\n\\ttracer := otel.Tracer(\\"my library\\")\\n\\n\\t// Create a root span.\\n\\t// It is assigned to the returned context automatically.\\n\\tctx, span := tracer.Start(context.Background(), \\"GET /\\")\\n\\tdefer span.End()\\n\\n\\t// Create a child span (as the context has a parent).\\n\\tctx, childSpan := tracer.Start(ctx, \\"span\\")\\n\\tdefer childSpan.End()\\n}\\n```\\n\\nWith a big bang migration, you will need to migrate everything before shipping it to production. You cannot split the migration into smaller chunks.\\n\\nFor small applications or ones that only use automatic instrumentation, that constraint may be fine. It allows you to quickly validate the migration and move on.\\n\\nHowever, if you are working on a complex set of services, a large application, or one with a lot of manual instrumentation, you probably want to be able to ship code multiple times during the migration instead of all at once.\\n\\n## An atomic migration\\n\\nAn atomic migration would be one where you can ship atomic changes gradually and have your application keep working normally. Then, you are able to pull the final plug only at the end, once you are ready to do so.\\n\\nTo help with atomic migrations, we provide a [bridge between our APM Go agent and OpenTelemetry](https://www.elastic.co/guide/en/apm/agent/go/master/opentelemetry.html).\\n\\nThis bridge allows you to run both our agent and OTel alongside each other and to have instrumentations with both libraries in the same process with the data being transmitted to the same location and in the same format.\\n\\nYou can configure the OTel bridge with our agent like this:\\n\\n```go\\nimport (\\n\\t\\"log\\"\\n\\n\\t\\"go.elastic.co/apm/module/apmotel/v2\\"\\n\\n\\t\\"go.opentelemetry.io/otel\\"\\n)\\n\\nfunc main() {\\n\\tprovider, err := apmotel.NewTracerProvider()\\n\\tif err != nil {\\n\\t\\tlog.Fatal(err)\\n\\t}\\n\\totel.SetTracerProvider(provider)\\n}\\n```\\n\\nOnce this configuration is set, every span created by OTel will be transmitted to the Elastic APM agent.\\n\\nWith this bridge, you can make your migration much safer with the following process:\\n\\n- Add the bridge to your application.\\n\\n- Switch one instrumentation (automatic or manual) from the agent to OpenTelemetry, as you would have done for the big bang migration above, but a single one at a time.\\n\\n- Remove the bridge and our agent, and configure OpenTelemetry to transmit the data via its SDK.\\n\\nEach of those steps can be a single change within your application and go to production right away.\\n\\nIf any issue arises during the migration process, you should then be able to see it immediately and fix it before moving on.\\n\\n## Observability benefits from building with OTel\\n\\nAs OTel is quickly becoming an industry standard, and Elastic is committed to making it even better, migrating to it can be very beneficial to your engineering teams.\\n\\nIn Go, whether you do this through a big bang migration or using Elastic’s OTel bridge, you will benefit from instrumentations maintained by the global community, making your observability even more effective and helping you better understand what’s happening within your application.\\n\\n> - [Check out our code series on how to instrument with OpenTelemetry](https://www.elastic.co/blog/getting-started-opentelemetry-instrumentation-sample-app)\\n> - [Go manual instrumentation with
OpenTelemetry](https://www.elastic.co/blog/manual-instrumentation-of-go-applications-opentelemetry)\\n> - [Best practices for instrumenting with OpenTelemetry](https://www.elastic.co/blog/best-practices-instrumenting-opentelemetry)\\n> - [Using AI to analyze OpenTelemetry issues](https://www.elastic.co/blog/analyzing-opentelemetry-apps-elastic-ai-assistant-apm)\\n\\n_The release and timing of any features or functionality described in this post remain at Elastic\'s sole discretion. Any features or functionality not currently available may not be delivered on time or at all._\\n","code":"var Component=(()=>{var m=Object.create;var o=Object.defineProperty;var p=Object.getOwnPropertyDescriptor;var u=Object.getOwnPropertyNames;var g=Object.getPrototypeOf,y=Object.prototype.hasOwnProperty;var b=(n,e)=>()=>(e||n((e={exports:{}}).exports,e),e.exports),f=(n,e)=>{for(var i in e)o(n,i,{get:e[i],enumerable:!0})},l=(n,e,i,r)=>{if(e&&typeof e==\\"object\\"||typeof e==\\"function\\")for(let a of u(e))!y.call(n,a)&&a!==i&&o(n,a,{get:()=>e[a],enumerable:!(r=p(e,a))||r.enumerable});return n};var w=(n,e,i)=>(i=n!=null?m(g(n)):{},l(e||!n||!n.__esModule?o(i,\\"default\\",{value:n,enumerable:!0}):i,n)),v=n=>l(o({},\\"__esModule\\",{value:!0}),n);var c=b((E,s)=>{s.exports=_jsx_runtime});var O={};f(O,{default:()=>d,frontmatter:()=>T});var t=w(c()),T={title:\\"Migrating from Elastic\\\\u2019s Go APM agent to OpenTelemetry Go SDK\\",slug:\\"elastic-go-apm-agent-to-opentelemetry-go-sdk\\",date:\\"2024-04-15\\",description:\\"As OpenTelemetry is fast becoming an industry standard, Elastic is fast adopting it as well. In this post, we show you a safe and easy way to migrate your Go application from our APM agent to OpenTelemetry.\\",author:[{slug:\\"damien-mathieu\\"}],image:\\"elastic-de-136675-V1_V1_(1).jpg\\",tags:[{slug:\\"go\\"},{slug:\\"opentelemetry\\"},{slug:\\"apm\\"}]};function h(n){let e={a:\\"a\\",blockquote:\\"blockquote\\",code:\\"code\\",em:\\"em\\",h2:\\"h2\\",h3:\\"h3\\",li:\\"li\\",p:\\"p\\",pre:\\"pre\\",ul:\\"ul\\",...n.components};return(0,t.jsxs)(t.Fragment,{children:[(0,t.jsxs)(e.p,{children:[\\"As \\",(0,t.jsx)(e.a,{href:\\"https://www.elastic.co/blog/elastic-opentelemetry-sdk-distributions\\",rel:\\"nofollow\\",children:\\"we\\\\u2019ve already shared\\"}),\\", Elastic is committed to helping OpenTelemetry (OTel) succeed, which means, in some cases, building distributions of language SDKs.\\"]}),`\\n`,(0,t.jsxs)(e.p,{children:[\\"Elastic is strategically standardizing on OTel for observability and security data collection. Additionally, Elastic is committed to working with the OTel community to become the best data collection infrastructure for the observability ecosystem. 
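The migration process above ends with "configure OpenTelemetry to transmit the data via its SDK," but the post does not show that last piece of wiring. The following is a minimal sketch of one way to do it, assuming the standard `go.opentelemetry.io/otel/sdk/trace` and `otlptracegrpc` packages; the endpoint and bearer token are placeholders for your own Elastic APM (or OTel Collector) OTLP endpoint, not values from the article.

```go
package main

import (
	"context"
	"log"

	"go.opentelemetry.io/otel"
	"go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracegrpc"
	sdktrace "go.opentelemetry.io/otel/sdk/trace"
)

func main() {
	ctx := context.Background()

	// Export spans over OTLP/gRPC. Endpoint and authorization header are
	// placeholders; point them at your Elastic APM server or an OTel Collector.
	exporter, err := otlptracegrpc.New(ctx,
		otlptracegrpc.WithEndpoint("my-deployment.apm.us-east-1.aws.cloud.es.io:443"),
		otlptracegrpc.WithHeaders(map[string]string{
			"Authorization": "Bearer <secret-token>",
		}),
	)
	if err != nil {
		log.Fatal(err)
	}

	// Batch spans before export, and register the provider globally so the
	// otelhttp and manual instrumentations shown above pick it up.
	provider := sdktrace.NewTracerProvider(sdktrace.WithBatcher(exporter))
	defer provider.Shutdown(ctx)
	otel.SetTracerProvider(provider)
}
```

Once this replaces the apmotel bridge, the Elastic agent can be removed entirely and all data flows through the OTel SDK.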
Elastic is deepening its relationship with OTel beyond the recent contributions of the \\",(0,t.jsx)(e.a,{href:\\"https://www.elastic.co/blog/ecs-elastic-common-schema-otel-opentelemetry-faq\\",rel:\\"nofollow\\",children:\\"Elastic Common Schema (ECS) to OpenTelemetry\\"}),\\", \\",(0,t.jsx)(e.a,{href:\\"https://www.elastic.co/blog/elastic-invokedynamic-opentelemetry-java-agent\\",rel:\\"nofollow\\",children:\\"invokedynamic in the OTel Java agent\\"}),\\", and the \\",(0,t.jsx)(e.a,{href:\\"https://www.elastic.co/blog/elastic-donation-proposal-to-contribute-profiling-agent-to-opentelemetry\\",rel:\\"nofollow\\",children:\\"upcoming profiling agent donation\\"}),\\".\\"]}),`\\n`,(0,t.jsx)(e.p,{children:\\"Since Elastic version 7.14, Elastic has supported OTel natively by being able to directly ingest OpenTelemetry protocol (OTLP)-based traces, metrics, and logs.\\"}),`\\n`,(0,t.jsx)(e.p,{children:\\"The Go SDK is a bit different from the other language SDKs, as the Go language inherently lacks the dynamicity that would allow building a distribution that is not a fork.\\"}),`\\n`,(0,t.jsx)(e.p,{children:\\"Nevertheless, the absence of a distribution doesn\\\\u2019t mean you shouldn\\\\u2019t use OTel for data collection from Go applications with the Elastic Stack.\\"}),`\\n`,(0,t.jsx)(e.p,{children:\\"Elastic currently has an APM Go agent, but we recommend switching to the OTel Go SDK. In this post, we cover two ways you can do that migration:\\"}),`\\n`,(0,t.jsxs)(e.ul,{children:[`\\n`,(0,t.jsxs)(e.li,{children:[`\\n`,(0,t.jsx)(e.p,{children:\\"By replacing all telemetry in your application\\\\u2019s code (a \\\\u201Cbig bang migration\\\\u201D) and shipping the change\\"}),`\\n`]}),`\\n`,(0,t.jsxs)(e.li,{children:[`\\n`,(0,t.jsx)(e.p,{children:\\"By splitting the migration into atomic changes, to reduce the risk of regressions\\"}),`\\n`]}),`\\n`]}),`\\n`,(0,t.jsx)(e.h2,{id:\\"a-big-bang-migration\\",children:\\"A big bang migration\\"}),`\\n`,(0,t.jsx)(e.p,{children:\\"The simplest way to migrate from our APM Go agent to the OTel SDK may be by removing all telemetry provided by the agent and replacing it all with the new one.\\"}),`\\n`,(0,t.jsx)(e.h3,{id:\\"automatic-instrumentation\\",children:\\"Automatic instrumentation\\"}),`\\n`,(0,t.jsx)(e.p,{children:\\"Most of your instrumentation may be provided automatically, as it is part of the frameworks or libraries you are using.\\"}),`\\n`,(0,t.jsx)(e.p,{children:\\"For example, if you use the Elastic Go agent, you may be using our net/http auto instrumentation module like this:\\"}),`\\n`,(0,t.jsx)(e.pre,{children:(0,t.jsx)(e.code,{className:\\"language-go\\",children:`import (\\n\\t\\"net/http\\"\\n\\t\\"go.elastic.co/apm/module/apmhttp/v2\\"\\n)\\n\\n\\nfunc handler(w http.ResponseWriter, req *http.Request) {\\n\\tfmt.Fprintf(w, \\"Hello World!\\")\\n}\\n\\nfunc main() {\\n\\thttp.ListenAndServe(\\n \\":8080\\",\\n apmhttp.Wrap(http.HandlerFunc(handler)),\\n\\t)\\n}\\n`})}),`\\n`,(0,t.jsx)(e.p,{children:\\"With OpenTelemetry, you would use the otelhttp module instead:\\"}),`\\n`,(0,t.jsx)(e.pre,{children:(0,t.jsx)(e.code,{className:\\"language-go\\",children:`import (\\n\\t\\"net/http\\"\\n\\t\\"go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp\\"\\n)\\n\\n\\nfunc handler(w http.ResponseWriter, req *http.Request) {\\n\\tfmt.Fprintf(w, \\"Hello World!\\")\\n}\\n\\nfunc main() {\\n\\thttp.ListenAndServe(\\n \\":8080\\",\\n otelhttp.NewHandler(http.HandlerFunc(handler), 
\\"http\\"),\\n\\t)\\n}\\n`})}),`\\n`,(0,t.jsx)(e.p,{children:\\"You should perform this same change for every other module you use from our agent.\\"}),`\\n`,(0,t.jsx)(e.h3,{id:\\"manual-instrumentation\\",children:\\"Manual instrumentation\\"}),`\\n`,(0,t.jsx)(e.p,{children:\\"Your application may also have manual instrumentations, which consist of creating traces and spans directly within your application code by calling the Elastic APM agent API.\\"}),`\\n`,(0,t.jsx)(e.p,{children:\\"You may be creating transactions and spans like this with Elastic\\\\u2019s APM SDK:\\"}),`\\n`,(0,t.jsx)(e.pre,{children:(0,t.jsx)(e.code,{className:\\"language-go\\",children:`import (\\n\\t\\"go.elastic.co/apm/v2\\"\\n)\\n\\nfunc main() {\\n // Create a transaction, and assign it to the context.\\n tx := apm.DefaultTracer().StartTransaction(\\"GET /\\", \\"request\\")\\n defer tx.End()\\n ctx = apm.ContextWithTransaction(ctx, tx)\\n\\n // Create a span\\n span, ctx := apm.StartSpan(ctx, \\"span\\")\\n defer span.End()\\n}\\n`})}),`\\n`,(0,t.jsx)(e.p,{children:\\"OpenTelemetry uses the same API for both transactions and spans \\\\u2014 what Elastic considers \\\\u201Ctransactions\\\\u201D are just considered spans with no parent in OTel (\\\\u201Croot spans\\\\u201D).\\"}),`\\n`,(0,t.jsx)(e.p,{children:\\"So, your instrumentation becomes the following:\\"}),`\\n`,(0,t.jsx)(e.pre,{children:(0,t.jsx)(e.code,{className:\\"language-go\\",children:`import (\\n\\t\\"go.opentelemetry.io/otel/trace\\"\\n)\\n\\nfunc main() {\\n\\ttracer := otel.Tracer(\\"my library\\")\\n\\n\\t// Create a root span.\\n\\t// It is assigned to the returned context automatically.\\n\\tctx, span := tracer.Start(ctx, \\"GET /\\")\\n\\tdefer span.End()\\n\\n\\t// Create a child span (as the context has a parent).\\n\\tctx, span := tracer.Start(ctx, \\"span\\")\\n\\tdefer span.End()\\n}\\n`})}),`\\n`,(0,t.jsx)(e.p,{children:\\"With a big bang migration, you will need to migrate everything before shipping it to production. You cannot split the migration into smaller chunks.\\"}),`\\n`,(0,t.jsx)(e.p,{children:\\"For small applications or ones that only use automatic instrumentation, that constraint may be fine. It allows you to quickly validate the migration and move on.\\"}),`\\n`,(0,t.jsx)(e.p,{children:\\"However, if you are working on a complex set of services, a large application, or one with a lot of manual instrumentation, you probably want to be able to ship code multiple times during the migration instead of all at once.\\"}),`\\n`,(0,t.jsx)(e.h2,{id:\\"an-atomic-migration\\",children:\\"An atomic migration\\"}),`\\n`,(0,t.jsx)(e.p,{children:\\"An atomic migration would be one where you can ship atomic changes gradually and have your application keep working normally. 
Then, you are able to pull the final plug only at the end, once you are ready to do so.\\"}),`\\n`,(0,t.jsxs)(e.p,{children:[\\"To help with atomic migrations, we provide a \\",(0,t.jsx)(e.a,{href:\\"https://www.elastic.co/guide/en/apm/agent/go/master/opentelemetry.html\\",rel:\\"nofollow\\",children:\\"bridge between our APM Go agent and OpenTelemetry\\"}),\\".\\"]}),`\\n`,(0,t.jsx)(e.p,{children:\\"This bridge allows you to run both our agent and OTel alongside each other and to have instrumentations with both libraries in the same process with the data being transmitted to the same location and in the same format.\\"}),`\\n`,(0,t.jsx)(e.p,{children:\\"You can configure the OTel bridge with our agent like this:\\"}),`\\n`,(0,t.jsx)(e.pre,{children:(0,t.jsx)(e.code,{className:\\"language-go\\",children:`import (\\n\\t\\"go.elastic.co/apm/v2\\"\\n\\t\\"go.elastic.co/apm/module/apmotel/v2\\"\\n\\n\\t\\"go.opentelemetry.io/otel\\"\\n)\\n\\nfunc main() {\\n\\tprovider, err := apmotel.NewTracerProvider()\\n\\tif err != nil {\\n\\t\\tlog.Fatal(err)\\n\\t}\\n\\totel.SetTracerProvider(provider)\\n}\\n`})}),`\\n`,(0,t.jsx)(e.p,{children:\\"Once this configuration is set, every span created by OTel will be transmitted to the Elastic APM agent.\\"}),`\\n`,(0,t.jsx)(e.p,{children:\\"With this bridge, you can make your migration much safer with the following process:\\"}),`\\n`,(0,t.jsxs)(e.ul,{children:[`\\n`,(0,t.jsxs)(e.li,{children:[`\\n`,(0,t.jsx)(e.p,{children:\\"Add the bridge to your application.\\"}),`\\n`]}),`\\n`,(0,t.jsxs)(e.li,{children:[`\\n`,(0,t.jsx)(e.p,{children:\\"Switch one instrumentation (automatic or manual) from the agent to OpenTelemetry, as you would have done for the big bang migration above but a single one at a time.\\"}),`\\n`]}),`\\n`,(0,t.jsxs)(e.li,{children:[`\\n`,(0,t.jsx)(e.p,{children:\\"Remove the bridge and our agent, and configure OpenTelemetry to transmit the data via its SDK.\\"}),`\\n`]}),`\\n`]}),`\\n`,(0,t.jsx)(e.p,{children:\\"Each of those steps can be a single change within your application and go to production right away.\\"}),`\\n`,(0,t.jsx)(e.p,{children:\\"If any issue arises during the migration process, you should then be able to see it immediately and fix it before moving on.\\"}),`\\n`,(0,t.jsx)(e.h2,{id:\\"observability-benefits-from-building-with-otel\\",children:\\"Observability benefits from building with OTel\\"}),`\\n`,(0,t.jsx)(e.p,{children:\\"As OTel is quickly becoming an industry standard, and Elastic is committed to making it even better, it can be very beneficial to your engineering teams to migrate to it.\\"}),`\\n`,(0,t.jsx)(e.p,{children:\\"In Go, whether you do this through a big bang migration or using Elastic\\\\u2019s OTel bridge, doing so will allow you to benefit from instrumentations maintained by the global community to make your observability even more effective and to better understand what\\\\u2019s happening within your application.\\"}),`\\n`,(0,t.jsxs)(e.blockquote,{children:[`\\n`,(0,t.jsxs)(e.ul,{children:[`\\n`,(0,t.jsx)(e.li,{children:(0,t.jsx)(e.a,{href:\\"https://www.elastic.co/blog/getting-started-opentelemetry-instrumentation-sample-app\\",rel:\\"nofollow\\",children:\\"Check out our code series on how to instrument with OpenTelemetry\\"})}),`\\n`,(0,t.jsx)(e.li,{children:(0,t.jsx)(e.a,{href:\\"https://www.elastic.co/blog/manual-instrumentation-of-go-applications-opentelemetry\\",rel:\\"nofollow\\",children:\\"Go manual instrumentation with 
OpenTelemetry\\"})}),`\\n`,(0,t.jsx)(e.li,{children:(0,t.jsx)(e.a,{href:\\"https://www.elastic.co/blog/best-practices-instrumenting-opentelemetry\\",rel:\\"nofollow\\",children:\\"Best practices for instrumenting with OpenTelemetry\\"})}),`\\n`,(0,t.jsx)(e.li,{children:(0,t.jsx)(e.a,{href:\\"https://www.elastic.co/blog/analyzing-opentelemetry-apps-elastic-ai-assistant-apm\\",rel:\\"nofollow\\",children:\\"Using AI to analyze OpenTelemetry issues\\"})}),`\\n`]}),`\\n`]}),`\\n`,(0,t.jsx)(e.p,{children:(0,t.jsx)(e.em,{children:\\"The release and timing of any features or functionality described in this post remain at Elastic\'s sole discretion. Any features or functionality not currently available may not be delivered on time or at all.\\"})})]})}function d(n={}){let{wrapper:e}=n.components||{};return e?(0,t.jsx)(e,{...n,children:(0,t.jsx)(h,{...n})}):h(n)}return v(O);})();\\n;return Component;"},"_id":"articles/elastic-go-apm-agent-to-opentelemetry-go-sdk.mdx","_raw":{"sourceFilePath":"articles/elastic-go-apm-agent-to-opentelemetry-go-sdk.mdx","sourceFileName":"elastic-go-apm-agent-to-opentelemetry-go-sdk.mdx","sourceFileDir":"articles","contentType":"mdx","flattenedPath":"articles/elastic-go-apm-agent-to-opentelemetry-go-sdk"},"type":"Article","imageUrl":"/assets/images/elastic-go-apm-agent-to-opentelemetry-go-sdk/elastic-de-136675-V1_V1_(1).jpg","readingTime":"7 min read","url":"/elastic-go-apm-agent-to-opentelemetry-go-sdk","headings":[{"level":2,"title":"A big bang migration","href":"#a-big-bang-migration"},{"level":3,"title":"Automatic instrumentation","href":"#automatic-instrumentation"},{"level":3,"title":"Manual instrumentation","href":"#manual-instrumentation"},{"level":2,"title":"An atomic migration","href":"#an-atomic-migration"},{"level":2,"title":"Observability benefits from building with OTel","href":"#observability-benefits-from-building-with-otel"}]},{"title":"Elastic\'s contribution: Invokedynamic in the OpenTelemetry Java agent","slug":"invokedynamic-opentelemetry-java-agent","date":"2023-10-19","description":"The instrumentation approach in OpenTelemetry\'s Java Agent comes with some limitations with respect to maintenance and testability. Elastic contributes an invokedynamic-based instrumentation approach that helps overcoming these limitations.","image":"24-crystals.jpeg","author":[{"slug":"alexander-wert","type":"Author","_raw":{}},{"slug":"jack-shirazi","type":"Author","_raw":{}},{"slug":"jonas-kunz","type":"Author","_raw":{}},{"slug":"sylvain-juge","type":"Author","_raw":{}}],"tags":[{"slug":"opentelemetry","type":"Tag","_raw":{}},{"slug":"contribution","type":"Tag","_raw":{}},{"slug":"java","type":"Tag","_raw":{}}],"body":{"raw":"\\nAs the second largest and active Cloud Native Computing Foundation (CNCF) project, [OpenTelemetry](https://opentelemetry.io/) is well on its way to becoming the ubiquitous, unified standard and framework for observability. OpenTelemetry owes this success to its comprehensive and feature-rich toolset that allows users to retrieve valuable observability data from their applications with low effort. The OpenTelemetry Java agent is one of the most mature and feature-rich components in OpenTelemetry’s ecosystem. 
It provides automatic instrumentation for JVM-based applications and comes with broad coverage of auto-instrumentation modules for popular Java frameworks and libraries.\\n\\nThe original instrumentation approach used in the OpenTelemetry Java agent left the maintenance and development of auto-instrumentation modules subject to some restrictions. As part of [our reinforced commitment to OpenTelemetry](https://www.elastic.co/blog/transforming-observability-ai-assistant-otel-standardization-continuous-profiling-log-analytics), Elastic\xae helps evolve and improve OpenTelemetry projects and components. [Elastic’s contribution of the Elastic Common Schema](https://www.elastic.co/blog/ecs-elastic-common-schema-otel-opentelemetry-announcement) to OpenTelemetry was an important step for the open-source community. As another step in our commitment to OpenTelemetry, Elastic started contributing to the OpenTelemetry Java agent.\\n\\n## Elastic’s invokedynamic-based instrumentation approach\\n\\nTo overcome the above-mentioned limitations in developing and maintaining auto-instrumentation modules in the OpenTelemetry Java agent, Elastic started contributing its [**invokedynamic**-based instrumentation approach](https://www.elastic.co/blog/embracing-invokedynamic-to-tame-class-loaders-in-java-agents) to the OpenTelemetry Java agent in July 2023.\\n\\nTo explain the improvement, you should know that in Java, a common way to auto-instrument applications is to use Java agents that perform bytecode instrumentation at runtime. [Byte Buddy](https://bytebuddy.net/#/) is a popular and widespread utility that helps with bytecode instrumentation without the need to deal with Java’s bytecode directly. Instrumentation logic that collects observability data from the target application’s code lives in so-called _advice methods_. Byte Buddy provides different ways of hooking these advice methods into the target application’s methods:\\n\\n- _Advice inlining:_ The advice method’s code is copied into the instrumented target method.\\n- _Static advice dispatching:_ The instrumented target method invokes static advice methods that need to be visible to the instrumented code.\\n- _Advice dispatching with **invokedynamic**:_ The instrumented target method uses the JVM’s **invokedynamic** bytecode instruction to call advice methods that are isolated from the instrumented code.\\n\\nThese different approaches are described in great detail in our related blog post on [Elastic’s Java APM agent using invokedynamic](https://www.elastic.co/blog/embracing-invokedynamic-to-tame-class-loaders-in-java-agents). In a nutshell, both approaches, _advice inlining_ and _dispatching to static advice methods_, come with some limitations with respect to writing and maintaining the advice code. So far, the OpenTelemetry Java agent has used _advice inlining_ for its bytecode instrumentation. The resulting limitations on developing instrumentations are [documented in corresponding developer guidelines](https://github.com/open-telemetry/opentelemetry-java-instrumentation/blob/v1.30.0/docs/contributing/writing-instrumentation-module.md#use-advice-classes-to-write-code-that-will-get-injected-to-the-instrumented-library-classes). Among other things, the limitation of not being able to debug advice code is a painful restriction when developing and maintaining instrumentation code.\\n\\nElastic’s APM Java agent has been using the **invokedynamic** approach with its benefits for years — field-proven by thousands of customers. To help improve the OpenTelemetry Java agent, Elastic started contributing the **invokedynamic** approach with the goal of simplifying and improving the development and maintainability of auto-instrumentation modules. The contribution proposal and the implementation outline are documented in more detail in [this GitHub issue](https://github.com/open-telemetry/opentelemetry-java-instrumentation/issues/8999).\\n\\nWith the new approach in place, Elastic will help migrate existing instrumentations so the OTel Java community can benefit from the **invokedynamic**-based instrumentation approach.\\n\\n> Elastic supports OTel natively and has numerous capabilities to help you analyze your application with OTel. \\n>\\n> - [Native OpenTelemetry support in Elastic Observability](https://www.elastic.co/blog/opentelemetry-observability)\\n> - [Best Practices for instrumenting OpenTelemetry](https://www.elastic.co/blog/best-practices-instrumenting-opentelemetry)\\n> - [Independence with OpenTelemetry on Elastic](https://www.elastic.co/blog/opentelemetry-observability)\\n>\\n> Instrumenting with OpenTelemetry:\\n>\\n> - [Elastiflix application](https://www.elastic.co/blog/getting-started-opentelemetry-instrumentation-sample-app), a guide to instrument different languages with OpenTelemetry (this is the application the team built to highlight _all_ the languages below)\\n> - Python: [Auto-instrumentation](https://www.elastic.co/blog/auto-instrumentation-of-python-applications-opentelemetry), [Manual instrumentation](https://www.elastic.co/blog/manual-instrumentation-of-python-applications-opentelemetry)\\n> - Java: [Auto-instrumentation](https://www.elastic.co/blog/auto-instrumentation-of-java-applications-opentelemetry), [Manual instrumentation](https://www.elastic.co/blog/manual-instrumentation-of-java-applications-opentelemetry)\\n> - Node.js: [Auto-instrumentation](https://www.elastic.co/blog/auto-instrument-nodejs-applications-opentelemetry), [Manual instrumentation](https://www.elastic.co/blog/manual-instrumentation-of-nodejs-applications-opentelemetry)\\n> - .NET: [Auto-instrumentation](https://www.elastic.co/blog/auto-instrumentation-of-net-applications-opentelemetry), [Manual instrumentation](https://www.elastic.co/blog/manual-instrumentation-of-net-applications-opentelemetry)\\n> - Go: [Manual instrumentation](https://elastic.co/blog/manual-instrumentation-of-go-applications-opentelemetry)\\n\\n_The release and timing of any features or functionality described in this post remain at Elastic\'s sole discretion.
Any features or functionality not currently available may not be delivered on time or at all._\\n","code":"var Component=(()=>{var m=Object.create;var a=Object.defineProperty;var p=Object.getOwnPropertyDescriptor;var u=Object.getOwnPropertyNames;var g=Object.getPrototypeOf,w=Object.prototype.hasOwnProperty;var y=(n,e)=>()=>(e||n((e={exports:{}}).exports,e),e.exports),f=(n,e)=>{for(var i in e)a(n,i,{get:e[i],enumerable:!0})},r=(n,e,i,l)=>{if(e&&typeof e==\\"object\\"||typeof e==\\"function\\")for(let o of u(e))!w.call(n,o)&&o!==i&&a(n,o,{get:()=>e[o],enumerable:!(l=p(e,o))||l.enumerable});return n};var b=(n,e,i)=>(i=n!=null?m(g(n)):{},r(e||!n||!n.__esModule?a(i,\\"default\\",{value:n,enumerable:!0}):i,n)),v=n=>r(a({},\\"__esModule\\",{value:!0}),n);var c=y((O,s)=>{s.exports=_jsx_runtime});var j={};f(j,{default:()=>d,frontmatter:()=>T});var t=b(c()),T={title:\\"Elastic\'s contribution: Invokedynamic in the OpenTelemetry Java agent\\",slug:\\"invokedynamic-opentelemetry-java-agent\\",date:\\"2023-10-19\\",description:\\"The instrumentation approach in OpenTelemetry\'s Java Agent comes with some limitations with respect to maintenance and testability. Elastic contributes an invokedynamic-based instrumentation approach that helps overcoming these limitations.\\",author:[{slug:\\"alexander-wert\\"},{slug:\\"jack-shirazi\\"},{slug:\\"jonas-kunz\\"},{slug:\\"sylvain-juge\\"}],image:\\"24-crystals.jpeg\\",tags:[{slug:\\"opentelemetry\\"},{slug:\\"contribution\\"},{slug:\\"java\\"}]};function h(n){let e={a:\\"a\\",blockquote:\\"blockquote\\",br:\\"br\\",em:\\"em\\",h2:\\"h2\\",li:\\"li\\",p:\\"p\\",strong:\\"strong\\",ul:\\"ul\\",...n.components};return(0,t.jsxs)(t.Fragment,{children:[(0,t.jsxs)(e.p,{children:[\\"As the second largest and active Cloud Native Computing Foundation (CNCF) project, \\",(0,t.jsx)(e.a,{href:\\"https://opentelemetry.io/\\",rel:\\"nofollow\\",children:\\"OpenTelemetry\\"}),\\" is well on its way to becoming the ubiquitous, unified standard and framework for observability. OpenTelemetry owes this success to its comprehensive and feature-rich toolset that allows users to retrieve valuable observability data from their applications with low effort. The OpenTelemetry Java agent is one of the most mature and feature-rich components in OpenTelemetry\\\\u2019s ecosystem. It provides automatic instrumentation for JVM-based applications and comes with a broad coverage of auto-instrumentation modules for popular Java-frameworks and libraries.\\"]}),`\\n`,(0,t.jsxs)(e.p,{children:[\\"The original instrumentation approach used in the OpenTelemetry Java agent left the maintenance and development of auto-instrumentation modules subject to some restrictions. As part of \\",(0,t.jsx)(e.a,{href:\\"https://www.elastic.co/blog/transforming-observability-ai-assistant-otel-standardization-continuous-profiling-log-analytics\\",rel:\\"nofollow\\",children:\\"our reinforced commitment to OpenTelemetry\\"}),\\", Elastic\\\\xAE helps evolve and improve OpenTelemetry projects and components. \\",(0,t.jsx)(e.a,{href:\\"https://www.elastic.co/blog/ecs-elastic-common-schema-otel-opentelemetry-announcement\\",rel:\\"nofollow\\",children:\\"Elastic\\\\u2019s contribution of the Elastic Common Schema\\"}),\\" to OpenTelemetry was an important step for the open-source community. 
As another step in our commitment to OpenTelemetry, Elastic started contributing to the OpenTelemetry Java agent.\\"]}),`\\n`,(0,t.jsx)(e.h2,{id:\\"elastics-invokedynamic-based-instrumentation-approach\\",children:\\"Elastic\\\\u2019s invokedynamic-based instrumentation approach\\"}),`\\n`,(0,t.jsxs)(e.p,{children:[\\"To overcome the above-mentioned limitations in developing and maintaining auto-instrumentation modules in the OpenTelemetry Java agent, Elastic started contributing its \\",(0,t.jsx)(e.a,{href:\\"https://www.elastic.co/blog/embracing-invokedynamic-to-tame-class-loaders-in-java-agents\\",rel:\\"nofollow\\",children:(0,t.jsx)(e.strong,{children:\\"invokedynamic\\"})}),(0,t.jsx)(e.a,{href:\\"https://www.elastic.co/blog/embracing-invokedynamic-to-tame-class-loaders-in-java-agents\\",rel:\\"nofollow\\",children:\\"-based instrumentation approach\\"}),\\" to the OpenTelemetry Java agent in July 2023.\\"]}),`\\n`,(0,t.jsxs)(e.p,{children:[\\"To explain the improvement, you should know that in Java, a common approach to do auto-instrumentation of applications is through utilizing Java agents that do bytecode instrumentation at runtime. \\",(0,t.jsx)(e.a,{href:\\"https://bytebuddy.net/#/\\",rel:\\"nofollow\\",children:\\"Byte Buddy\\"}),\\" is a popular and widespread utility that helps with bytecode instrumentation without the need to deal with Java\\\\u2019s bytecode directly. Instrumentation logic that collects observability data from the target application\\\\u2019s code lives in so-called \\",(0,t.jsx)(e.em,{children:\\"advice methods\\"}),\\". Byte Buddy provides different ways of hooking these advice methods into the target application\\\\u2019s methods:\\"]}),`\\n`,(0,t.jsxs)(e.ul,{children:[`\\n`,(0,t.jsxs)(e.li,{children:[(0,t.jsx)(e.em,{children:\\"Advice inlining:\\"}),\\" The advice method\\\\u2019s code is being copied into the instrumented target method.\\"]}),`\\n`,(0,t.jsxs)(e.li,{children:[(0,t.jsx)(e.em,{children:\\"Static advice dispatching:\\"}),\\" The instrumented target method invokes static advice methods that need to be visible by the instrumented code.\\"]}),`\\n`,(0,t.jsxs)(e.li,{children:[(0,t.jsx)(e.em,{children:\\"Advice dispatching with\\"}),\\" _ \\",(0,t.jsx)(e.strong,{children:\\"invokedynamic\\"}),\\" __:_ The instrumented target method uses the JVM\\\\u2019s \\",(0,t.jsx)(e.strong,{children:\\"invokedynamic\\"}),\\" bytecode instruction to call advice methods that are isolated from the instrumented code.\\"]}),`\\n`]}),`\\n`,(0,t.jsxs)(e.p,{children:[\\"These different approaches are described in great detail in our related blog post on \\",(0,t.jsx)(e.a,{href:\\"https://www.elastic.co/blog/embracing-invokedynamic-to-tame-class-loaders-in-java-agents\\",rel:\\"nofollow\\",children:\\"Elastic\\\\u2019s Java APM agent using invokedynamic\\"}),\\". In a nutshell, both approaches, \\",(0,t.jsx)(e.em,{children:\\"advice inlining\\"}),\\" and \\",(0,t.jsx)(e.em,{children:\\"dispatching to static advice methods\\"}),\\" come with some limitations with respect to writing and maintaining the advice code. So far, the OpenTelemetry Java agent has used \\",(0,t.jsx)(e.em,{children:\\"advice inlining\\"}),\\" for its bytecode instrumentation. 
The resulting limitations on developing instrumentations are \\",(0,t.jsx)(e.a,{href:\\"https://github.com/open-telemetry/opentelemetry-java-instrumentation/blob/v1.30.0/docs/contributing/writing-instrumentation-module.md#use-advice-classes-to-write-code-that-will-get-injected-to-the-instrumented-library-classes\\",rel:\\"nofollow\\",children:\\"documented in corresponding developer guidelines\\"}),\\". Among other things, the limitation of not being able to debug advice code is a painful restriction when developing and maintaining instrumentation code.\\"]}),`\\n`,(0,t.jsxs)(e.p,{children:[\\"Elastic\\\\u2019s APM Java agent has been using the \\",(0,t.jsx)(e.strong,{children:\\"invokedynamic\\"}),\\" approach with its benefits for years \\\\u2014 field-proven by thousands of customers. To help improve the OpenTelemetry Java agent, Elastic started contributing the \\",(0,t.jsx)(e.strong,{children:\\"invokedynamic\\"}),\\" approach with the goal to simplify and improve the development and maintainability of auto-instrumentation modules. The contribution proposal and the implementation outline is documented in more detail in \\",(0,t.jsx)(e.a,{href:\\"https://github.com/open-telemetry/opentelemetry-java-instrumentation/issues/8999\\",rel:\\"nofollow\\",children:\\"this GitHub issue\\"}),\\".\\"]}),`\\n`,(0,t.jsxs)(e.p,{children:[\\"With the new approach in place, Elastic will help migrate existing instrumentations so the OTel Java community can benefit from the \\",(0,t.jsx)(e.strong,{children:\\"invokedynamic\\"}),\\" -based instrumentation approach.\\"]}),`\\n`,(0,t.jsxs)(e.blockquote,{children:[`\\n`,(0,t.jsx)(e.p,{children:\\"Elastic supports OTel natively, and has numerous capabilities to help you analyze your application with OTel.\\\\xA0\\"}),`\\n`,(0,t.jsxs)(e.ul,{children:[`\\n`,(0,t.jsx)(e.li,{children:(0,t.jsx)(e.a,{href:\\"https://www.elastic.co/blog/opentelemetry-observability\\",rel:\\"nofollow\\",children:\\"Native OpenTelemetry support in Elastic Observability\\"})}),`\\n`,(0,t.jsx)(e.li,{children:(0,t.jsx)(e.a,{href:\\"https://www.elastic.co/blog/best-practices-instrumenting-opentelemetry\\",rel:\\"nofollow\\",children:\\"Best Practices for instrumenting OpenTelemetry\\"})}),`\\n`,(0,t.jsx)(e.li,{children:(0,t.jsx)(e.a,{href:\\"https://www.elastic.co/blog/opentelemetry-observability\\",rel:\\"nofollow\\",children:\\"Independence with OpenTelemetry on Elastic\\"})}),`\\n`]}),`\\n`,(0,t.jsx)(e.p,{children:\\"Instrumenting with OpenTelemetry:\\"}),`\\n`,(0,t.jsxs)(e.ul,{children:[`\\n`,(0,t.jsxs)(e.li,{children:[(0,t.jsx)(e.a,{href:\\"https://www.elastic.co/blog/getting-started-opentelemetry-instrumentation-sample-app\\",rel:\\"nofollow\\",children:\\"Elastiflix application\\"}),\\", a guide to instrument different languages with OpenTelemetry (this is the application the team built to highlight \\",(0,t.jsx)(e.em,{children:\\"all\\"}),\\" the languages below)\\"]}),`\\n`,(0,t.jsxs)(e.li,{children:[\\"Python: \\",(0,t.jsx)(e.a,{href:\\"https://www.elastic.co/blog/auto-instrumentation-of-python-applications-opentelemetry\\",rel:\\"nofollow\\",children:\\"Auto-instrumentation\\"}),\\", \\",(0,t.jsx)(e.a,{href:\\"https://www.elastic.co/blog/manual-instrumentation-of-python-applications-opentelemetry\\",rel:\\"nofollow\\",children:\\"Manual instrumentation\\"})]}),`\\n`,(0,t.jsxs)(e.li,{children:[\\"Java: \\",(0,t.jsx)(e.a,{href:\\"https://www.elastic.co/blog/auto-instrumentation-of-java-applications-opentelemetry\\",rel:\\"nofollow\\",children:\\"Auto-instrumentation\\"}),\\", 
\\",(0,t.jsx)(e.a,{href:\\"https://www.elastic.co/blog/manual-instrumentation-of-java-applications-opentelemetry\\",rel:\\"nofollow\\",children:\\"Manual instrumentation\\\\xA0\\"})]}),`\\n`,(0,t.jsxs)(e.li,{children:[\\"Node.js: \\",(0,t.jsx)(e.a,{href:\\"https://www.elastic.co/blog/auto-instrument-nodejs-applications-opentelemetry\\",rel:\\"nofollow\\",children:\\"Auto-instrumentation\\"}),\\", \\",(0,t.jsx)(e.a,{href:\\"https://www.elastic.co/blog/manual-instrumentation-of-nodejs-applications-opentelemetry\\",rel:\\"nofollow\\",children:\\"Manual instrumentation\\"})]}),`\\n`,(0,t.jsxs)(e.li,{children:[\\".NET: \\",(0,t.jsx)(e.a,{href:\\"https://www.elastic.co/blog/auto-instrumentation-of-net-applications-opentelemetry\\",rel:\\"nofollow\\",children:\\"Auto-instrumentation\\"}),\\", \\",(0,t.jsx)(e.a,{href:\\"https://www.elastic.co/blog/manual-instrumentation-of-net-applications-opentelemetry\\",rel:\\"nofollow\\",children:\\"Manual instrumentation\\"}),(0,t.jsx)(e.br,{}),`\\n`,\\"Go: \\",(0,t.jsx)(e.a,{href:\\"https://elastic.co/blog/manual-instrumentation-of-go-applications-opentelemetry\\",rel:\\"nofollow\\",children:\\"Manual instrumentation\\"})]}),`\\n`]}),`\\n`]}),`\\n`,(0,t.jsx)(e.p,{children:(0,t.jsx)(e.em,{children:\\"The release and timing of any features or functionality described in this post remain at Elastic\'s sole discretion. Any features or functionality not currently available may not be delivered on time or at all.\\"})})]})}function d(n={}){let{wrapper:e}=n.components||{};return e?(0,t.jsx)(e,{...n,children:(0,t.jsx)(h,{...n})}):h(n)}return v(j);})();\\n;return Component;"},"_id":"articles/elastic-invokedynamic-opentelemetry-java-agent.mdx","_raw":{"sourceFilePath":"articles/elastic-invokedynamic-opentelemetry-java-agent.mdx","sourceFileName":"elastic-invokedynamic-opentelemetry-java-agent.mdx","sourceFileDir":"articles","contentType":"mdx","flattenedPath":"articles/elastic-invokedynamic-opentelemetry-java-agent"},"type":"Article","imageUrl":"/assets/images/invokedynamic-opentelemetry-java-agent/24-crystals.jpeg","readingTime":"4 min read","url":"/invokedynamic-opentelemetry-java-agent","headings":[{"level":2,"title":"Elastic’s invokedynamic-based instrumentation approach","href":"#elastics-invokedynamic-based-instrumentation-approach"}]},{"title":"Elastic Observability monitors metrics for Google Cloud in just minutes","slug":"observability-monitors-metrics-google-cloud","date":"2023-11-20","description":"Follow this step-by-step process to enable Elastic Observability for Google Cloud Platform metrics.","image":"serverless-launch-blog-image.jpg","author":[{"slug":"jonathan-simon","type":"Author","_raw":{}},{"slug":"eric-lowry","type":"Author","_raw":{}}],"tags":[{"slug":"cloud-monitoring","type":"Tag","_raw":{}},{"slug":"google-cloud","type":"Tag","_raw":{}},{"slug":"metrics","type":"Tag","_raw":{}}],"body":{"raw":"\\nDevelopers and SREs choose to host their applications on Google Cloud Platform (GCP) for its reliability, speed, and ease of use. On Google Cloud, development teams are finding additional value in migrating to Kubernetes on GKE, leveraging the latest serverless options like Cloud Run, and improving traditional, tiered applications with managed services.\\n\\nElastic Observability offers 16 out-of-the-box integrations for Google Cloud services with more on the way. 
A full list of Google Cloud integrations can be found in [our online documentation](https://docs.elastic.co/en/integrations/gcp).\\n\\nIn addition to our native Google Cloud integrations, Elastic Observability aggregates not only logs but also metrics for Google Cloud services and the applications running on Google Cloud compute services (Compute Engine, Cloud Run, Cloud Functions, Kubernetes Engine). All this data can be analyzed visually and more intuitively using Elastic\xae’s advanced machine learning (ML) capabilities, which help detect performance issues and surface root causes before end users are affected.\\n\\nFor more details on how Elastic Observability provides application performance monitoring (APM) capabilities such as service maps, tracing, dependencies, and ML based metrics correlations, read: [APM correlations in Elastic Observability: Automatically identifying probable causes of slow or failed transactions](https://www.elastic.co/blog/apm-correlations-elastic-observability-root-cause-transactions).\\n\\nThat’s right, Elastic offers metrics ingest, aggregation, and analysis for Google Cloud services and applications on Google Cloud compute services. Elastic is more than logs — it offers a unified observability solution for Google Cloud environments.\\n\\nIn this blog, I’ll review how Elastic Observability can monitor metrics for a three-tier web application running on Google Cloud services, which include:\\n\\n- Google Cloud Run\\n- Google Cloud SQL for PostgreSQL\\n- Google Cloud Memorystore for Redis\\n- Google Cloud VPC Network\\n\\nAs you will see, once the integration is installed, metrics will arrive instantly and you can immediately start reviewing metrics.\\n\\n## Prerequisites and config\\n\\nHere are some of the components and details we used to set up this demonstration:\\n\\n- Ensure you have an account on [Elastic Cloud](https://cloud.elastic.co) and a deployed stack ([see instructions here](https://www.elastic.co/guide/en/elastic-stack/current/installing-elastic-stack.html)).\\n- Ensure you have a Google Cloud project and a Service Account with permissions to pull the necessary data from Google Cloud ([see details in our documentation](https://docs.elastic.co/en/integrations/gcp#authentication)).\\n- We used [Google Cloud’s three-tier app](https://cloud.google.com/architecture/application-development/three-tier-web-app) and deployed it using the Google Cloud console.\\n- We’ll walk through installing the general [Elastic Google Cloud Platform Integration](https://docs.elastic.co/en/integrations/gcp), which covers the services we want to collect metrics for.\\n- We will _not_ cover application monitoring; instead, we will focus on how Google Cloud services can be easily monitored.\\n- In order to see metrics, you will need to load the application. We’ve also created a playwright script to drive traffic to the application.\\n\\n## Three-tier application overview\\n\\nBefore we dive into the Elastic configuration, let\'s review what we are monitoring. 
If you follow the [Jump Start Solution: Three-tier web app](https://cloud.google.com/architecture/application-development/three-tier-web-app) instructions for deploying the task-tracking app, you will have the following deployed.\\n\\n![1](/assets/images/observability-monitors-metrics-google-cloud/1.png)\\n\\nWhat’s deployed:\\n\\n- Cloud Run frontend tier that renders an HTML client in the user\'s browser and enables user requests to be sent to the task-tracking app\\n- Cloud Run middle tier API layer that communicates with the frontend and the database tier\\n- Memorystore for Redis instance in the database tier, caching and serving data that is read frequently\\n- Cloud SQL for PostgreSQL instance in the database tier, handling requests that can\'t be served from the in-memory Redis cache\\n\\nAt the end of the blog, we will also provide a Playwright script that can be run to send requests to this app in order to load it with example data and exercise its functionality. This will help drive metrics to “light up” the dashboards.\\n\\n## Setting it all up\\n\\nLet’s walk through the details of how to deploy the application, set up the Google Cloud integration on Elastic, and review what gets ingested.\\n\\n### Step 0: Get an account on Elastic Cloud\\n\\nFollow the instructions to [get started on Elastic Cloud](https://cloud.elastic.co/registration?fromURI=/home).\\n\\n![2 - start free trial](/assets/images/observability-monitors-metrics-google-cloud/2.png)\\n\\n### Step 1: Deploy the Google Cloud three-tier application\\n\\nFollow the instructions listed in [Jump Start Solution: Three-tier web app](https://cloud.google.com/architecture/application-development/three-tier-web-app), choosing the **Deploy through the console** option for deployment.\\n\\n### Step 2: Create a Google Cloud Service Account and download credentials file\\n\\nOnce you’ve installed the app, the next step is to create a _Service Account_ with a _Role_ and a _Service Account Key_ that will be used by Elastic’s integration to access data in your Google Cloud project.\\n\\nGo to Google Cloud [IAM Roles](https://console.cloud.google.com/iam-admin/roles) to create a Role with the necessary permissions. Click the **CREATE ROLE** button.\\n\\n![3](/assets/images/observability-monitors-metrics-google-cloud/3.png)\\n\\nGive the Role a **Title** and an **ID**. Then add the 10 assigned permissions listed here.\\n\\n- cloudsql.instances.list\\n- compute.instances.list\\n- monitoring.metricDescriptors.list\\n- monitoring.timeSeries.list\\n- pubsub.subscriptions.consume\\n- pubsub.subscriptions.create\\n- pubsub.subscriptions.get\\n- pubsub.topics.attachSubscription\\n- redis.instances.list\\n- run.services.list\\n\\nThese permissions are a minimal set of what’s required for this blog post. You should add permissions for all the services for which you would like to collect metrics. If you need to add or remove permissions in the future, the Role’s permissions can be updated as many times as necessary.\\n\\nClick the **CREATE** button.\\n\\n![4](/assets/images/observability-monitors-metrics-google-cloud/4.png)\\n\\nGo to Google Cloud [IAM Service Accounts](https://console.cloud.google.com/iam-admin/serviceaccounts) to create a Service Account that will be used by the Elastic integration for access to Google Cloud. Click the **CREATE SERVICE ACCOUNT** button.\\n\\n![5](/assets/images/observability-monitors-metrics-google-cloud/5.png)\\n\\nEnter a **Service account name** and a **Service account ID**. Click the **CREATE AND CONTINUE** button.\\n\\n![6](/assets/images/observability-monitors-metrics-google-cloud/6.png)\\n\\nThen select the **Role** that you created previously and click the **CONTINUE** button.\\n\\n![7](/assets/images/observability-monitors-metrics-google-cloud/7.png)\\n\\nClick the **DONE** button to complete the Service Account creation process.\\n\\nNext, select the Service Account you just created to see its details page. Under the **KEYS** tab, click the **ADD KEY** dropdown and select **Create new key**.\\n\\n![8](/assets/images/observability-monitors-metrics-google-cloud/8.png)\\n\\nIn the Create private key dialog window, with the **Key type** set as JSON, click the **CREATE** button.\\n\\n![9](/assets/images/observability-monitors-metrics-google-cloud/9.png)\\n\\nThe JSON credentials key file will be automatically downloaded to your local computer’s **Downloads** folder. The credentials file will be named something like:\\n\\n```bash\\nyour-project-id-12a1234b1234.json\\n```\\n\\nYou can rename the file to something else. For the purpose of this blog, we’ll rename it to:\\n\\n```bash\\ncredentials.json\\n```\\n\\n### Step 3: Create a Google Cloud VM instance\\n\\nTo create the Compute Engine VM instance in Google Cloud, go to [Compute Engine](https://console.cloud.google.com/compute/instances). Then select **CREATE INSTANCE**.\\n\\n![10](/assets/images/observability-monitors-metrics-google-cloud/10.png)\\n\\nEnter the following values for the VM instance details:\\n\\n- Enter a **Name** of your choice for the VM instance.\\n- Expand the **Advanced Options** section and the **Networking** sub-section.\\n - Enter allow-ssh as the Networking tag.\\n - Select the **Network Interface** to use the **tiered-web-app-private-network**, which is the network on which the Google Cloud three-tier web app is deployed.\\n\\nClick the **CREATE** button to create the VM instance.\\n\\n![11](/assets/images/observability-monitors-metrics-google-cloud/11.png)\\n\\n### Step 4: SSH into the Google Cloud VM instance and upload the credentials file\\n\\nIn order to SSH into the Google Cloud VM instance you just created in the previous step, you’ll need to create a Firewall rule in **tiered-web-app-private-network**, which is the network where the VM instance resides.\\n\\nGo to the Google Cloud [**Firewall policies**](https://console.cloud.google.com/net-security/firewall-manager/firewall-policies/list) page. Click the **CREATE FIREWALL RULE** button.\\n\\n![12](/assets/images/observability-monitors-metrics-google-cloud/12.png)\\n\\nEnter the following values for the Firewall Rule.\\n\\n- Enter a firewall rule **Name**.\\n- Select **tiered-web-app-private-network** for the **Network**.\\n- Enter allow-ssh for **Target Tags**.\\n- Enter 0.0.0.0/0 for the **Source IPv4 ranges**.\\n- Click **TCP** and set the **Ports** to **22**.\\n\\nClick **CREATE** to create the firewall rule.\\n\\n![13](/assets/images/observability-monitors-metrics-google-cloud/13.png)\\n\\nAfter the new Firewall rule is created, you can now SSH into your VM instance. Go to the [Google Cloud VM instances](https://console.cloud.google.com/compute/instances) page and select the VM instance you created in the previous step to see its details page.
Click the **SSH** button.\\n\\n![14](/assets/images/observability-monitors-metrics-google-cloud/14.png)\\n\\nOnce you are SSH’d inside the VM instance terminal window, click the **UPLOAD FILE** button.\\n\\n![15](/assets/images/observability-monitors-metrics-google-cloud/15.png)\\n\\nSelect the credentials.json file located on your local computer and click the **Upload Files** button to upload the file.\\n\\n![16](/assets/images/observability-monitors-metrics-google-cloud/16.png)\\n\\nIn the VM instance’s SSH terminal, run the following command to get the full path to your Google Cloud Service Account credentials file.\\n\\n```bash\\nrealpath credentials.json\\n```\\n\\nThis should return the full path to your Google Cloud Service Account credentials file.\\n\\n![17](/assets/images/observability-monitors-metrics-google-cloud/17.png)\\n\\nCopy the credentials file’s full path and save it in a handy location to be used in a later step.\\n\\n### Step 5: Add the Elastic Google Cloud integration\\n\\nNavigate to the Google Cloud Platform integration in Elastic by selecting **Integrations** from the top-level menu. Search for google and click the **Google Cloud Platform** tile.\\n\\n![18](/assets/images/observability-monitors-metrics-google-cloud/18.png)\\n\\nClick **Add Google Cloud Platform**.\\n\\n![19](/assets/images/observability-monitors-metrics-google-cloud/19.png)\\n\\nClick **Add integration only (skip agent installation)**.\\n\\n![20](/assets/images/observability-monitors-metrics-google-cloud/20.png)\\n\\nUpdate the **Project Id** input text box to be your Google Cloud Project ID. Next, paste the credentials file’s full path into the **Credentials File** input text box.\\n\\n![21](/assets/images/observability-monitors-metrics-google-cloud/21.png)\\n\\nAs you can see, the general Elastic Google Cloud Platform Integration will collect a significant amount of data from 16 Google Cloud services. If you don’t want to install this general Elastic Google Cloud Platform Integration, you can select individual integrations to install. Click **Save and continue**.\\n\\nYou’ll be presented with a confirmation dialog window. Click **Add Elastic Agent to your hosts**.\\n\\n![22](/assets/images/observability-monitors-metrics-google-cloud/22.png)\\n\\nThis will display the instructions required to install the Elastic agent. Copy the command under the **Linux Tar** tab.\\n\\n![23](/assets/images/observability-monitors-metrics-google-cloud/23.png)\\n\\nNext, you will need to use SSH to log in to the Google Cloud VM instance and run the commands copied from the **Linux Tar** tab. Go to [Compute Engine](https://console.cloud.google.com/compute/instances). Then click the name of the VM instance that you created in Step 3. Log in to the VM by clicking the **SSH** button.\\n\\n![24 - instance](/assets/images/observability-monitors-metrics-google-cloud/14.png)\\n\\nOnce you are SSH’d inside the VM instance terminal window, run the commands copied previously from the **Linux Tar** tab in the **Install Elastic Agent on your host** instructions.\\n\\nWhen the installation completes, you’ll see a confirmation message in the Install Elastic Agent on your host form. Click the **Add the integration** button.\\n\\n![25 - add agent](/assets/images/observability-monitors-metrics-google-cloud/25.png)\\n\\nExcellent! The Elastic agent is sending data to Elastic Cloud.
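The dashboards in the next steps stay largely empty until the app receives requests. Step 6 below drives traffic with a Playwright script; as a rougher alternative, a plain Go loop (reusing the sample frontend URL from that script, which is a placeholder for your own Cloud Run frontend URL) can also generate request traffic:

```go
package main

import (
	"log"
	"net/http"
	"time"
)

func main() {
	// Placeholder URL taken from the Playwright script in Step 6 below;
	// substitute the URL of your own deployed Cloud Run frontend.
	const frontend = "https://tiered-web-app-fe-zg62dali3a-uc.a.run.app"

	// Fetch the frontend page repeatedly to move request-count and
	// CPU-utilization metrics on the Cloud Run dashboards.
	for i := 0; i < 100; i++ {
		resp, err := http.Get(frontend)
		if err != nil {
			log.Println(err)
			continue
		}
		resp.Body.Close()
		time.Sleep(500 * time.Millisecond)
	}
}
```

Note that this only loads the frontend page; unlike the Playwright script, it does not exercise the todo API, Cloud SQL, or Memorystore tiers.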
Now let’s observe some metrics.\\n\\n### Step 6: Run traffic against the application\\n\\nWhile getting the application running is fairly easy, there is nothing to monitor or observe with Elastic unless you add a load on the application.\\n\\nHere is a simple script you can also run using [Playwright](https://playwright.dev/) to add traffic and exercise the functionality of the Google Cloud three-tier application:\\n\\n```javascript\\nimport { test, expect } from \\"@playwright/test\\";\\n\\ntest(\\"homepage for Google Cloud Threetierapp\\", async ({ page }) => {\\n await page.goto(\\"https://tiered-web-app-fe-zg62dali3a-uc.a.run.app\\");\\n // Insert 2 todo items\\n await page.fill(\\"id=todo-new\\", (Math.random() * 100).toString());\\n await page.keyboard.press(\\"Enter\\");\\n await page.waitForTimeout(1000);\\n await page.fill(\\"id=todo-new\\", (Math.random() * 100).toString());\\n await page.keyboard.press(\\"Enter\\");\\n await page.waitForTimeout(1000);\\n // Click one todo item\\n await page.getByRole(\\"checkbox\\").nth(0).check();\\n await page.waitForTimeout(1000);\\n // Delete one todo item\\n const deleteButton = page.getByText(\\"delete\\").nth(0);\\n await deleteButton.dispatchEvent(\\"click\\");\\n await page.waitForTimeout(4000);\\n});\\n```\\n\\n### Step 7: Go to Google Cloud dashboards in Elastic\\n\\nWith Elastic Agent running, you can go to Elastic Dashboards to view what’s being ingested. Simply search for “dashboard” in Elastic and choose **Dashboards.**\\n\\n![26 - dashboard](/assets/images/observability-monitors-metrics-google-cloud/26.png)\\n\\nThis will open the Elastic Dashboards page.\\n\\n![27](/assets/images/observability-monitors-metrics-google-cloud/27.png)\\n\\nIn the Dashboards search box, search for GCP and click the **[Metrics GCP] CloudSQL PostgreSQL Overview** dashboard, one of the many out-of-the-box dashboards available. Let’s see what comes up.\\n\\n![28](/assets/images/observability-monitors-metrics-google-cloud/28.png)\\n\\nOn the Cloud SQL dashboard, we can see the following sampling of some of the many available metrics:\\n\\n- Disk write ops\\n- CPU utilization\\n- Network sent and received bytes\\n- Transaction count\\n- Disk bytes used\\n- Disk quota\\n- Memory usage\\n- Disk read ops\\n\\nNext let’s take a look at metrics for Cloud Run.\\n\\n![29 - line graphs](/assets/images/observability-monitors-metrics-google-cloud/29.png)\\n\\nWe’ve created a custom dashboard using the **Create dashboard** button on the Elastic Dashboards page. Here we see a few of the numerous available metrics:\\n\\n- Container instance count\\n- CPU utilization for the three-tier app frontend and API\\n- Request count for the three-tier app frontend and API\\n- Bytes in and out of the API\\n\\n![30](/assets/images/observability-monitors-metrics-google-cloud/30.png)\\n\\nThis is a custom dashboard created for MemoryStore where we can see the following sampling of the available metrics:\\n\\n- Network traffic to the Memorystore Redis instance\\n- Count of the keys stored in Memorystore Redis\\n- CPU utilization of the Memorystore Redis instance\\n- Memory usage of the Memorystore Redis instance\\n\\n**Congratulations, you have now started monitoring metrics from key Google Cloud services for your application!**\\n\\n## What to monitor on Google Cloud next?\\n\\n### Add logs from Google Cloud Services\\n\\nNow that metrics are being monitored, you can also now add logging. 
There are several options for ingesting logs.\\n\\nThe Google Cloud Platform Integration in the Elastic Agent has four separate logs settings: audit logs, firewall logs, VPC Flow logs, and DNS logs. Just ensure you turn on what you wish to receive.\\n\\n![31](/assets/images/observability-monitors-metrics-google-cloud/31.png)\\n\\n### Analyze your data with Elastic machine learning\\n\\nOnce metrics and logs (or either one) are in Elastic, start analyzing your data through Elastic’s ML capabilities. A great review of these features can be found here:\\n\\n- [Correlating APM Telemetry to determine root causes in transactions](https://www.elastic.co/blog/apm-correlations-elastic-observability-root-cause-transactions)\\n- [Introduction to Elastic Machine Learning](https://www.elastic.co/elasticon/archive/2020/global/machine-learning-and-the-elastic-stack-everywhere-you-need-it)\\n\\n## Conclusion: Monitoring Google Cloud service metrics with Elastic Observability is easy!\\n\\nI hope you’ve gotten an appreciation for how Elastic Observability can help you monitor Google Cloud service metrics. Here’s a quick recap of lessons and what you learned:\\n\\n- Elastic Observability supports ingest and analysis of Google Cloud service metrics.\\n- It’s easy to set up ingest from Google Cloud services via the Elastic Agent.\\n- Elastic Observability has multiple out-of-the-box Google Cloud service dashboards you can use to preliminarily review information and then modify for your needs.\\n- For metrics not covered by out-of-the-box dashboards, custom dashboards can be easily created to visualize metrics that are important to you.\\n- 16 Google Cloud services are supported as part of Google Cloud Platform Integration on Elastic Observability, with more services being added regularly.\\n- As noted in related blogs, you can analyze your Google Cloud service metrics with Elastic’s machine learning capabilities.\\n\\nTry it out for yourself by signing up via [Google Cloud Marketplace](https://console.cloud.google.com/marketplace/product/elastic-prod/elastic-cloud) and quickly spin up a deployment in minutes on any of the [Elastic Cloud regions on Google Cloud](https://www.elastic.co/guide/en/cloud/current/ec-reference-regions.html#ec_google_cloud_platform_gcp_regions) around the world. Your Google Cloud Marketplace purchase of Elastic will be included in your monthly consolidated billing statement and will draw against your committed spend with Google Cloud.\\n\\n_The release and timing of any features or functionality described in this post remain at Elastic\'s sole discretion. 
Any features or functionality not currently available may not be delivered on time or at all._\\n","code":"var Component=(()=>{var g=Object.create;var l=Object.defineProperty;var u=Object.getOwnPropertyDescriptor;var p=Object.getOwnPropertyNames;var m=Object.getPrototypeOf,w=Object.prototype.hasOwnProperty;var b=(i,e)=>()=>(e||i((e={exports:{}}).exports,e),e.exports),f=(i,e)=>{for(var o in e)l(i,o,{get:e[o],enumerable:!0})},s=(i,e,o,r)=>{if(e&&typeof e==\\"object\\"||typeof e==\\"function\\")for(let n of p(e))!w.call(i,n)&&n!==o&&l(i,n,{get:()=>e[n],enumerable:!(r=u(e,n))||r.enumerable});return i};var y=(i,e,o)=>(o=i!=null?g(m(i)):{},s(e||!i||!i.__esModule?l(o,\\"default\\",{value:i,enumerable:!0}):o,i)),v=i=>s(l({},\\"__esModule\\",{value:!0}),i);var c=b((k,a)=>{a.exports=_jsx_runtime});var E={};f(E,{default:()=>h,frontmatter:()=>C});var t=y(c()),C={title:\\"Elastic Observability monitors metrics for Google Cloud in just minutes\\",slug:\\"observability-monitors-metrics-google-cloud\\",date:\\"2023-11-20\\",description:\\"Follow this step-by-step process to enable Elastic Observability for Google Cloud Platform metrics.\\",author:[{slug:\\"jonathan-simon\\"},{slug:\\"eric-lowry\\"}],image:\\"serverless-launch-blog-image.jpg\\",tags:[{slug:\\"cloud-monitoring\\"},{slug:\\"google-cloud\\"},{slug:\\"metrics\\"}]};function d(i){let e={a:\\"a\\",code:\\"code\\",em:\\"em\\",h2:\\"h2\\",h3:\\"h3\\",img:\\"img\\",li:\\"li\\",p:\\"p\\",pre:\\"pre\\",strong:\\"strong\\",ul:\\"ul\\",...i.components};return(0,t.jsxs)(t.Fragment,{children:[(0,t.jsx)(e.p,{children:\\"Developers and SREs choose to host their applications on Google Cloud Platform (GCP) for its reliability, speed, and ease of use. On Google Cloud, development teams are finding additional value in migrating to Kubernetes on GKE, leveraging the latest serverless options like Cloud Run, and improving traditional, tiered applications with managed services.\\"}),`\\n`,(0,t.jsxs)(e.p,{children:[\\"Elastic Observability offers 16 out-of-the-box integrations for Google Cloud services with more on the way. A full list of Google Cloud integrations can be found in \\",(0,t.jsx)(e.a,{href:\\"https://docs.elastic.co/en/integrations/gcp\\",rel:\\"nofollow\\",children:\\"our online documentation\\"}),\\".\\"]}),`\\n`,(0,t.jsx)(e.p,{children:\\"In addition to our native Google Cloud integrations, Elastic Observability aggregates not only logs but also metrics for Google Cloud services and the applications running on Google Cloud compute services (Compute Engine, Cloud Run, Cloud Functions, Kubernetes Engine). All this data can be analyzed visually and more intuitively using Elastic\\\\xAE\\\\u2019s advanced machine learning (ML) capabilities, which help detect performance issues and surface root causes before end users are affected.\\"}),`\\n`,(0,t.jsxs)(e.p,{children:[\\"For more details on how Elastic Observability provides application performance monitoring (APM) capabilities such as service maps, tracing, dependencies, and ML based metrics correlations, read: \\",(0,t.jsx)(e.a,{href:\\"https://www.elastic.co/blog/apm-correlations-elastic-observability-root-cause-transactions\\",rel:\\"nofollow\\",children:\\"APM correlations in Elastic Observability: Automatically identifying probable causes of slow or failed transactions\\"}),\\".\\"]}),`\\n`,(0,t.jsx)(e.p,{children:\\"That\\\\u2019s right, Elastic offers metrics ingest, aggregation, and analysis for Google Cloud services and applications on Google Cloud compute services. 
Elastic is more than logs \\\\u2014 it offers a unified observability solution for Google Cloud environments.\\"}),`\\n`,(0,t.jsx)(e.p,{children:\\"In this blog, I\\\\u2019ll review how Elastic Observability can monitor metrics for a three-tier web application running on Google Cloud services, which include:\\"}),`\\n`,(0,t.jsxs)(e.ul,{children:[`\\n`,(0,t.jsx)(e.li,{children:\\"Google Cloud Run\\"}),`\\n`,(0,t.jsx)(e.li,{children:\\"Google Cloud SQL for PostgreSQL\\"}),`\\n`,(0,t.jsx)(e.li,{children:\\"Google Cloud Memorystore for Redis\\"}),`\\n`,(0,t.jsx)(e.li,{children:\\"Google Cloud VPC Network\\"}),`\\n`]}),`\\n`,(0,t.jsx)(e.p,{children:\\"As you will see, once the integration is installed, metrics will arrive instantly and you can immediately start reviewing them.\\"}),`\\n`,(0,t.jsx)(e.h2,{id:\\"prerequisites-and-config\\",children:\\"Prerequisites and config\\"}),`\\n`,(0,t.jsx)(e.p,{children:\\"Here are some of the components and details we used to set up this demonstration:\\"}),`\\n`,(0,t.jsxs)(e.ul,{children:[`\\n`,(0,t.jsxs)(e.li,{children:[\\"Ensure you have an account on \\",(0,t.jsx)(e.a,{href:\\"https://cloud.elastic.co\\",rel:\\"nofollow\\",children:\\"Elastic Cloud\\"}),\\" and a deployed stack (\\",(0,t.jsx)(e.a,{href:\\"https://www.elastic.co/guide/en/elastic-stack/current/installing-elastic-stack.html\\",rel:\\"nofollow\\",children:\\"see instructions here\\"}),\\").\\"]}),`\\n`,(0,t.jsxs)(e.li,{children:[\\"Ensure you have a Google Cloud project and a Service Account with permissions to pull the necessary data from Google Cloud (\\",(0,t.jsx)(e.a,{href:\\"https://docs.elastic.co/en/integrations/gcp#authentication\\",rel:\\"nofollow\\",children:\\"see details in our documentation\\"}),\\").\\"]}),`\\n`,(0,t.jsxs)(e.li,{children:[\\"We used \\",(0,t.jsx)(e.a,{href:\\"https://cloud.google.com/architecture/application-development/three-tier-web-app\\",rel:\\"nofollow\\",children:\\"Google Cloud\\\\u2019s three-tier app\\"}),\\" and deployed it using the Google Cloud console.\\"]}),`\\n`,(0,t.jsxs)(e.li,{children:[\\"We\\\\u2019ll walk through installing the general \\",(0,t.jsx)(e.a,{href:\\"https://docs.elastic.co/en/integrations/gcp\\",rel:\\"nofollow\\",children:\\"Elastic Google Cloud Platform Integration\\"}),\\", which covers the services we want to collect metrics for.\\"]}),`\\n`,(0,t.jsxs)(e.li,{children:[\\"We will \\",(0,t.jsx)(e.em,{children:\\"not\\"}),\\" cover application monitoring; instead, we will focus on how Google Cloud services can be easily monitored.\\"]}),`\\n`,(0,t.jsx)(e.li,{children:\\"In order to see metrics, you will need to load the application. We\\\\u2019ve also created a Playwright script to drive traffic to the application.\\"}),`\\n`]}),`\\n`,(0,t.jsx)(e.h2,{id:\\"three-tier-application-overview\\",children:\\"Three-tier application overview\\"}),`\\n`,(0,t.jsxs)(e.p,{children:[\\"Before we dive into the Elastic configuration, let\'s review what we are monitoring. 
If you follow the \\",(0,t.jsx)(e.a,{href:\\"https://cloud.google.com/architecture/application-development/three-tier-web-app\\",rel:\\"nofollow\\",children:\\"Jump Start Solution: Three-tier web app\\"}),\\" instructions for deploying the task-tracking app, you will have the following deployed.\\"]}),`\\n`,(0,t.jsx)(e.p,{children:(0,t.jsx)(e.img,{src:\\"/assets/images/observability-monitors-metrics-google-cloud/1.png\\",alt:\\"1\\",width:\\"1008\\",height:\\"624\\"})}),`\\n`,(0,t.jsx)(e.p,{children:\\"What\\\\u2019s deployed:\\"}),`\\n`,(0,t.jsxs)(e.ul,{children:[`\\n`,(0,t.jsx)(e.li,{children:\\"Cloud Run frontend tier that renders an HTML client in the user\'s browser and enables user requests to be sent to the task-tracking app\\"}),`\\n`,(0,t.jsx)(e.li,{children:\\"Cloud Run middle tier API layer that communicates with the frontend and the database tier\\"}),`\\n`,(0,t.jsx)(e.li,{children:\\"Memorystore for Redis instance in the database tier, caching and serving data that is read frequently\\"}),`\\n`,(0,t.jsx)(e.li,{children:\\"Cloud SQL for PostgreSQL instance in the database tier, handling requests that can\'t be served from the in-memory Redis cache\\"}),`\\n`]}),`\\n`,(0,t.jsx)(e.p,{children:\\"At the end of the blog, we will also provide a Playwright script that can be run to send requests to this app in order to load it with example data and exercise its functionality. This will help drive metrics to \\\\u201Clight up\\\\u201D the dashboards.\\"}),`\\n`,(0,t.jsx)(e.h2,{id:\\"setting-it-all-up\\",children:\\"Setting it all up\\"}),`\\n`,(0,t.jsx)(e.p,{children:\\"Let\\\\u2019s walk through the details of how to set up the application and the Google Cloud integration on Elastic, and what gets ingested.\\"}),`\\n`,(0,t.jsx)(e.h3,{id:\\"step-0-get-an-account-on-elastic-cloud\\",children:\\"Step 0: Get an account on Elastic Cloud\\"}),`\\n`,(0,t.jsxs)(e.p,{children:[\\"Follow the instructions to \\",(0,t.jsx)(e.a,{href:\\"https://cloud.elastic.co/registration?fromURI=/home\\",rel:\\"nofollow\\",children:\\"get started on Elastic Cloud\\"}),\\".\\"]}),`\\n`,(0,t.jsx)(e.p,{children:(0,t.jsx)(e.img,{src:\\"/assets/images/observability-monitors-metrics-google-cloud/2.png\\",alt:\\"2 - start free trial\\",width:\\"884\\",height:\\"654\\"})}),`\\n`,(0,t.jsx)(e.h3,{id:\\"step-1-deploy-the-google-cloud-three-tier-application\\",children:\\"Step 1: Deploy the Google Cloud three-tier application\\"}),`\\n`,(0,t.jsxs)(e.p,{children:[\\"Follow the instructions listed in \\",(0,t.jsx)(e.a,{href:\\"https://cloud.google.com/architecture/application-development/three-tier-web-app\\",rel:\\"nofollow\\",children:\\"Jump Start Solution: Three-tier web app\\"}),\\", choosing the \\",(0,t.jsx)(e.strong,{children:\\"Deploy through the console\\"}),\\" option for deployment.\\"]}),`\\n`,(0,t.jsx)(e.h3,{id:\\"step-2-create-a-google-cloud-service-account-and-download-credentials-file\\",children:\\"Step 2: Create a Google Cloud Service Account and download credentials file\\"}),`\\n`,(0,t.jsxs)(e.p,{children:[\\"Once you\\\\u2019ve installed the app, the next step is to create a \\",(0,t.jsx)(e.em,{children:\\"Service Account\\"}),\\" with a \\",(0,t.jsx)(e.em,{children:\\"Role\\"}),\\" and a \\",(0,t.jsx)(e.em,{children:\\"Service Account Key\\"}),\\" that will be used by Elastic\\\\u2019s integration to access data in your Google Cloud project.\\"]}),`\\n`,(0,t.jsxs)(e.p,{children:[\\"Go 
to Google Cloud \\",(0,t.jsx)(e.a,{href:\\"https://console.cloud.google.com/iam-admin/roles\\",rel:\\"nofollow\\",children:\\"IAM Roles\\"}),\\" to create a Role with the necessary permissions. Click the \\",(0,t.jsx)(e.strong,{children:\\"CREATE ROLE\\"}),\\" button.\\"]}),`\\n`,(0,t.jsx)(e.p,{children:(0,t.jsx)(e.img,{src:\\"/assets/images/observability-monitors-metrics-google-cloud/3.png\\",alt:\\"3\\",width:\\"546\\",height:\\"201\\"})}),`\\n`,(0,t.jsxs)(e.p,{children:[\\"Give the Role a \\",(0,t.jsx)(e.strong,{children:\\"Title\\"}),\\" and an \\",(0,t.jsx)(e.strong,{children:\\"ID\\"}),\\". Then add the 10 assigned permissions listed here.\\"]}),`\\n`,(0,t.jsxs)(e.ul,{children:[`\\n`,(0,t.jsx)(e.li,{children:\\"cloudsql.instances.list\\"}),`\\n`,(0,t.jsx)(e.li,{children:\\"compute.instances.list\\"}),`\\n`,(0,t.jsx)(e.li,{children:\\"monitoring.metricDescriptors.list\\"}),`\\n`,(0,t.jsx)(e.li,{children:\\"monitoring.timeSeries.list\\"}),`\\n`,(0,t.jsx)(e.li,{children:\\"pubsub.subscriptions.consume\\"}),`\\n`,(0,t.jsx)(e.li,{children:\\"pubsub.subscriptions.create\\"}),`\\n`,(0,t.jsx)(e.li,{children:\\"pubsub.subscriptions.get\\"}),`\\n`,(0,t.jsx)(e.li,{children:\\"pubsub.topics.attachSubscription\\"}),`\\n`,(0,t.jsx)(e.li,{children:\\"redis.instances.list\\"}),`\\n`,(0,t.jsx)(e.li,{children:\\"run.services.list\\"}),`\\n`]}),`\\n`,(0,t.jsx)(e.p,{children:\\"These permissions are a minimal set of what\\\\u2019s required for this blog post. You should add permissions for all the services for which you would like to collect metrics. If you need to add or remove permissions in the future, the Role\\\\u2019s permissions can be updated as many times as necessary.\\"}),`\\n`,(0,t.jsxs)(e.p,{children:[\\"Click the \\",(0,t.jsx)(e.strong,{children:\\"CREATE\\"}),\\" button.\\"]}),`\\n`,(0,t.jsx)(e.p,{children:(0,t.jsx)(e.img,{src:\\"/assets/images/observability-monitors-metrics-google-cloud/4.png\\",alt:\\"4\\",width:\\"1037\\",height:\\"1171\\"})}),`\\n`,(0,t.jsxs)(e.p,{children:[\\"Go to Google Cloud \\",(0,t.jsx)(e.a,{href:\\"https://console.cloud.google.com/iam-admin/serviceaccounts\\",rel:\\"nofollow\\",children:\\"IAM Service Accounts\\"}),\\" to create a Service Account that will be used by the Elastic integration for access to Google Cloud. 
Click the \\",(0,t.jsx)(e.strong,{children:\\"CREATE SERVICE ACCOUNT\\"}),\\" button.\\"]}),`\\n`,(0,t.jsx)(e.p,{children:(0,t.jsx)(e.img,{src:\\"/assets/images/observability-monitors-metrics-google-cloud/5.png\\",alt:\\"5\\",width:\\"786\\",height:\\"240\\"})}),`\\n`,(0,t.jsxs)(e.p,{children:[\\"Enter a \\",(0,t.jsx)(e.strong,{children:\\"Service account name\\"}),\\" and a \\",(0,t.jsx)(e.strong,{children:\\"Service account ID.\\"}),\\" Click the \\",(0,t.jsx)(e.strong,{children:\\"CREATE AND CONTINUE\\"}),\\" button.\\"]}),`\\n`,(0,t.jsx)(e.p,{children:(0,t.jsx)(e.img,{src:\\"/assets/images/observability-monitors-metrics-google-cloud/6.png\\",alt:\\"6\\",width:\\"990\\",height:\\"1062\\"})}),`\\n`,(0,t.jsxs)(e.p,{children:[\\"Then select the \\",(0,t.jsx)(e.strong,{children:\\"Role\\"}),\\" that you created previously and click the \\",(0,t.jsx)(e.strong,{children:\\"CONTINUE\\"}),\\" button.\\"]}),`\\n`,(0,t.jsx)(e.p,{children:(0,t.jsx)(e.img,{src:\\"/assets/images/observability-monitors-metrics-google-cloud/7.png\\",alt:\\"7\\",width:\\"960\\",height:\\"978\\"})}),`\\n`,(0,t.jsxs)(e.p,{children:[\\"Click the \\",(0,t.jsx)(e.strong,{children:\\"DONE\\"}),\\" button to complete the Service Account creation process.\\"]}),`\\n`,(0,t.jsxs)(e.p,{children:[\\"Next, select the Service Account you just created to see its details page. Under the \\",(0,t.jsx)(e.strong,{children:\\"KEYS\\"}),\\" tab, click the \\",(0,t.jsx)(e.strong,{children:\\"ADD KEY\\"}),\\" dropdown and select \\",(0,t.jsx)(e.strong,{children:\\"Create new key\\"}),\\".\\"]}),`\\n`,(0,t.jsx)(e.p,{children:(0,t.jsx)(e.img,{src:\\"/assets/images/observability-monitors-metrics-google-cloud/8.png\\",alt:\\"8\\",width:\\"858\\",height:\\"786\\"})}),`\\n`,(0,t.jsxs)(e.p,{children:[\\"In the Create private key dialog window, with the \\",(0,t.jsx)(e.strong,{children:\\"Key type\\"}),\\" set as JSON, click the \\",(0,t.jsx)(e.strong,{children:\\"CREATE\\"}),\\" button.\\"]}),`\\n`,(0,t.jsx)(e.p,{children:(0,t.jsx)(e.img,{src:\\"/assets/images/observability-monitors-metrics-google-cloud/9.png\\",alt:\\"9\\",width:\\"903\\",height:\\"645\\"})}),`\\n`,(0,t.jsxs)(e.p,{children:[\\"The JSON credentials key file will be automatically downloaded to your local computer\\\\u2019s \\",(0,t.jsx)(e.strong,{children:\\"Downloads\\"}),\\" folder. The credentials file will be named something like:\\"]}),`\\n`,(0,t.jsx)(e.pre,{children:(0,t.jsx)(e.code,{className:\\"language-bash\\",children:`your-project-id-12a1234b1234.json\\n`})}),`\\n`,(0,t.jsx)(e.p,{children:\\"You can rename the file to something else. For the purpose of this blog, we\\\\u2019ll rename it to:\\"}),`\\n`,(0,t.jsx)(e.pre,{children:(0,t.jsx)(e.code,{className:\\"language-bash\\",children:`credentials.json\\n`})}),
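`\\n`,(0,t.jsx)(e.p,{children:\\"If you prefer the command line, the Role, Service Account, and key from this step can be sketched with gcloud. Treat this as an illustrative outline rather than part of the original walkthrough; the role ID, the service account name, and YOUR_PROJECT_ID are placeholders:\\"}),`\\n`,(0,t.jsx)(e.pre,{children:(0,t.jsx)(e.code,{className:\\"language-bash\\",children:`# Custom role with the 10 permissions listed above (role ID is illustrative)\\ngcloud iam roles create elastic_metrics_reader --project=YOUR_PROJECT_ID --permissions=cloudsql.instances.list,compute.instances.list,monitoring.metricDescriptors.list,monitoring.timeSeries.list,pubsub.subscriptions.consume,pubsub.subscriptions.create,pubsub.subscriptions.get,pubsub.topics.attachSubscription,redis.instances.list,run.services.list\\n# Service account that the Elastic integration will use\\ngcloud iam service-accounts create elastic-agent-sa\\n# Bind the custom role to the service account\\ngcloud projects add-iam-policy-binding YOUR_PROJECT_ID --member=serviceAccount:elastic-agent-sa@YOUR_PROJECT_ID.iam.gserviceaccount.com --role=projects/YOUR_PROJECT_ID/roles/elastic_metrics_reader\\n# Download a JSON key, equivalent to the credentials.json file above\\ngcloud iam service-accounts keys create credentials.json --iam-account=elastic-agent-sa@YOUR_PROJECT_ID.iam.gserviceaccount.com\\n`})}),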
`\\n`,(0,t.jsx)(e.h3,{id:\\"step-3-create-a-google-cloud-vm-instance\\",children:\\"Step 3: Create a Google Cloud VM instance\\"}),`\\n`,(0,t.jsxs)(e.p,{children:[\\"To create the Compute Engine VM instance in Google Cloud, go to \\",(0,t.jsx)(e.a,{href:\\"https://console.cloud.google.com/compute/instances\\",rel:\\"nofollow\\",children:\\"Compute Engine\\"}),\\". Then select \\",(0,t.jsx)(e.strong,{children:\\"CREATE INSTANCE.\\"})]}),`\\n`,(0,t.jsx)(e.p,{children:(0,t.jsx)(e.img,{src:\\"/assets/images/observability-monitors-metrics-google-cloud/10.png\\",alt:\\"10\\",width:\\"673\\",height:\\"162\\"})}),`\\n`,(0,t.jsx)(e.p,{children:\\"Enter the following values for the VM instance details:\\"}),`\\n`,(0,t.jsxs)(e.ul,{children:[`\\n`,(0,t.jsxs)(e.li,{children:[\\"Enter a \\",(0,t.jsx)(e.strong,{children:\\"Name\\"}),\\" of your choice for the VM instance.\\"]}),`\\n`,(0,t.jsxs)(e.li,{children:[\\"Expand the \\",(0,t.jsx)(e.strong,{children:\\"Advanced Options\\"}),\\" section and the \\",(0,t.jsx)(e.strong,{children:\\"Networking\\"}),\\" sub-section.\\",`\\n`,(0,t.jsxs)(e.ul,{children:[`\\n`,(0,t.jsx)(e.li,{children:\\"Enter allow-ssh as the Networking tag.\\"}),`\\n`,(0,t.jsxs)(e.li,{children:[\\"Select the \\",(0,t.jsx)(e.strong,{children:\\"Network Interface\\"}),\\" to use the \\",(0,t.jsx)(e.strong,{children:\\"tiered-web-app-private-network\\"}),\\", which is the network on which the Google Cloud three-tier web app is deployed.\\"]}),`\\n`]}),`\\n`]}),`\\n`]}),`\\n`,(0,t.jsxs)(e.p,{children:[\\"Click the \\",(0,t.jsx)(e.strong,{children:\\"CREATE\\"}),\\" button to create the VM instance.\\"]}),`\\n`,(0,t.jsx)(e.p,{children:(0,t.jsx)(e.img,{src:\\"/assets/images/observability-monitors-metrics-google-cloud/11.png\\",alt:\\"11\\",width:\\"1178\\",height:\\"1999\\"})}),`\\n`,(0,t.jsx)(e.h3,{id:\\"step-4-ssh-in-to-the-google-cloud-vm-instance-and-upload-the-credentials-file\\",children:\\"Step 4: SSH in to the Google Cloud VM instance and upload the credentials file\\"}),`\\n`,(0,t.jsxs)(e.p,{children:[\\"In order to SSH into the Google Cloud VM instance you just created in the previous step, you\\\\u2019ll need to create a Firewall rule in \\",(0,t.jsx)(e.strong,{children:\\"tiered-web-app-private-network\\"}),\\", which is the network where the VM instance resides.\\"]}),`\\n`,(0,t.jsxs)(e.p,{children:[\\"Go to the Google Cloud \\",(0,t.jsx)(e.a,{href:\\"https://console.cloud.google.com/net-security/firewall-manager/firewall-policies/list\\",rel:\\"nofollow\\",children:(0,t.jsx)(e.strong,{children:\\"Firewall policies\\"})}),\\" page. 
Click the \\",(0,t.jsx)(e.strong,{children:\\"CREATE FIREWALL RULE\\"}),\\" button.\\"]}),`\\n`,(0,t.jsx)(e.p,{children:(0,t.jsx)(e.img,{src:\\"/assets/images/observability-monitors-metrics-google-cloud/12.png\\",alt:\\"12\\",width:\\"1321\\",height:\\"180\\"})}),`\\n`,(0,t.jsx)(e.p,{children:\\"Enter the following values for the Firewall Rule.\\"}),`\\n`,(0,t.jsxs)(e.ul,{children:[`\\n`,(0,t.jsxs)(e.li,{children:[\\"Enter a firewall rule \\",(0,t.jsx)(e.strong,{children:\\"Name\\"}),\\".\\"]}),`\\n`,(0,t.jsxs)(e.li,{children:[\\"Select \\",(0,t.jsx)(e.strong,{children:\\"tiered-web-app-private-network\\"}),\\" for the \\",(0,t.jsx)(e.strong,{children:\\"Network\\"}),\\".\\"]}),`\\n`,(0,t.jsxs)(e.li,{children:[\\"Enter allow-ssh for \\",(0,t.jsx)(e.strong,{children:\\"Target Tags\\"}),\\".\\"]}),`\\n`,(0,t.jsxs)(e.li,{children:[\\"Enter 0.0.0.0/0 for the \\",(0,t.jsx)(e.strong,{children:\\"Source IPv4 ranges\\"}),\\". Click \\",(0,t.jsx)(e.strong,{children:\\"TCP\\"}),\\" and set the \\",(0,t.jsx)(e.strong,{children:\\"Ports\\"}),\\" to \\",(0,t.jsx)(e.strong,{children:\\"22\\"}),\\".\\"]}),`\\n`]}),`\\n`,(0,t.jsxs)(e.p,{children:[\\"Click \\",(0,t.jsx)(e.strong,{children:\\"CREATE\\"}),\\" to create the firewall rule.\\"]}),`\\n`,(0,t.jsx)(e.p,{children:(0,t.jsx)(e.img,{src:\\"/assets/images/observability-monitors-metrics-google-cloud/13.png\\",alt:\\"13\\",width:\\"890\\",height:\\"1999\\"})}),`\\n`,(0,t.jsxs)(e.p,{children:[\\"After the new Firewall rule is created, you can now SSH into your VM instance. Go to the \\",(0,t.jsx)(e.a,{href:\\"https://console.cloud.google.com/compute/instances\\",rel:\\"nofollow\\",children:\\"Google Cloud VM instances\\"}),\\" and select the VM instance you created in the previous step to see its details page. Click the \\",(0,t.jsx)(e.strong,{children:\\"SSH\\"}),\\" button.\\"]}),`\\n`,(0,t.jsx)(e.p,{children:(0,t.jsx)(e.img,{src:\\"/assets/images/observability-monitors-metrics-google-cloud/14.png\\",alt:\\"14\\",width:\\"927\\",height:\\"423\\"})}),`\\n`,(0,t.jsxs)(e.p,{children:[\\"Once you are SSH\\\\u2019d inside the VM instance terminal window, click the \\",(0,t.jsx)(e.strong,{children:\\"UPLOAD FILE\\"}),\\" button.\\"]}),`\\n`,(0,t.jsx)(e.p,{children:(0,t.jsx)(e.img,{src:\\"/assets/images/observability-monitors-metrics-google-cloud/15.png\\",alt:\\"15\\",width:\\"1440\\",height:\\"297\\"})}),`\\n`,(0,t.jsxs)(e.p,{children:[\\"Select the credentials.json file located on your local computer and click the \\",(0,t.jsx)(e.strong,{children:\\"Upload Files\\"}),\\" button to upload the file.\\"]}),`\\n`,(0,t.jsx)(e.p,{children:(0,t.jsx)(e.img,{src:\\"/assets/images/observability-monitors-metrics-google-cloud/16.png\\",alt:\\"16\\",width:\\"384\\",height:\\"403\\"})}),`\\n`,(0,t.jsx)(e.p,{children:\\"In the VM instance\\\\u2019s SSH terminal, run the following command to get the full path to your Google Cloud Service Account credentials file.\\"}),`\\n`,(0,t.jsx)(e.pre,{children:(0,t.jsx)(e.code,{className:\\"language-bash\\",children:`realpath credentials.json\\n`})}),`\\n`,(0,t.jsx)(e.p,{children:\\"This should return the full path to your Google Cloud Service Account credentials file.\\"}),`\\n`,(0,t.jsx)(e.p,{children:(0,t.jsx)(e.img,{src:\\"/assets/images/observability-monitors-metrics-google-cloud/17.png\\",alt:\\"17\\",width:\\"891\\",height:\\"187\\"})}),`\\n`,(0,t.jsx)(e.p,{children:\\"Copy the credentials file\\\\u2019s full path and save it in a handy location to be used in a later step.\\"}),
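`\\n`,(0,t.jsx)(e.p,{children:\\"As a command-line alternative for this step, the firewall rule and the credentials upload can be approximated with gcloud. This is a hedged sketch, not part of the original walkthrough; YOUR_VM_NAME and YOUR_ZONE are placeholders:\\"}),`\\n`,(0,t.jsx)(e.pre,{children:(0,t.jsx)(e.code,{className:\\"language-bash\\",children:`# Allow SSH from anywhere to instances tagged allow-ssh on the three-tier app network\\ngcloud compute firewall-rules create allow-ssh --network=tiered-web-app-private-network --allow=tcp:22 --source-ranges=0.0.0.0/0 --target-tags=allow-ssh\\n# Copy the credentials file to the VM over SSH\\ngcloud compute scp credentials.json YOUR_VM_NAME:~/credentials.json --zone=YOUR_ZONE\\n`})}),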
`\\n`,(0,t.jsx)(e.h3,{id:\\"step-5-add-the-elastic-google-cloud-integration\\",children:\\"Step 5: Add the Elastic Google Cloud integration\\"}),`\\n`,(0,t.jsxs)(e.p,{children:[\\"Navigate to the Google Cloud Platform integration in Elastic by selecting \\",(0,t.jsx)(e.strong,{children:\\"Integrations\\"}),\\" from the top-level menu. Search for google and click the \\",(0,t.jsx)(e.strong,{children:\\"Google Cloud Platform\\"}),\\" tile.\\"]}),`\\n`,(0,t.jsx)(e.p,{children:(0,t.jsx)(e.img,{src:\\"/assets/images/observability-monitors-metrics-google-cloud/18.png\\",alt:\\"18\\",width:\\"1102\\",height:\\"603\\"})}),`\\n`,(0,t.jsxs)(e.p,{children:[\\"Click \\",(0,t.jsx)(e.strong,{children:\\"Add Google Cloud Platform\\"}),\\".\\"]}),`\\n`,(0,t.jsx)(e.p,{children:(0,t.jsx)(e.img,{src:\\"/assets/images/observability-monitors-metrics-google-cloud/19.png\\",alt:\\"19\\",width:\\"1650\\",height:\\"1272\\"})}),`\\n`,(0,t.jsxs)(e.p,{children:[\\"Click \\",(0,t.jsx)(e.strong,{children:\\"Add integration only (skip agent installation)\\"}),\\".\\"]}),`\\n`,(0,t.jsx)(e.p,{children:(0,t.jsx)(e.img,{src:\\"/assets/images/observability-monitors-metrics-google-cloud/20.png\\",alt:\\"20\\",width:\\"1197\\",height:\\"449\\"})}),`\\n`,(0,t.jsxs)(e.p,{children:[\\"Update the \\",(0,t.jsx)(e.strong,{children:\\"Project Id\\"}),\\" input text box to be your Google Cloud Project ID. Next, paste the credentials file\\\\u2019s full path into the \\",(0,t.jsx)(e.strong,{children:\\"Credentials File\\"}),\\" input text box.\\"]}),`\\n`,(0,t.jsx)(e.p,{children:(0,t.jsx)(e.img,{src:\\"/assets/images/observability-monitors-metrics-google-cloud/21.png\\",alt:\\"21\\",width:\\"674\\",height:\\"1795\\"})}),`\\n`,(0,t.jsxs)(e.p,{children:[\\"As you can see, the general Elastic Google Cloud Platform Integration will collect a significant amount of data from 16 Google Cloud services. If you don\\\\u2019t want to install this general Elastic Google Cloud Platform Integration, you can select individual integrations to install. Click \\",(0,t.jsx)(e.strong,{children:\\"Save and continue\\"}),\\".\\"]}),`\\n`,(0,t.jsxs)(e.p,{children:[\\"You\\\\u2019ll be presented with a confirmation dialog window. Click \\",(0,t.jsx)(e.strong,{children:\\"Add Elastic Agent to your hosts\\"}),\\".\\"]}),`\\n`,(0,t.jsx)(e.p,{children:(0,t.jsx)(e.img,{src:\\"/assets/images/observability-monitors-metrics-google-cloud/22.png\\",alt:\\"22\\",width:\\"792\\",height:\\"226\\"})}),`\\n`,(0,t.jsxs)(e.p,{children:[\\"This will display the instructions required to install the Elastic agent. Copy the command under the \\",(0,t.jsx)(e.strong,{children:\\"Linux Tar\\"}),\\" tab.\\"]}),`\\n`,(0,t.jsx)(e.p,{children:(0,t.jsx)(e.img,{src:\\"/assets/images/observability-monitors-metrics-google-cloud/23.png\\",alt:\\"23\\",width:\\"944\\",height:\\"1130\\"})}),`\\n`,(0,t.jsxs)(e.p,{children:[\\"Next you will need to use SSH to log in to the Google Cloud VM instance and run the commands copied from the \\",(0,t.jsx)(e.strong,{children:\\"Linux Tar\\"}),\\" tab. Go to \\",(0,t.jsx)(e.a,{href:\\"https://console.cloud.google.com/compute/instances\\",rel:\\"nofollow\\",children:\\"Compute Engine\\"}),\\". Then click the name of the VM instance that you created in Step 3. 
Log in to the VM by clicking the \\",(0,t.jsx)(e.strong,{children:\\"SSH\\"}),\\" button.\\"]}),`\\n`,(0,t.jsx)(e.p,{children:(0,t.jsx)(e.img,{src:\\"/assets/images/observability-monitors-metrics-google-cloud/14.png\\",alt:\\"24 - instance\\",width:\\"927\\",height:\\"423\\"})}),`\\n`,(0,t.jsxs)(e.p,{children:[\\"Once you are SSH\\\\u2019d inside the VM instance terminal window, run the commands copied previously from \\",(0,t.jsx)(e.strong,{children:\\"Linux Tar tab\\"}),\\" in the \\",(0,t.jsx)(e.strong,{children:\\"Install Elastic Agent on your host\\"}),\\" instructions.\\"]}),`\\n`,(0,t.jsxs)(e.p,{children:[\\"When the installation completes, you\\\\u2019ll see a confirmation message in the Install Elastic Agent on your host form. Click the \\",(0,t.jsx)(e.strong,{children:\\"Add the integration\\"}),\\" button.\\"]}),`\\n`,(0,t.jsx)(e.p,{children:(0,t.jsx)(e.img,{src:\\"/assets/images/observability-monitors-metrics-google-cloud/25.png\\",alt:\\"25 - add agent\\",width:\\"574\\",height:\\"718\\"})}),`\\n`,(0,t.jsx)(e.p,{children:\\"Excellent! The Elastic agent is sending data to Elastic Cloud. Now let\\\\u2019s observe some metrics.\\"}),`\\n`,(0,t.jsx)(e.h3,{id:\\"step-6-run-traffic-against-the-application\\",children:\\"Step 6: Run traffic against the application\\"}),`\\n`,(0,t.jsx)(e.p,{children:\\"While getting the application running is fairly easy, there is nothing to monitor or observe with Elastic unless you add a load on the application.\\"}),`\\n`,(0,t.jsxs)(e.p,{children:[\\"Here is a simple script you can also run using \\",(0,t.jsx)(e.a,{href:\\"https://playwright.dev/\\",rel:\\"nofollow\\",children:\\"Playwright\\"}),\\" to add traffic and exercise the functionality of the Google Cloud three-tier application:\\"]}),`\\n`,(0,t.jsx)(e.pre,{children:(0,t.jsx)(e.code,{className:\\"language-javascript\\",children:`import { test, expect } from \\"@playwright/test\\";\\n\\ntest(\\"homepage for Google Cloud Threetierapp\\", async ({ page }) => {\\n await page.goto(\\"https://tiered-web-app-fe-zg62dali3a-uc.a.run.app\\");\\n // Insert 2 todo items\\n await page.fill(\\"id=todo-new\\", (Math.random() * 100).toString());\\n await page.keyboard.press(\\"Enter\\");\\n await page.waitForTimeout(1000);\\n await page.fill(\\"id=todo-new\\", (Math.random() * 100).toString());\\n await page.keyboard.press(\\"Enter\\");\\n await page.waitForTimeout(1000);\\n // Click one todo item\\n await page.getByRole(\\"checkbox\\").nth(0).check();\\n await page.waitForTimeout(1000);\\n // Delete one todo item\\n const deleteButton = page.getByText(\\"delete\\").nth(0);\\n await deleteButton.dispatchEvent(\\"click\\");\\n await page.waitForTimeout(4000);\\n});\\n`})}),`\\n`,(0,t.jsx)(e.h3,{id:\\"step-7-go-to-google-cloud-dashboards-in-elastic\\",children:\\"Step 7: Go to Google Cloud dashboards in Elastic\\"}),`\\n`,(0,t.jsxs)(e.p,{children:[\\"With Elastic Agent running, you can go to Elastic Dashboards to view what\\\\u2019s being ingested. 
Simply search for \\\\u201Cdashboard\\\\u201D in Elastic and choose \\",(0,t.jsx)(e.strong,{children:\\"Dashboards.\\"})]}),`\\n`,(0,t.jsx)(e.p,{children:(0,t.jsx)(e.img,{src:\\"/assets/images/observability-monitors-metrics-google-cloud/26.png\\",alt:\\"26 - dashboard\\",width:\\"922\\",height:\\"148\\"})}),`\\n`,(0,t.jsx)(e.p,{children:\\"This will open the Elastic Dashboards page.\\"}),`\\n`,(0,t.jsx)(e.p,{children:(0,t.jsx)(e.img,{src:\\"/assets/images/observability-monitors-metrics-google-cloud/27.png\\",alt:\\"27\\",width:\\"878\\",height:\\"344\\"})}),`\\n`,(0,t.jsxs)(e.p,{children:[\\"In the Dashboards search box, search for GCP and click the \\",(0,t.jsx)(e.strong,{children:\\"[Metrics GCP] CloudSQL PostgreSQL Overview\\"}),\\" dashboard, one of the many out-of-the-box dashboards available. Let\\\\u2019s see what comes up.\\"]}),`\\n`,(0,t.jsx)(e.p,{children:(0,t.jsx)(e.img,{src:\\"/assets/images/observability-monitors-metrics-google-cloud/28.png\\",alt:\\"28\\",width:\\"1440\\",height:\\"1717\\"})}),`\\n`,(0,t.jsx)(e.p,{children:\\"On the Cloud SQL dashboard, we can see the following sampling of some of the many available metrics:\\"}),`\\n`,(0,t.jsxs)(e.ul,{children:[`\\n`,(0,t.jsx)(e.li,{children:\\"Disk write ops\\"}),`\\n`,(0,t.jsx)(e.li,{children:\\"CPU utilization\\"}),`\\n`,(0,t.jsx)(e.li,{children:\\"Network sent and received bytes\\"}),`\\n`,(0,t.jsx)(e.li,{children:\\"Transaction count\\"}),`\\n`,(0,t.jsx)(e.li,{children:\\"Disk bytes used\\"}),`\\n`,(0,t.jsx)(e.li,{children:\\"Disk quota\\"}),`\\n`,(0,t.jsx)(e.li,{children:\\"Memory usage\\"}),`\\n`,(0,t.jsx)(e.li,{children:\\"Disk read ops\\"}),`\\n`]}),`\\n`,(0,t.jsx)(e.p,{children:\\"Next let\\\\u2019s take a look at metrics for Cloud Run.\\"}),`\\n`,(0,t.jsx)(e.p,{children:(0,t.jsx)(e.img,{src:\\"/assets/images/observability-monitors-metrics-google-cloud/29.png\\",alt:\\"29 - line graphs\\",width:\\"1323\\",height:\\"874\\"})}),`\\n`,(0,t.jsxs)(e.p,{children:[\\"We\\\\u2019ve created a custom dashboard using the \\",(0,t.jsx)(e.strong,{children:\\"Create dashboard\\"}),\\" button on the Elastic Dashboards page. 
Here we see a few of the numerous available metrics:\\"]}),`\\n`,(0,t.jsxs)(e.ul,{children:[`\\n`,(0,t.jsx)(e.li,{children:\\"Container instance count\\"}),`\\n`,(0,t.jsx)(e.li,{children:\\"CPU utilization for the three-tier app frontend and API\\"}),`\\n`,(0,t.jsx)(e.li,{children:\\"Request count for the three-tier app frontend and API\\"}),`\\n`,(0,t.jsx)(e.li,{children:\\"Bytes in and out of the API\\"}),`\\n`]}),`\\n`,(0,t.jsx)(e.p,{children:(0,t.jsx)(e.img,{src:\\"/assets/images/observability-monitors-metrics-google-cloud/30.png\\",alt:\\"30\\",width:\\"1440\\",height:\\"912\\"})}),`\\n`,(0,t.jsx)(e.p,{children:\\"This is a custom dashboard created for Memorystore where we can see the following sampling of the available metrics:\\"}),`\\n`,(0,t.jsxs)(e.ul,{children:[`\\n`,(0,t.jsx)(e.li,{children:\\"Network traffic to the Memorystore Redis instance\\"}),`\\n`,(0,t.jsx)(e.li,{children:\\"Count of the keys stored in Memorystore Redis\\"}),`\\n`,(0,t.jsx)(e.li,{children:\\"CPU utilization of the Memorystore Redis instance\\"}),`\\n`,(0,t.jsx)(e.li,{children:\\"Memory usage of the Memorystore Redis instance\\"}),`\\n`]}),`\\n`,(0,t.jsx)(e.p,{children:(0,t.jsx)(e.strong,{children:\\"Congratulations, you have now started monitoring metrics from key Google Cloud services for your application!\\"})}),`\\n`,(0,t.jsx)(e.h2,{id:\\"what-to-monitor-on-google-cloud-next\\",children:\\"What to monitor on Google Cloud next?\\"}),`\\n`,(0,t.jsx)(e.h3,{id:\\"add-logs-from-google-cloud-services\\",children:\\"Add logs from Google Cloud Services\\"}),`\\n`,(0,t.jsx)(e.p,{children:\\"Now that metrics are being monitored, you can add logging as well. There are several options for ingesting logs.\\"}),`\\n`,(0,t.jsx)(e.p,{children:\\"The Google Cloud Platform Integration in the Elastic Agent has four separate logs settings: audit logs, firewall logs, VPC Flow logs, and DNS logs. Just ensure you turn on what you wish to receive.\\"}),`\\n`,(0,t.jsx)(e.p,{children:(0,t.jsx)(e.img,{src:\\"/assets/images/observability-monitors-metrics-google-cloud/31.png\\",alt:\\"31\\",width:\\"633\\",height:\\"1482\\"})}),`\\n`,(0,t.jsx)(e.h3,{id:\\"analyze-your-data-with-elastic-machine-learning\\",children:\\"Analyze your data with Elastic machine learning\\"}),`\\n`,(0,t.jsx)(e.p,{children:\\"Once metrics and logs (or either one) are in Elastic, start analyzing your data through Elastic\\\\u2019s ML capabilities. A great review of these features can be found here:\\"}),`\\n`,(0,t.jsxs)(e.ul,{children:[`\\n`,(0,t.jsx)(e.li,{children:(0,t.jsx)(e.a,{href:\\"https://www.elastic.co/blog/apm-correlations-elastic-observability-root-cause-transactions\\",rel:\\"nofollow\\",children:\\"Correlating APM Telemetry to determine root causes in transactions\\"})}),`\\n`,(0,t.jsx)(e.li,{children:(0,t.jsx)(e.a,{href:\\"https://www.elastic.co/elasticon/archive/2020/global/machine-learning-and-the-elastic-stack-everywhere-you-need-it\\",rel:\\"nofollow\\",children:\\"Introduction to Elastic Machine Learning\\"})}),`\\n`]}),`\\n`,(0,t.jsx)(e.h2,{id:\\"conclusion-monitoring-google-cloud-service-metrics-with-elastic-observability-is-easy\\",children:\\"Conclusion: Monitoring Google Cloud service metrics with Elastic Observability is easy!\\"}),`\\n`,(0,t.jsx)(e.p,{children:\\"I hope you\\\\u2019ve gotten an appreciation for how Elastic Observability can help you monitor Google Cloud service metrics. 
Here\\\\u2019s a quick recap of what you learned:\\"]}),`\\n`,(0,t.jsxs)(e.ul,{children:[`\\n`,(0,t.jsx)(e.li,{children:\\"Elastic Observability supports ingest and analysis of Google Cloud service metrics.\\"}),`\\n`,(0,t.jsx)(e.li,{children:\\"It\\\\u2019s easy to set up ingest from Google Cloud services via the Elastic Agent.\\"}),`\\n`,(0,t.jsx)(e.li,{children:\\"Elastic Observability has multiple out-of-the-box Google Cloud service dashboards you can use to preliminarily review information and then modify for your needs.\\"}),`\\n`,(0,t.jsx)(e.li,{children:\\"For metrics not covered by out-of-the-box dashboards, custom dashboards can be easily created to visualize metrics that are important to you.\\"}),`\\n`,(0,t.jsx)(e.li,{children:\\"16 Google Cloud services are supported as part of the Google Cloud Platform Integration on Elastic Observability, with more services being added regularly.\\"}),`\\n`,(0,t.jsx)(e.li,{children:\\"As noted in related blogs, you can analyze your Google Cloud service metrics with Elastic\\\\u2019s machine learning capabilities.\\"}),`\\n`]}),`\\n`,(0,t.jsxs)(e.p,{children:[\\"Try it out for yourself by signing up via \\",(0,t.jsx)(e.a,{href:\\"https://console.cloud.google.com/marketplace/product/elastic-prod/elastic-cloud\\",rel:\\"nofollow\\",children:\\"Google Cloud Marketplace\\"}),\\" and quickly spinning up a deployment in minutes on any of the \\",(0,t.jsx)(e.a,{href:\\"https://www.elastic.co/guide/en/cloud/current/ec-reference-regions.html#ec_google_cloud_platform_gcp_regions\\",rel:\\"nofollow\\",children:\\"Elastic Cloud regions on Google Cloud\\"}),\\" around the world. Your Google Cloud Marketplace purchase of Elastic will be included in your monthly consolidated billing statement and will draw against your committed spend with Google Cloud.\\"]}),`\\n`,(0,t.jsx)(e.p,{children:(0,t.jsx)(e.em,{children:\\"The release and timing of any features or functionality described in this post remain at Elastic\'s sole discretion. 
Any features or functionality not currently available may not be delivered on time or at all.\\"})})]})}function h(i={}){let{wrapper:e}=i.components||{};return e?(0,t.jsx)(e,{...i,children:(0,t.jsx)(d,{...i})}):d(i)}return v(E);})();\\n;return Component;"},"_id":"articles/elastic-observability-monitors-metrics-google-cloud.mdx","_raw":{"sourceFilePath":"articles/elastic-observability-monitors-metrics-google-cloud.mdx","sourceFileName":"elastic-observability-monitors-metrics-google-cloud.mdx","sourceFileDir":"articles","contentType":"mdx","flattenedPath":"articles/elastic-observability-monitors-metrics-google-cloud"},"type":"Article","imageUrl":"/assets/images/observability-monitors-metrics-google-cloud/serverless-launch-blog-image.jpg","readingTime":"13 min read","url":"/observability-monitors-metrics-google-cloud","headings":[{"level":2,"title":"Prerequisites and config","href":"#prerequisites-and-config"},{"level":2,"title":"Three-tier application overview","href":"#three-tier-application-overview"},{"level":2,"title":"Setting it all up","href":"#setting-it-all-up"},{"level":3,"title":"Step 0: Get an account on Elastic Cloud","href":"#step-0-get-an-account-on-elastic-cloud"},{"level":3,"title":"Step 1: Deploy the Google Cloud three-tier application","href":"#step-1-deploy-the-google-cloud-three-tier-application"},{"level":3,"title":"Step 2: Create a Google Cloud Service Account and download credentials file","href":"#step-2-create-a-google-cloud-service-account-and-download-credentials-file"},{"level":3,"title":"Step 3: Create a Google Cloud VM instance","href":"#step-3-create-a-google-cloud-vm-instance"},{"level":3,"title":"Step 4: SSH in to the Google Cloud VM instance and upload the credentials file","href":"#step-4-ssh-in-to-the-google-cloud-vm-instance-and-upload-the-credentials-file"},{"level":3,"title":"Step 5: Add the Elastic Google Cloud integration","href":"#step-5-add-the-elastic-google-cloud-integration"},{"level":3,"title":"Step 6: Run traffic against the application","href":"#step-6-run-traffic-against-the-application"},{"level":3,"title":"Step 7: Go to Google Cloud dashboards in Elastic","href":"#step-7-go-to-google-cloud-dashboards-in-elastic"},{"level":2,"title":"What to monitor on Google Cloud next?","href":"#what-to-monitor-on-google-cloud-next"},{"level":3,"title":"Add logs from Google Cloud Services","href":"#add-logs-from-google-cloud-services"},{"level":3,"title":"Analyze your data with Elastic machine learning","href":"#analyze-your-data-with-elastic-machine-learning"},{"level":2,"title":"Conclusion: Monitoring Google Cloud service metrics with Elastic Observability is easy!","href":"#conclusion-monitoring-google-cloud-service-metrics-with-elastic-observability-is-easy"}]},{"title":"Elastic Observability monitors metrics for Microsoft Azure in just minutes","slug":"observability-monitors-metrics-microsoft-azure","date":"2024-01-29","description":"Follow this step-by-step process to enable Elastic Observability for Microsoft Azure metrics.","image":"Azure_Dark_(1).png","author":[{"slug":"jonathan-simon","type":"Author","_raw":{}},{"slug":"hemant-malik","type":"Author","_raw":{}}],"tags":[{"slug":"cloud-monitoring","type":"Tag","_raw":{}},{"slug":"metrics","type":"Tag","_raw":{}},{"slug":"azure","type":"Tag","_raw":{}}],"body":{"raw":"\\nDevelopers and SREs choose Microsoft Azure to run their applications because it is a trustworthy world-class cloud platform. 
It has also proven itself over the years as an extremely powerful and reliable infrastructure for hosting business-critical applications.\\n\\nElastic Observability offers over 25 out-of-the-box integrations for Microsoft Azure services with more on the way. A full list of Azure integrations can be found in [our online documentation](https://docs.elastic.co/integrations/azure).\\n\\nElastic Observability aggregates not only logs but also metrics for Azure services and the applications running on Azure compute services (Virtual Machines, Functions, Kubernetes Service, etc.). All this data can be analyzed visually and more intuitively using Elastic\xae’s advanced machine learning (ML) capabilities, which help detect performance issues and surface root causes before end users are affected.\\n\\nFor more details on how Elastic Observability provides application performance monitoring (APM) capabilities such as service maps, tracing, dependencies, and ML-based metrics correlations, read [APM correlations in Elastic Observability: Automatically identifying probable causes of slow or failed transactions](https://www.elastic.co/blog/apm-correlations-elastic-observability-root-cause-transactions).\\n\\nThat’s right, Elastic offers capabilities to collect, aggregate, and analyze metrics for Microsoft Azure services and applications running on Azure. Elastic Observability is for more than just capturing logs — it offers a unified observability solution for Microsoft Azure workloads.\\n\\nIn this blog, we’ll review how Elastic Observability can monitor metrics for a three-tier web application running on Microsoft Azure and leveraging:\\n\\n- Microsoft Azure Virtual Machines\\n- Microsoft Azure SQL database\\n- Microsoft Azure Virtual Network\\n\\nAs you will see, once the integration is installed, metrics will arrive instantly and you can immediately start deriving insights from metrics.\\n\\n## Prerequisites and config\\n\\nHere are some of the components and details we used to set up this demonstration:\\n\\n- Ensure you have a Microsoft Azure account and an Azure service principal with permission to read monitoring data from Microsoft Azure ([see details in our documentation](https://docs.elastic.co/integrations/azure_metrics/monitor#integration-specific-configuration-notes)).\\n- This post does _not_ cover application monitoring; instead, we will focus on how Microsoft Azure services can be easily monitored. If you want to get started with examples of application monitoring, see our [Hello World observability code samples](https://github.com/elastic/observability-examples/tree/main/azure/container-apps).\\n- In order to see metrics, you will need to load the application. We’ve also created a Playwright script to drive traffic to the application.\\n\\n## Three-tier application overview\\n\\nBefore we dive into the Elastic deployment setup and configuration, let\'s review what we are monitoring. 
If you follow the [Microsoft Learn N-tier example app](https://learn.microsoft.com/en-us/training/modules/n-tier-architecture/) instructions for deploying the \\"What\'s for Lunch?\\" app, you will have the following deployed.\\n\\n![three tier application overview](/assets/images/observability-monitors-metrics-microsoft-azure/blog-elastic-three-tier-application-overview.png)\\n\\nWhat’s deployed:\\n\\n- Microsoft Azure VM presentation tier that renders an HTML client in the user\'s browser and enables user requests to be sent to the “What’s for Lunch?” app\\n- Microsoft Azure VM application tier that communicates with the presentation and the database tier\\n- Microsoft Azure SQL instance in the database tier, handling requests from the application tier to store and serve data\\n\\nAt the end of the blog, we will also provide a Playwright script that can be run to send requests to this app in order to load it with example data and exercise its functionality. This will help drive metrics to “light up” the dashboards.\\n\\n## Setting it all up\\n\\nLet’s walk through the details of how to deploy the example three-tier application, Azure integration on Elastic and visualize what gets ingested in Elastic’s Kibana\xae dashboards.\\n\\n### Step 0: Get an account on Elastic Cloud\\n\\nFollow the instructions to [get started on Elastic Cloud](https://cloud.elastic.co/registration).\\n\\n![elastic cloud free trial sign up](/assets/images/observability-monitors-metrics-microsoft-azure/blog-elastic-free-trial.png)\\n\\n### Step 1: Deploy the Microsoft Azure three-tier application\\n\\nFrom the [Azure portal](https://portal.azure.com/), click the Cloud Shell icon at the top of the portal to open Cloud Shell…\\n\\n![open cloud shell](/assets/images/observability-monitors-metrics-microsoft-azure/blog-elastic-open-cloud-shell.png)\\n\\n… and when the Cloud Shell first opens, select **Bash** as the shell type to use.\\n\\n![cloud shell bash](/assets/images/observability-monitors-metrics-microsoft-azure/blog-elastic-cloud-shell-bash.png)\\n\\nIf you’re prompted that “You have no storage mounted,” then click the **Create storage** button to create a file store to be used for saving and editing files from Cloud Shell.\\n\\n![cloud shell create storage](/assets/images/observability-monitors-metrics-microsoft-azure/blog-elastic-create-storage.png)\\n\\nYou should now see the open Cloud Shell terminal.\\n\\n![cloud shell terminal](/assets/images/observability-monitors-metrics-microsoft-azure/blog-elastic-cloud-shell-terminal.png)\\n\\nRun the following command in Cloud Shell to define the environment variables that we’ll be using in the Cloud Shell commands required to deploy and view the sample application.\\n\\nBe sure to specify a valid RESOURCE_GROUP from your available [Resource Groups listed in the Azure portal](https://portal.azure.com/#view/HubsExtension/BrowseResourceGroups). Also specify a new password to replace the SpecifyNewPasswordHere placeholder text before running the command. 
See the Microsoft [password policy documentation](https://learn.microsoft.com/en-us/sql/relational-databases/security/password-policy?view=sql-server-ver16#password-complexity) for password requirements.\\n\\n```bash\\nRESOURCE_GROUP=\\"test\\"\\nAPP_PASSWORD=\\"SpecifyNewPasswordHere\\"\\n```\\n\\nRun the following az deployment group create command, which will deploy the example three-tier web app in around five minutes.\\n\\n```bash\\naz deployment group create --resource-group $RESOURCE_GROUP --template-uri https://raw.githubusercontent.com/MicrosoftDocs/mslearn-n-tier-architecture/master/Deployment/azuredeploy.json --parameters password=$APP_PASSWORD\\n```\\n\\nAfter the deployment has completed, run the following command, which returns the URL for the app.\\n\\n```bash\\naz deployment group show --output table --resource-group $RESOURCE_GROUP --name azuredeploy --query properties.outputs.webSiteUrl\\n```\\n\\nCopy the web app URL and paste it into a browser to view the example “What’s for Lunch?” web app.\\n\\n![whats for lunch app](/assets/images/observability-monitors-metrics-microsoft-azure/blog-elastic-whats-for-lunch.png)\\n\\n### Step 2: Create an Azure service principal and grant access permission\\n\\nGo to the [Microsoft Azure Portal](https://portal.azure.com/). Search for active directory and select **Microsoft Entra ID**.\\n\\n![search active directory](/assets/images/observability-monitors-metrics-microsoft-azure/blog-elastic-active-directory.png)\\n\\nCopy the **Tenant ID** for use in a later step in this blog post. This ID is required to configure Elastic Agent to connect to your Azure account.\\n\\n![your organization overview](/assets/images/observability-monitors-metrics-microsoft-azure/blog-elastic-your-organization-overview.png)\\n\\nIn the navigation pane, select **App registrations**.\\n\\n![your organization overview app registrations](/assets/images/observability-monitors-metrics-microsoft-azure/blog-elastic-your-organization-overview-app-registrations.png)\\n\\nThen click **New registration**.\\n\\n![your organization new registrations](/assets/images/observability-monitors-metrics-microsoft-azure/blog-elastic-your-organization-new-registration.png)\\n\\nType the name of your application (this tutorial uses three-tier-app-azure) and click **Register** (accept the default values for other settings).\\n\\n![register an application](/assets/images/observability-monitors-metrics-microsoft-azure/blog-elastic-register_an_application.png)\\n\\nCopy the **Application (client) ID** and save it for later. This ID is required to configure Elastic Agent to connect to your Azure account.\\n\\nIn the navigation pane, select **Certificates & secrets**, and then click **New client secret** to create a new security key.\\n\\n![three tier app new client secret](/assets/images/observability-monitors-metrics-microsoft-azure/blog-elastic-three-tier-app-new-client-secret.png)\\n\\nType a description of the secret and select an expiration. Click **Add** to create the client secret. Under **Value**, copy the secret value and save it (along with your client ID) for later.\\n\\nAfter creating the Azure service principal, you need to grant it the correct permissions. In the Azure Portal, search for and select **Subscriptions**.\\n\\n![three tier subscriptions](/assets/images/observability-monitors-metrics-microsoft-azure/blog-elastic-three-tier-subscriptions.png)\\n\\nIn the Subscriptions page, click the name of your subscription. 
On the subscription details page, copy your **Subscription ID** and save it for a later step.\\n\\n![subscription essentials copy](/assets/images/observability-monitors-metrics-microsoft-azure/blog-elastic-subscription-essentials-copy.png)\\n\\nIn the navigation pane, select **Access control (IAM)**.\\n\\n![subscription access control](/assets/images/observability-monitors-metrics-microsoft-azure/blog-elastic-subscription-access-control.png)\\n\\nClick **Add** and select **Add role assignment**.\\n\\n![subscription access control add role assignment](/assets/images/observability-monitors-metrics-microsoft-azure/blog-elastic-subscription-access-control-add-role-assignment.png)\\n\\nOn the **Role** tab, select the **Monitoring Reader** role and then click **Next**.\\n\\n![add role assignment monitoring reader](/assets/images/observability-monitors-metrics-microsoft-azure/blog-elastic-add-role-assignment-monitoring-readers.png)\\n\\nOn the **Members** tab, select the option to assign access to **User, group, or service principal**. Click **Select members**, and then search for and select the principal you created earlier. For the description, enter the name of your service principal. Click **Next** to review the role assignment.\\n\\n![add role assignment description](/assets/images/observability-monitors-metrics-microsoft-azure/blog-elastic-add-role-assignment-description.png)\\n\\nClick **Review + assign** to grant the service principal access to your subscription.\\n\\n![add role assignment review assign](/assets/images/observability-monitors-metrics-microsoft-azure/blog-elastic-add-role-assignment-review-assign.png)\\n\\n### Step 3: Create an Azure VM instance\\n\\nIn the Azure Portal, search for and select **Virtual machines**.\\n\\n![search virtual machines](/assets/images/observability-monitors-metrics-microsoft-azure/blog-elastic-search-virtual-machines.png)\\n\\nOn the **Virtual machines** page, click **+ Create** and select **Azure virtual machine**.\\n\\n![azure virtual machine](/assets/images/observability-monitors-metrics-microsoft-azure/blog-elastic-azure-virtual-machine.png)\\n\\nOn the Virtual machine creation page, enter a name like “metrics-vm” for the virtual machine name and select VM Size to be “Standard_D2s_v3 - 2 vcpus, 8 GiB memory.” Click the **Next : Disks** button.\\n\\n![create a virtual machine next disks](/assets/images/observability-monitors-metrics-microsoft-azure/blog-elastic-create-virtual-macine-next-disks.png)\\n\\nOn the **Disks** page, keep the default settings and click the **Next : Networking** button.\\n\\n![create a virtual machine next networking](/assets/images/observability-monitors-metrics-microsoft-azure/blog-elastic-create-virtual-machine-next-networking.png)\\n\\nOn the **Networking** page, demo-vnet should be selected for **Virtual network** and demo-biz-subnet should be selected for **Subnet**. 
These resources are created as part of the three-tier example app’s deployment that was done in Step 1.\\n\\nClick the **Review + create** button.\\n\\n![create virtual machine review create](/assets/images/observability-monitors-metrics-microsoft-azure/blog-elastic-create-virtual-machine-review-create.png)\\n\\nOn the **Review** page, click the **Create** button.\\n\\n![create virtual machine validation passed](/assets/images/observability-monitors-metrics-microsoft-azure/blog-elastic-create-virtual-machine-validation-passed.png)\\n\\n### Step 4: Install the Azure Resource Metrics integration\\n\\nIn your [Elastic Cloud](https://cloud.elastic.co/home) deployment, navigate to the Elastic Azure integrations by selecting **Integrations** from the top-level menu. Search for azure resource and click the **Azure Resource Metrics** tile.\\n\\n![integrations azure resource metrics](/assets/images/observability-monitors-metrics-microsoft-azure/blog-elastic-integrations-azure-resource-metrics.png)\\n\\nClick **Add Azure Resource Metrics.**\\n\\n![azure resource metrics](/assets/images/observability-monitors-metrics-microsoft-azure/blog-elastic-azure-resource-metrics.png)\\n\\nClick **Add integration only (skip agent installation)**.\\n\\n![add integration only](/assets/images/observability-monitors-metrics-microsoft-azure/blog-elastic-add-integration-only.png)\\n\\nEnter the values that you saved previously for Client ID, Client Secret, Tenant ID, and Subscription ID.\\n\\n![add azure resource metrics integration](/assets/images/observability-monitors-metrics-microsoft-azure/blog-elastic-add-azure-resource-metrics-integration.png)\\n\\nAs you can see, the Azure Resource Metrics integration will collect a significant amount of data from eight Azure services. Click **Save and continue**.\\n\\nYou’ll be presented with a confirmation dialog window. Click **Add Elastic Agent to your hosts**.\\n\\n![azure resource metrics integration added](/assets/images/observability-monitors-metrics-microsoft-azure/blog-elastic-azure-resource-metrics-integration-added.png)\\n\\nThis will display the instructions required to install the Elastic agent. Copy the command under the **Linux Tar** tab.\\n\\n![add agent linux tar](/assets/images/observability-monitors-metrics-microsoft-azure/blog-elastic-add-agent.png)\\n\\nNext you will need to use SSH to log in to the Azure VM instance and run the commands copied from the **Linux Tar** tab. Go to [Azure Virtual Machines](https://portal.azure.com/#blade/HubsExtension/BrowseResourceBlade/resourceType/Microsoft.Compute/VirtualMachines) in the Azure portal. Then click the name of the VM instance that you created in Step 3.\\n\\n![metrics vm](/assets/images/observability-monitors-metrics-microsoft-azure/blog-elastic-metrics-vm.png)\\n\\nClick the **Select** button in the **SSH Using Azure CLI** section.\\n\\n![metrics vm connect](/assets/images/observability-monitors-metrics-microsoft-azure/blog-elastic-metrics-vm-connect.png)\\n\\nSelect the “I understand …” checkbox and then click the **Configure + connect** button.\\n\\n![ssh using azure cli](/assets/images/observability-monitors-metrics-microsoft-azure/blog-elastic-ssh-using-azure-cli.png)\\n\\nOnce you are SSH’d inside the VM instance terminal window, run the commands copied previously from the **Linux Tar** tab in the **Install Elastic Agent on your host** instructions. 
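\\n\\nFor reference, the copied commands generally follow the shape below; treat the version number, download URL, Fleet URL, and enrollment token as placeholders and use the exact commands shown for your deployment:\\n\\n```bash\\n# Download and unpack Elastic Agent (version is a placeholder)\\ncurl -L -O https://artifacts.elastic.co/downloads/beats/elastic-agent/elastic-agent-8.12.0-linux-x86_64.tar.gz\\ntar xzvf elastic-agent-8.12.0-linux-x86_64.tar.gz\\ncd elastic-agent-8.12.0-linux-x86_64\\n# Enroll the agent against your Fleet server\\nsudo ./elastic-agent install --url=YOUR_FLEET_URL --enrollment-token=YOUR_ENROLLMENT_TOKEN\\n```\\n\\n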
When the installation completes, you’ll see a confirmation message in the Install Elastic Agent on your host form.\\n\\n![add agent confirmed](/assets/images/observability-monitors-metrics-microsoft-azure/blog-elastic-add-agent-confirmed.png)\\n\\nSuper! The Elastic agent is sending data to Elastic Cloud. Now let’s observe some metrics.\\n\\n### Step 5: Run traffic against the application\\n\\nWhile getting the application running is fairly easy, there is nothing to monitor or observe with Elastic unless you add a load on the application.\\n\\nHere is a simple script you can also run using [Playwright](https://playwright.dev/) to add traffic and exercise the functionality of the Azure three-tier application:\\n\\n```javascript\\nimport { test, expect } from \\"@playwright/test\\";\\n\\ntest(\\"homepage for Microsoft Azure three tier app\\", async ({ page }) => {\\n // Load web app\\n await page.goto(\\"https://20.172.198.231/\\");\\n // Add lunch suggestions\\n await page.fill(\\"id=txtAdd\\", \\"tacos\\");\\n await page.keyboard.press(\\"Enter\\");\\n await page.waitForTimeout(1000);\\n await page.fill(\\"id=txtAdd\\", \\"sushi\\");\\n await page.keyboard.press(\\"Enter\\");\\n await page.waitForTimeout(1000);\\n await page.fill(\\"id=txtAdd\\", \\"pizza\\");\\n await page.keyboard.press(\\"Enter\\");\\n await page.waitForTimeout(1000);\\n await page.fill(\\"id=txtAdd\\", \\"burgers\\");\\n await page.keyboard.press(\\"Enter\\");\\n await page.waitForTimeout(1000);\\n await page.fill(\\"id=txtAdd\\", \\"salad\\");\\n await page.keyboard.press(\\"Enter\\");\\n await page.waitForTimeout(1000);\\n await page.fill(\\"id=txtAdd\\", \\"sandwiches\\");\\n await page.keyboard.press(\\"Enter\\");\\n await page.waitForTimeout(1000);\\n // Click vote buttons\\n await page.getByRole(\\"button\\").nth(1).click();\\n await page.getByRole(\\"button\\").nth(3).click();\\n await page.getByRole(\\"button\\").nth(5).click();\\n await page.getByRole(\\"button\\").nth(7).click();\\n await page.getByRole(\\"button\\").nth(9).click();\\n await page.getByRole(\\"button\\").nth(11).click();\\n // Click remove buttons\\n await page.getByRole(\\"button\\").nth(12).click();\\n await page.getByRole(\\"button\\").nth(10).click();\\n await page.getByRole(\\"button\\").nth(8).click();\\n await page.getByRole(\\"button\\").nth(6).click();\\n await page.getByRole(\\"button\\").nth(4).click();\\n await page.getByRole(\\"button\\").nth(2).click();\\n});\\n```\\n\\n### Step 6: View Azure dashboards in Elastic\\n\\nWith Elastic Agent running, you can go to Elastic Dashboards to view what’s being ingested. Simply search for “dashboard” in Elastic and choose **Dashboard**.\\n\\n![dashboard](/assets/images/observability-monitors-metrics-microsoft-azure/blog-elastic-dashboard.png)\\n\\nThis will open the Elastic Dashboards page. 
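\\n\\nOptionally, before opening a dashboard, you can confirm that documents are arriving. A quick terminal check, assuming the default data stream naming for the Azure integrations (the endpoint and credentials are placeholders for your deployment):\\n\\n```bash\\n# Count documents in the Azure metrics data streams\\ncurl -s -u elastic:YOUR_PASSWORD \\"https://YOUR_DEPLOYMENT_URL:9243/metrics-azure*/_count?pretty\\"\\n```\\n\\n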
In the Dashboards search box, search for “azure vm” and click the **[Azure Metrics] Compute VMs Overview** dashboard, one of the many out-of-the-box dashboards available.\\n\\n![dashboards create](/assets/images/observability-monitors-metrics-microsoft-azure/blog-elastic-dashboards-create.png)\\n\\nYou will see a dashboard populated with your deployed application’s VM metrics.\\n\\n![azure compute vm](/assets/images/observability-monitors-metrics-microsoft-azure/blog-elastic-azure-compute-vm.png)\\n\\nOn the Azure Compute VM dashboard, we can see a sampling of the many available metrics:\\n\\n- CPU utilization\\n- Available memory\\n- Network sent and received bytes\\n- Disk read and write metrics\\n\\nFor metrics not covered by the out-of-the-box dashboards, you can easily create custom dashboards to visualize the metrics that are important to you.\\n\\n**Congratulations, you have now started monitoring metrics from Microsoft Azure services for your application!**\\n\\n## Analyze your data with Elastic AI Assistant\\n\\nOnce metrics, logs, or both are in Elastic, start analyzing your data with [context-aware insights using the Elastic AI Assistant for Observability](https://www.elastic.co/blog/context-aware-insights-elastic-ai-assistant-observability).\\n\\n## Conclusion: Monitoring Microsoft Azure service metrics with Elastic Observability is easy!\\n\\nWe hope you’ve gained an appreciation for how Elastic Observability can help you monitor Azure service metrics. Here’s a quick recap of what you learned:\\n\\n- Elastic Observability supports ingest and analysis of Azure service metrics.\\n- It’s easy to set up ingest from Azure services via the Elastic Agent.\\n- Elastic Observability has multiple out-of-the-box Azure service dashboards you can use to review information right away and then modify for your needs.\\n\\nTry it out for yourself by signing up via [Microsoft Azure Marketplace](https://portal.azure.com/#view/Microsoft_Azure_Marketplace/GalleryItemDetailsBladeNopdl/id/elastic.ec-azure-pp) and quickly spinning up a deployment in minutes on any of the [Elastic Cloud regions on Microsoft Azure](https://www.elastic.co/guide/en/cloud/current/ec-reference-regions.html#ec_azure_regions) around the world. Your Azure Marketplace purchase of Elastic will be included in your monthly consolidated billing statement and will draw against your committed spend with Microsoft Azure.\\n\\n_The release and timing of any features or functionality described in this post remain at Elastic\'s sole discretion. 
Any features or functionality not currently available may not be delivered on time or at all._\\n"},"_id":"articles/elastic-observability-monitors-metrics-microsoft-azure.mdx","_raw":{"sourceFilePath":"articles/elastic-observability-monitors-metrics-microsoft-azure.mdx","sourceFileName":"elastic-observability-monitors-metrics-microsoft-azure.mdx","sourceFileDir":"articles","contentType":"mdx","flattenedPath":"articles/elastic-observability-monitors-metrics-microsoft-azure"},"type":"Article","imageUrl":"/assets/images/observability-monitors-metrics-microsoft-azure/Azure_Dark_(1).png","readingTime":"12 min read","url":"/observability-monitors-metrics-microsoft-azure","headings":[{"level":2,"title":"Prerequisites and config","href":"#prerequisites-and-config"},{"level":2,"title":"Three-tier application overview","href":"#three-tier-application-overview"},{"level":2,"title":"Setting it all up","href":"#setting-it-all-up"},{"level":3,"title":"Step 0: Get an account on Elastic Cloud","href":"#step-0-get-an-account-on-elastic-cloud"},{"level":3,"title":"Step 1: Deploy the Microsoft Azure three-tier application","href":"#step-1-deploy-the-microsoft-azure-three-tier-application"},{"level":3,"title":"Step 2: Create an Azure service principal and grant access permission","href":"#step-2-create-an-azure-service-principal-and-grant-access-permission"},{"level":3,"title":"Step 3: Create an Azure VM instance","href":"#step-3-create-an-azure-vm-instance"},{"level":3,"title":"Step 4: Install the Azure Resource Metrics integration","href":"#step-4-install-the-azure-resource-metrics-integration"},{"level":3,"title":"Step 5: Run traffic against the application","href":"#step-5-run-traffic-against-the-application"},{"level":3,"title":"Step 6: View Azure dashboards in Elastic","href":"#step-6-view-azure-dashboards-in-elastic"},{"level":2,"title":"Analyze your data with Elastic AI Assistant","href":"#analyze-your-data-with-elastic-ai-assistant"},{"level":2,"title":"Conclusion: Monitoring Microsoft Azure service metrics with Elastic Observability is easy!","href":"#conclusion-monitoring-microsoft-azure-service-metrics-with-elastic-observability-is-easy"}]},{"title":"Using Elastic to observe GKE Autopilot clusters","slug":"observe-gke-autopilot-clusters","date":"2023-03-15","description":"See how deploying the Elastic Agent onto a GKE Autopilot cluster makes observing the cluster’s behavior easy. 
Kibana integrations make visualizing the behavior a simple addition to your observability dashboards.","image":"blog-elastic-kubernetes-dashboard.png","author":[{"slug":"eric-lowry","type":"Author","_raw":{}}],"subtitle":"Elastic Agent provides a new observability option for fully managed GKE clusters","tags":[{"slug":"cloud-monitoring","type":"Tag","_raw":{}},{"slug":"kubernetes","type":"Tag","_raw":{}},{"slug":"google-cloud","type":"Tag","_raw":{}},{"slug":"gke","type":"Tag","_raw":{}},{"slug":"metrics","type":"Tag","_raw":{}},{"slug":"elastic-agent","type":"Tag","_raw":{}}],"body":{"raw":"\\nElastic has formally supported Google Kubernetes Engine (GKE) since January 2020, when Elastic Cloud on Kubernetes was announced. Since then, Google has expanded GKE with new service offerings and delivery mechanisms. One of those new offerings is GKE Autopilot. Whereas GKE is a managed Kubernetes environment, GKE Autopilot is a mode of operation in which Google manages your cluster configuration, scaling, security, and more. It is production-ready and removes many of the challenges associated with tasks like workload management, deployment automation, and scalability rules. Autopilot lets you focus on building and deploying your application while Google manages everything else.\\n\\nElastic is committed to supporting Google Kubernetes Engine (GKE) in all of its delivery modes. In October, during the Google Cloud Next ’22 event, we announced our intention to integrate and certify Elastic Agent on Anthos, Autopilot, Google Distributed Cloud, and more.\\n\\nSince that event, we have worked together with Google to get the Elastic Agent certified for use on Anthos, but we didn’t stop there.\\n\\nToday we are happy to [announce](https://github.com/elastic/elastic-agent/blob/autopilotdocumentaton/docs/elastic-agent-gke-autopilot.md) that we have been certified for operation on GKE Autopilot.\\n\\n## Hands on with Elastic and GKE Autopilot\\n\\n### [Kubernetes observability](https://www.elastic.co/observability/kubernetes-monitoring) has never been easier\\n\\nTo show how easy it is to get started with Autopilot and Elastic, let\'s walk through deploying the Elastic Agent on an Autopilot cluster. I’ll show how easy it is to set up and monitor an Autopilot cluster with the Elastic Agent and observe the cluster’s behavior with Kibana integrations.\\n\\nOne of the main differences between GKE and GKE Autopilot is that Autopilot protects the system namespace “kube-system.” To increase the stability and security of a cluster, Autopilot prevents user space workloads from adding or modifying system pods. The default configuration for Elastic Agent is to install itself into the system namespace. The majority of the changes we will make here are to convince the Elastic Agent to run in a different namespace.\\n\\n## Let’s get started with Elastic Stack!\\n\\nWhile writing this article, I used the latest version of Elastic. The best way for you to get started with Elastic Observability is to:\\n\\n1. Get an account on [Elastic Cloud](https://cloud.elastic.co/registration?fromURI=/home) and look at this [tutorial](https://www.elastic.co/videos/training-how-to-series-cloud) to help launch your first stack, or\\n2. [Launch Elastic Cloud on your Google Account](https://www.elastic.co/partners/google-cloud)\\n\\n## Provisioning an Autopilot cluster and an Elastic stack\\n\\nTo test the agent, I first deployed the recommended, default GKE Autopilot cluster.
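\\n\\nFor reference, the CLI equivalent is a one-liner. A minimal sketch, assuming an authenticated gcloud session; the cluster name and region here are placeholders:\\n\\n```bash\\ngcloud container clusters create-auto autopilot-demo --region us-central1\\n```\\n\\n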
Elastic’s GKE integration supports kube-state-metrics (KSM), which increases the number of metrics available for reporting and dashboards. Like the Elastic Agent, KSM defaults to running in the system namespace, so I modified its manifest to work with Autopilot. For my testing, I also deployed a basic Elastic stack on Elastic Cloud in the same Google region as my Autopilot cluster. I used a fresh cluster deployed on Elastic’s managed service (ESS), but the process is the same if you are using an Elastic Cloud subscription purchased through the Google marketplace.\\n\\n## Adding Elastic Observability to GKE Autopilot\\n\\nBecause this is a brand-new deployment, Elastic suggests adding integrations to it. Let’s add the Kubernetes integration into the new deployment:\\n\\n![elastic agent GKE autopilot welcome](/assets/images/observe-gke-autopilot-clusters/blog-welcome-to-elastic.png)\\n\\nElastic offers hundreds of integrations; filter the list by typing “kub” into the search bar (1) and then click the Kubernetes integration (2).\\n\\n![elastic agent GKE autopilot kubernetes integration](/assets/images/observe-gke-autopilot-clusters/blog-elastic-kubernetes-integration.png)\\n\\nThe Kubernetes integration page gives you an overview of the integration and lets you manage the Kubernetes clusters you want to observe. We haven’t added a cluster yet, so I clicked “Add Kubernetes” to add the first integration.\\n\\n![elastic agent GKE autopilot add kubernetes](/assets/images/observe-gke-autopilot-clusters/blog-elastic-add-kubernetes.png)\\n\\nI changed the integration name to reflect the Kubernetes offering type and then clicked “Save and continue” to accept the integration defaults.\\n\\n![elastic agent GKE autopilot add kubernetes integration](/assets/images/observe-gke-autopilot-clusters/blog-elastic-add-kubernetes-integration.png)\\n\\nAt this point, an Agent policy has been created. Now it’s time to install the agent. I clicked on the “Kubernetes” integration.\\n\\n![elastic agent GKE autopilot agent policy](/assets/images/observe-gke-autopilot-clusters/blog-elastic-agent-policy-1.png)\\n\\nThen I selected the “integration policies” tab (1) and clicked “Add agent” (2).\\n\\n![elastic agent GKE autopilot add agent](/assets/images/observe-gke-autopilot-clusters/blog-elastic-add-agent.png)\\n\\nFinally, I downloaded the full manifest for a standard GKE environment.\\n\\n![elastic agent GKE autopilot download manifest](/assets/images/observe-gke-autopilot-clusters/blog-elastic-download-manifest.png)\\n\\nWe won’t be using this manifest directly, but it contains many of the values that we will need to deploy the agent on Autopilot in the next section.\\n\\nThe Elastic stack is ready and waiting for the Autopilot logs, metrics, and events. It’s time to connect Autopilot to this deployment using the Elastic Agent for GKE.\\n\\n## Connect Autopilot to Elastic\\n\\nFrom the Google Cloud Shell terminal, I downloaded and edited the Elastic Agent manifest for GKE Autopilot (note the raw.githubusercontent.com host, which serves the file itself rather than the GitHub HTML page).\\n\\n```bash\\n$ curl -o elastic-agent-managed-gke-autopilot.yaml \\\\\\nhttps://raw.githubusercontent.com/elastic/elastic-agent/autopilotdocumentaton/docs/manifests/elastic-agent-managed-gke-autopilot.yaml\\n```\\n\\n![elastic agent GKE autopilot cloud shell editor](/assets/images/observe-gke-autopilot-clusters/blog-elastic-cloud-shell-editor.png)\\n\\nI used the Cloud Shell editor to configure the manifest for my Autopilot and Elastic clusters.
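\\n\\nBefore editing, it’s worth confirming that the download is actual YAML rather than an HTML page. A quick sanity check, using the file name from the curl command above:\\n\\n```bash\\nhead -n 5 elastic-agent-managed-gke-autopilot.yaml\\n```\\n\\nThe first few lines should look like a Kubernetes manifest (apiVersion, kind, and so on).\\n\\n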
In the editor, I updated the agent image to match the version of Elastic that I installed (8.6.0):\\n\\n```yaml\\ncontainers:\\n  - name: elastic-agent\\n    image: docker.elastic.co/beats/elastic-agent:8.6.0\\n```\\n\\n![elastic agent GKE autopilot google cloud](/assets/images/observe-gke-autopilot-clusters/blog-elastic-google-cloud.png)\\n\\nFrom the Integration manifest I downloaded earlier, I copied the values for FLEET_URL and FLEET_ENROLLMENT_TOKEN into this YAML file.\\n\\nNow it’s time to apply the updated manifest to the Autopilot instance.\\n\\nBefore I commit, I always like to see what’s going to be created (and check for syntax errors) with a dry run.\\n\\n```bash\\n$ clear\\n$ kubectl apply --dry-run=\\"client\\" -f elastic-agent-managed-gke-autopilot.yaml\\n```\\n\\n![elastic agent GKE autopilot dry run](/assets/images/observe-gke-autopilot-clusters/blog-elastic-dry-run.png)\\n\\nEverything looks good, so I’ll do it for real this time.\\n\\n```bash\\n$ clear\\n$ kubectl apply -f elastic-agent-managed-gke-autopilot.yaml\\n```\\n\\n![elastic agent GKE autopilot cluster](/assets/images/observe-gke-autopilot-clusters/blog-elastic-autopilot-cluster.png)\\n\\nAfter several minutes, metrics will start flowing from the Autopilot cluster directly into the Elastic deployment.\\n\\n## Adding a workload to the Autopilot cluster\\n\\nObserving an Autopilot cluster without a workload is boring, so I deployed a modified version of Google’s [Hipster Shop](https://github.com/bshetti/opentelemetry-microservices-demo) (which includes OpenTelemetry reporting):\\n\\n```bash\\n$ git clone https://github.com/bshetti/opentelemetry-microservices-demo\\n$ cd opentelemetry-microservices-demo\\n$ nano ./deploy-with-collector-k8s/otelcollector.yaml\\n```\\n\\nTo get the application’s telemetry talking to our Elastic stack, I replaced all instances of the exporter type from HTTP (otlphttp/elastic) to gRPC (otlp/elastic).
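\\n\\nAfter those substitutions (including the endpoint and header values described next), the exporter section of the collector config ends up looking roughly like this sketch, with placeholder values standing in for my deployment’s details:\\n\\n```yaml\\nexporters:\\n  otlp/elastic:\\n    # gRPC endpoint of the Elastic APM server (placeholder value)\\n    endpoint: \\"my-deployment.apm.us-central1.gcp.cloud.es.io:443\\"\\n    headers:\\n      # Secret token from the APM integration settings (placeholder value)\\n      Authorization: \\"Bearer <apm-secret-token>\\"\\n```\\n\\n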
I then replaced OTEL_EXPORTER_OTLP_ENDPOINT with my APM endpoint and OTEL_EXPORTER_OTLP_HEADERS with my APM authorization bearer token.\\n\\n![elastic agent GKE autopilot terminal telemetry](/assets/images/observe-gke-autopilot-clusters/blog-elastic-terminal-telemetry.png)\\n\\nThen I deployed the Hipster Shop.\\n\\n```bash\\n$ kubectl create -f ./deploy-with-collector-k8s/adservice.yaml\\n$ kubectl create -f ./deploy-with-collector-k8s/redis.yaml\\n$ kubectl create -f ./deploy-with-collector-k8s/cartservice.yaml\\n$ kubectl create -f ./deploy-with-collector-k8s/checkoutservice.yaml\\n$ kubectl create -f ./deploy-with-collector-k8s/currencyservice.yaml\\n$ kubectl create -f ./deploy-with-collector-k8s/emailservice.yaml\\n$ kubectl create -f ./deploy-with-collector-k8s/frontend.yaml\\n$ kubectl create -f ./deploy-with-collector-k8s/paymentservice.yaml\\n$ kubectl create -f ./deploy-with-collector-k8s/productcatalogservice.yaml\\n$ kubectl create -f ./deploy-with-collector-k8s/recommendationservice.yaml\\n$ kubectl create -f ./deploy-with-collector-k8s/shippingservice.yaml\\n$ kubectl create -f ./deploy-with-collector-k8s/loadgenerator.yaml\\n```\\n\\nOnce all of the shop’s pods were running, I deployed the OpenTelemetry collector.\\n\\n```bash\\n$ kubectl create -f ./deploy-with-collector-k8s/otelcollector.yaml\\n```\\n\\n![elastic agent GKE autopilot deployed opentelemetry collector](/assets/images/observe-gke-autopilot-clusters/blog-elastic-deployed-opentelemetry-collector.png)\\n\\n## Observe and visualize Autopilot’s metrics\\n\\nNow that we have added the Elastic Agent to our Autopilot cluster and added a workload, let\'s take a look at some of the Kubernetes visualizations the integration provides out of the box.\\n\\nThe “[Metrics Kubernetes] Overview” dashboard is a great place to start. It provides a high-level view of the resources used by the cluster and allows me to drill into more specific dashboards that I find interesting:\\n\\n![elastic agent GKE autopilot create visualization](/assets/images/observe-gke-autopilot-clusters/blog-elastic-create-visualization.png)\\n\\nFor example, the “[Metrics Kubernetes] Pods” dashboard gives me a high-level view of the pods deployed in the cluster:\\n\\n![elastic agent GKE autopilot pod](/assets/images/observe-gke-autopilot-clusters/blog-elastic-pod.png)\\n\\nThe “[Metrics Kubernetes] Volumes” dashboard gives me an in-depth view into how storage is allocated and used in the Autopilot cluster:\\n\\n![elastic agent GKE autopilot filesystem information](/assets/images/observe-gke-autopilot-clusters/blog-elastic-filesystem-information.png)\\n\\n## Creating an alert\\n\\nFrom here, I can easily discover patterns in my cluster’s behavior and even create alerts. Here is an example of an alert to notify me if the main storage volume (called “volume”) exceeds 80% of its allocated space:\\n\\n![elastic agent GKE autopilot create rule](/assets/images/observe-gke-autopilot-clusters/blog-elastic-create-rule-elasticsearch-query.png)\\n\\nWith a little work, I created this view from the standard dashboard:\\n\\n![elastic agent GKE autopilot kubernetes dashboard](/assets/images/observe-gke-autopilot-clusters/blog-elastic-kubernetes-dashboard.png)\\n\\n## Conclusion\\n\\nToday I have shown how easy it is to monitor, observe, and generate alerts on a GKE Autopilot cluster. 
To get more information on what is possible, see the official Elastic documentation for [Autopilot observability with Elastic Agent](https://github.com/elastic/elastic-agent/blob/autopilotdocumentaton/docs/elastic-agent-gke-autopilot.md).\\n\\n## Next steps\\n\\nIf you don’t have Elastic yet, you can get started for free with an [Elastic Trial](https://www.elastic.co/cloud/elasticsearch-service/signup) today. Get more from Elastic and Google together with a [Marketplace subscription](https://console.cloud.google.com/marketplace/browse?q=Elastic&utm_source=Elastic&utm_medium=qwiklabs&utm_campaign=Qwiklabs+to+Marketplace). Elastic does more than just integrate with GKE — check out the almost [300 integrations](https://www.elastic.co/integrations) that Elastic provides.\\n"}
For example, I updated the following:\\"}),`\\n`,(0,t.jsx)(e.pre,{children:(0,t.jsx)(e.code,{className:\\"language-yaml\\",children:`containers:\\n - name: elastic-agent\\n image: docker.elastic.co/beats/elastic-agent:8.16.1\\n`})}),`\\n`,(0,t.jsx)(e.p,{children:\\"I also changed the agent to the version of Elastic that I installed (8.6.0).\\"}),`\\n`,(0,t.jsx)(e.p,{children:(0,t.jsx)(e.img,{src:\\"/assets/images/observe-gke-autopilot-clusters/blog-elastic-google-cloud.png\\",alt:\\"elastic agent GKE autopilot google cloud\\",width:\\"1999\\",height:\\"1171\\"})}),`\\n`,(0,t.jsx)(e.p,{children:\\"From the Integration manifest I downloaded earlier, I copied the values for FLEET_URL and FLEET_ENROLLMENT_TOKEN into this YAML file.\\"}),`\\n`,(0,t.jsx)(e.p,{children:\\"Now it\\\\u2019s time to apply the updated manifest to the Autopilot instance.\\"}),`\\n`,(0,t.jsx)(e.p,{children:\\"Before I commit, I always like to see what\\\\u2019s going to be created (and check for syntax errors) with a dry run.\\"}),`\\n`,(0,t.jsx)(e.pre,{children:(0,t.jsx)(e.code,{className:\\"language-bash\\",children:`$ clear\\n$ kubectl apply --dry-run=\\"client\\" -f elastic-agent-managed-gke-autopilot.yaml\\n`})}),`\\n`,(0,t.jsx)(e.p,{children:(0,t.jsx)(e.img,{src:\\"/assets/images/observe-gke-autopilot-clusters/blog-elastic-dry-run.png\\",alt:\\"elastic agent GKE autopilot dry run\\",width:\\"1999\\",height:\\"354\\"})}),`\\n`,(0,t.jsx)(e.p,{children:\\"Everything looks good, so I\\\\u2019ll do it for real this time.\\"}),`\\n`,(0,t.jsx)(e.pre,{children:(0,t.jsx)(e.code,{className:\\"language-bash\\",children:`$ clear\\n$ kubectl apply -f elastic-agent-managed-gke-autopilot.yaml\\n`})}),`\\n`,(0,t.jsx)(e.p,{children:(0,t.jsx)(e.img,{src:\\"/assets/images/observe-gke-autopilot-clusters/blog-elastic-autopilot-cluster.png\\",alt:\\"elastic agent GKE autopilot cluster\\",width:\\"1999\\",height:\\"379\\"})}),`\\n`,(0,t.jsx)(e.p,{children:\\"After several minutes, metrics will start flowing from the Autopilot cluster directly into the Elastic deployment.\\"}),`\\n`,(0,t.jsx)(e.h2,{id:\\"adding-a-workload-to-the-autopilot-cluster\\",children:\\"Adding a workload to the Autopilot cluster\\"}),`\\n`,(0,t.jsxs)(e.p,{children:[\\"Observing an Autopilot cluster without a workload is boring, so I deployed a modified version of Google\\\\u2019s \\",(0,t.jsx)(e.a,{href:\\"https://github.com/bshetti/opentelemetry-microservices-demo\\",rel:\\"nofollow\\",children:\\"Hipster Shop\\"}),\\" (which includes OpenTelemetry reporting):\\"]}),`\\n`,(0,t.jsx)(e.pre,{children:(0,t.jsx)(e.code,{className:\\"language-yaml\\",children:`$ git clone https://github.com/bshetti/opentelemetry-microservices-demo\\n$ cd opentelemetry-microservices-demo\\n$ nano ./deploy-with-collector-k8s/otelcollector.yaml\\n`})}),`\\n`,(0,t.jsx)(e.p,{children:\\"To get the application\\\\u2019s telemetry talking to our Elastic stack, I replaced all instances of the exporter type from HTTP (otlphttp/elastic) to gRPC (otlp/elastic). 
I then replaced OTEL_EXPORTER_OTLP_ENDPOINT with my APM endpoint and I replaced OTEL_EXPORTER_OTLP_HEADERS with my APM OTEL Bearer and Token.\\"}),`\\n`,(0,t.jsx)(e.p,{children:(0,t.jsx)(e.img,{src:\\"/assets/images/observe-gke-autopilot-clusters/blog-elastic-terminal-telemetry.png\\",alt:\\"elastic agent GKE autopilot terminal telemetry\\",width:\\"1999\\",height:\\"700\\"})}),`\\n`,(0,t.jsx)(e.p,{children:\\"Then I deployed the Hipster Shop.\\"}),`\\n`,(0,t.jsx)(e.pre,{children:(0,t.jsx)(e.code,{className:\\"language-bash\\",children:`$ kubectl create -f ./deploy-with-collector-k8s/adservice.yaml\\n$ kubectl create -f ./deploy-with-collector-k8s/redis.yaml\\n$ kubectl create -f ./deploy-with-collector-k8s/cartservice.yaml\\n$ kubectl create -f ./deploy-with-collector-k8s/checkoutservice.yaml\\n$ kubectl create -f ./deploy-with-collector-k8s/currencyservice.yaml\\n$ kubectl create -f ./deploy-with-collector-k8s/emailservice.yaml\\n$ kubectl create -f ./deploy-with-collector-k8s/frontend.yaml\\n$ kubectl create -f ./deploy-with-collector-k8s/paymentservice.yaml\\n$ kubectl create -f ./deploy-with-collector-k8s/productcatalogservice.yaml\\n$ kubectl create -f ./deploy-with-collector-k8s/recommendationservice.yaml\\n$ kubectl create -f ./deploy-with-collector-k8s/shippingservice.yaml\\n$ kubectl create -f ./deploy-with-collector-k8s/loadgenerator.yaml\\n`})}),`\\n`,(0,t.jsx)(e.p,{children:\\"Once all of the shop\\\\u2019s pods were running, I deployed the OpenTelemetry collector.\\"}),`\\n`,(0,t.jsx)(e.pre,{children:(0,t.jsx)(e.code,{className:\\"language-bash\\",children:`$ kubectl create -f ./deploy-with-collector-k8s/otelcollector.yaml\\n`})}),`\\n`,(0,t.jsx)(e.p,{children:(0,t.jsx)(e.img,{src:\\"/assets/images/observe-gke-autopilot-clusters/blog-elastic-deployed-opentelemetry-collector.png\\",alt:\\"elastic agent GKE autopilot deployed opentelemetry collector\\",width:\\"1999\\",height:\\"448\\"})}),`\\n`,(0,t.jsx)(e.h2,{id:\\"observe-and-visualize-autopilots-metrics\\",children:\\"Observe and visualize Autopilot\\\\u2019s metrics\\"}),`\\n`,(0,t.jsx)(e.p,{children:\\"Now that we have added the Elastic Agent to our Autopilot cluster and added a workload, let\'s take a look at some of the Kubernetes visualizations the integration provides out of the box.\\"}),`\\n`,(0,t.jsx)(e.p,{children:\\"The \\\\u201C[Metrics Kubernetes] Overview\\\\u201D is a great place to start. 
It provides a high-level view of the resources used by the cluster and allows me to drill into more specific dashboards that I find interesting:\\"}),`\\n`,(0,t.jsx)(e.p,{children:(0,t.jsx)(e.img,{src:\\"/assets/images/observe-gke-autopilot-clusters/blog-elastic-create-visualization.png\\",alt:\\"elastic agent GKE autopilot create visualization\\",width:\\"1999\\",height:\\"1121\\"})}),`\\n`,(0,t.jsx)(e.p,{children:\\"For example, the \\\\u201C[Metrics Kubernetes] Pods\\\\u201D gives me a high-level view of the pods deployed in the cluster:\\"}),`\\n`,(0,t.jsx)(e.p,{children:(0,t.jsx)(e.img,{src:\\"/assets/images/observe-gke-autopilot-clusters/blog-elastic-pod.png\\",alt:\\"elastic agent GKE autopilot pod\\",width:\\"1999\\",height:\\"1121\\"})}),`\\n`,(0,t.jsx)(e.p,{children:\\"The \\\\u201C[Metrics Kubernetes] Volumes\\\\u201D gives me an in-depth view to how storage is allocated and used in the Autopilot cluster:\\"}),`\\n`,(0,t.jsx)(e.p,{children:(0,t.jsx)(e.img,{src:\\"/assets/images/observe-gke-autopilot-clusters/blog-elastic-filesystem-information.png\\",alt:\\"elastic agent GKE autopilot filesystem information\\",width:\\"1999\\",height:\\"1183\\"})}),`\\n`,(0,t.jsx)(e.h2,{id:\\"creating-an-alert\\",children:\\"Creating an alert\\"}),`\\n`,(0,t.jsx)(e.p,{children:\\"From here, I can easily discover patterns in my cluster\\\\u2019s behavior and even create Alerts. Here is an example of an alert to notify me if the the main storage volume (called \\\\u201Cvolume\\\\u201D) exceeds 80% of its allocated space:\\"}),`\\n`,(0,t.jsx)(e.p,{children:(0,t.jsx)(e.img,{src:\\"/assets/images/observe-gke-autopilot-clusters/blog-elastic-create-rule-elasticsearch-query.png\\",alt:\\"elastic agent GKE autopilot create rule\\",width:\\"1999\\",height:\\"1483\\"})}),`\\n`,(0,t.jsx)(e.p,{children:\\"With a little work, I created this view from the standard dashboard:\\"}),`\\n`,(0,t.jsx)(e.p,{children:(0,t.jsx)(e.img,{src:\\"/assets/images/observe-gke-autopilot-clusters/blog-elastic-kubernetes-dashboard.png\\",alt:\\"elastic agent GKE autopilot kubernetes dashboard\\",width:\\"1823\\",height:\\"1092\\"})}),`\\n`,(0,t.jsx)(e.h2,{id:\\"conclusion\\",children:\\"Conclusion\\"}),`\\n`,(0,t.jsxs)(e.p,{children:[\\"Today I have shown how easy it is to monitor, observe, and generate alerts on a GKE Autopilot cluster. To get more information on what is possible, see the official Elastic documentation for \\",(0,t.jsx)(e.a,{href:\\"https://github.com/elastic/elastic-agent/blob/autopilotdocumentaton/docs/elastic-agent-gke-autopilot.md\\",rel:\\"nofollow\\",children:\\"Autopilot observability with Elastic Agent\\"}),\\".\\"]}),`\\n`,(0,t.jsx)(e.h2,{id:\\"next-steps\\",children:\\"Next steps\\"}),`\\n`,(0,t.jsxs)(e.p,{children:[\\"If you don\\\\u2019t have Elastic yet, you can get started for free with an \\",(0,t.jsx)(e.a,{href:\\"https://www.elastic.co/cloud/elasticsearch-service/signup\\",rel:\\"nofollow\\",children:\\"Elastic Trial\\"}),\\" today. Get more from Elastic and Google together with a \\",(0,t.jsx)(e.a,{href:\\"https://console.cloud.google.com/marketplace/browse?q=Elastic&utm_source=Elastic&utm_medium=qwiklabs&utm_campaign=Qwiklabs+to+Marketplace\\",rel:\\"nofollow\\",children:\\"Marketplace subscription\\"}),\\". 
Elastic does more than just integrate with GKE \\\\u2014 check out the almost \\",(0,t.jsx)(e.a,{href:\\"https://www.elastic.co/integrations\\",rel:\\"nofollow\\",children:\\"300 integrations\\"}),\\" that Elastic provides.\\"]})]})}function h(o={}){let{wrapper:e}=o.components||{};return e?(0,t.jsx)(e,{...o,children:(0,t.jsx)(d,{...o})}):d(o)}return k(E);})();\\n;return Component;"},"_id":"articles/elastic-observe-gke-autopilot-clusters.mdx","_raw":{"sourceFilePath":"articles/elastic-observe-gke-autopilot-clusters.mdx","sourceFileName":"elastic-observe-gke-autopilot-clusters.mdx","sourceFileDir":"articles","contentType":"mdx","flattenedPath":"articles/elastic-observe-gke-autopilot-clusters"},"type":"Article","imageUrl":"/assets/images/observe-gke-autopilot-clusters/blog-elastic-kubernetes-dashboard.png","readingTime":"8 min read","url":"/observe-gke-autopilot-clusters","headings":[{"level":2,"title":"Hands on with Elastic and GKE Autopilot","href":"#hands-on-with-elastic-and-gke-autopilot"},{"level":3,"title":"[Kubernetes observability](https://www.elastic.co/observability/kubernetes-monitoring) has never been easier","href":"#kubernetes-observabilityhttpswwwelasticcoobservabilitykubernetes-monitoring-has-never-been-easier"},{"level":2,"title":"Let’s get started with Elastic Stack!","href":"#lets-get-started-with-elastic-stack"},{"level":2,"title":"Provisioning an Autopilot cluster and an Elastic stack","href":"#provisioning-an-autopilot-cluster-and-an-elastic-stack"},{"level":2,"title":"Adding Elastic Observability to GKE Autopilot","href":"#adding-elastic-observability-to-gke-autopilot"},{"level":2,"title":"Connect Autopilot to Elastic","href":"#connect-autopilot-to-elastic"},{"level":2,"title":"Adding a workload to the Autopilot cluster","href":"#adding-a-workload-to-the-autopilot-cluster"},{"level":2,"title":"Observe and visualize Autopilot’s metrics","href":"#observe-and-visualize-autopilots-metrics"},{"level":2,"title":"Creating an alert","href":"#creating-an-alert"},{"level":2,"title":"Conclusion","href":"#conclusion"},{"level":2,"title":"Next steps","href":"#next-steps"}]},{"title":"Introducing Elastic\'s OpenTelemetry SDK for .NET","slug":"elastic-opentelemetry-distribution-dotnet-applications","date":"2024-04-02","description":"Today, we are excited to announce the alpha release of our new Elastic distribution of the OpenTelemetry SDK for .NET. In this post, we cover a few likely questions you may have about this new distribution and explain how to get started.","image":"OTel-1.jpg","author":[{"slug":"steve-gordon","type":"Author","_raw":{}},{"slug":"martijn-laarman","type":"Author","_raw":{}}],"subtitle":"Adopting OpenTelemetry native solutions for observing .NET applications","tags":[{"slug":"net","type":"Tag","_raw":{}},{"slug":"opentelemetry","type":"Tag","_raw":{}},{"slug":"apm","type":"Tag","_raw":{}}],"body":{"raw":"\\nWe are thrilled to announce the alpha release of our new [Elastic\xae distribution of the OpenTelemetry SDK for .NET](https://github.com/elastic/elastic-otel-dotnet/releases). In this post, we cover a few reasonable questions you may have about this new distribution.\\n\\nDownload the [NuGet package](https://www.nuget.org/packages/Elastic.OpenTelemetry) today if you want to try out this early access release. 
We welcome all feedback and suggestions to help us enhance the distribution before its stable release.\\n\\n[Check out our announcement blog post](https://www.elastic.co/blog/elastic-opentelemetry-sdk-distributions) to learn more about OpenTelemetry and our decision to introduce OpenTelemetry distributions.\\n\\n## The Elastic .NET OpenTelemetry distribution\\n\\nWith the alpha release of the Elastic distribution of the .NET OpenTelemetry SDK, we are embracing OpenTelemetry as the preferred and recommended choice for instrumenting .NET applications.\\n\\nIn .NET, the runtime base class libraries (BCL) include types designed for native OpenTelemetry instrumentation, such as [Activity](https://learn.microsoft.com/en-us/dotnet/api/system.diagnostics.activity) and [Meter](https://learn.microsoft.com/en-us/dotnet/api/system.diagnostics.metrics.meter), making adopting OpenTelemetry-native instrumentation even more convenient.\\n\\nThe current alpha release of our distribution is consciously feature-limited. Our goal is to assess the fitness of the API design and ease of use, laying a solid foundation going forward. We acknowledge that it is likely not suited to all application scenarios, so while we welcome developers installing it to try it out, we don’t currently advise using it for production.\\n\\nIn subsequent releases, we plan to add more features as we move toward feature parity with the existing Elastic APM agent for .NET. Based on user feedback, we will refine the API and move toward a stable release. Until then, we may need to make some breaking API changes to support additional use cases.\\n\\nThe current alpha release supports installation in typical modern workloads such as [ASP.NET Core](https://dotnet.microsoft.com/en-us/apps/aspnet) and [worker services](https://learn.microsoft.com/en-us/dotnet/core/extensions/workers). It best supports modern .NET runtimes, .NET 6.0 and later. We’d love to hear about other scenarios you think we should focus on next.\\n\\nThe types we introduce in the distribution are to support an easy switch from the “vanilla” OpenTelemetry SDK with no (or minimal) code changes. We expect that for most circumstances, merely adding the NuGet package is all that is required to get started.\\n\\nThe initial alpha releases add very little on top of the “vanilla” SDK from OpenTelemetry, but by adopting it early, you can shape its direction. We will deliver valuable enhancements to developers in subsequent releases.\\n\\nIf you’d like to follow the development of the distribution, the code is fully open source and [available on GitHub](https://github.com/elastic/elastic-otel-dotnet). We encourage you to raise issues for bugs or usability pain points you encounter.\\n\\n## How do I get started?\\n\\nGetting started with the Elastic OpenTelemetry distribution is really easy. Simply add a reference to the Elastic OpenTelemetry NuGet package to your project. This can be achieved by adding a package reference to the project (csproj) file.\\n\\n```xml\\n\\n```\\n\\nAfter adding the package reference, you can use the Elastic OpenTelemetry distribution in your application. The distribution includes a transitive dependency on the OpenTelemetry SDK, so you do not need to add the OpenTelemetry SDK package to your project. 
Doing so will cause no harm and may be used to opt into newer SDK versions before the Elastic distribution references them.\\n\\nThe Elastic OpenTelemetry distribution is designed to be easy to use and integrate into your applications, including those that have previously used the OpenTelemetry SDK directly. When the OpenTelemetry SDK is already being used, the only required change is to add the Elastic.OpenTelemetry NuGet package to the project. Doing so will automatically switch to the opinionated configuration provided by the Elastic distribution.\\n\\n### ASP.NET Core example\\n\\nA common requirement is to instrument ASP.NET Core applications based on **Microsoft.Extensions.Hosting** libraries, which provide dependency injection via an **IServiceProvider**.\\n\\nThe OpenTelemetry SDK and the Elastic distribution include extension methods to enable observability features in your application by adding a few lines of code.\\n\\nThis example focuses on adding instrumentation to an ASP.NET Core minimal API application using the Elastic OpenTelemetry distribution. Similar steps can also be applied to instrument other ASP.NET Core workloads and host-based applications such as Worker Services.\\n\\n_NOTE: This example assumes that we start with a new_ [_minimal API project_](https://learn.microsoft.com/en-us/aspnet/core/tutorials/min-web-api) _created using project templates available with the_ [_.NET 8 SDK_](https://dotnet.microsoft.com/en-us/download/dotnet/8.0)_. It also uses top-level statements inside a single Program.cs file._\\n\\nAdd the **Elastic.OpenTelemetry** package reference to the project (csproj) file.\\n\\n```xml\\n\\n```\\n\\nTo take advantage of the OpenTelemetry SDK instrumentation for ASP.NET Core, also add the **OpenTelemetry.Instrumentation.AspNetCore** NuGet package.\\n\\n```xml\\n\\n```\\n\\nThis package includes support to collect instrumentation (traces and metrics) for requests handled by ASP.NET Core endpoints.\\n\\nInside the **Program.cs** file of the ASP.NET Core application, add the following two using directives:\\n\\n```csharp\\nusing OpenTelemetry;\\nusing OpenTelemetry.Trace;\\n```\\n\\nThe OpenTelemetry SDK includes extension methods on the **IServiceCollection** to enable and configure the trace, metric, and log providers. The Elastic distribution overrides the default SDK registration, adding several opinionated defaults.\\n\\nIn the minimal API template, the **WebApplicationBuilder** exposes a **Services** property that can be used to register services with the dependency injection container. Ensure that the OpenTelemetry SDK is registered to enable tracing and metrics collection.\\n\\n```csharp\\nvar builder = WebApplication.CreateBuilder(args);\\n\\nbuilder.Services\\n .AddHttpClient() // <1>\\n .AddOpenTelemetry() // <2>\\n .WithTracing(t => t.AddAspNetCoreInstrumentation()); // <3>\\n```\\n\\n> \\\\<1\\\\> AddHttpClient registers the IHttpClientFactory service with the dependency injection container. This is _not_ required to enable OpenTelemetry, but the example endpoint will use it to send an HTTP request.\\n>\\n> \\\\<2\\\\> AddOpenTelemetry registers the OpenTelemetry SDK with the dependency injection container. 
When available, the Elastic distribution will override this to add opinionated defaults.\\n>\\n> \\\\<3\\\\> Configures OpenTelemetry tracing to collect tracing and metric data produced by ASP.NET Core.\\n\\nWith these limited changes to the Program.cs file, the application is now configured to use the OpenTelemetry SDK and the Elastic distribution to collect traces and metrics, which are exported via OTLP.\\n\\nTo demonstrate the tracing capabilities, we will define a single endpoint for the API via the **WebApplication**.\\n\\n```csharp\\nvar app = builder.Build();\\n\\napp.UseHttpsRedirection();\\n\\napp.MapGet(\\"/\\", (IHttpClientFactory httpClientFactory) =>\\n Api.HandleRoot(httpClientFactory)); // <1>\\n\\napp.Run();\\n```\\n\\n> \\\\<1\\\\> Maps an endpoint that handles requests to the application\'s root URL path. The handler will be supplied from a static class that we also need to add to the application. It accepts an **IHttpClientFactory** as a parameter, which will be injected from the dependency injection container at runtime and passed as an argument to the **HandleRoot** method.\\n\\n```csharp\\n\\nnamespace Example.Api\\n{\\n internal static class Api\\n {\\n public static async Task HandleRoot(IHttpClientFactory httpClientFactory)\\n {\\n using var client = httpClientFactory.CreateClient();\\n\\n await Task.Delay(100); // simulate work\\n var response = await client.GetAsync(\\"https://elastic.co\\"); // <1>\\n await Task.Delay(50); // simulate work\\n\\n return response.StatusCode == System.Net.HttpStatusCode.OK ? Results.Ok() : Results.StatusCode(500);\\n }\\n }\\n}\\n```\\n\\n> \\\\<1\\\\> This URL will require two redirects, allowing us to see multiple spans in the trace.\\n\\nThis static class includes a **HandleRoot** method that matches the signature for the endpoint handler delegate.\\n\\nAfter creating a **HttpClient** from the factory, it sends a GET request to the elastic.co website. Either side of the request is a delay, which is used here to simulate some business logic being executed. The method returns a suitable status code based on the result of the external HTTP request.\\n\\nIf you’re following along, you will also need to include a using directive for the **Example.Api** namespace in your Program.cs file.\\n\\n```csharp\\nusing Example.Api;\\n```\\n\\nThat is all of the code we require for now. The Elastic distribution will automatically enable the exporting of telemetry signals via the OTLP exporter. The OTLP exporter requires that endpoint(s) be configured. A common mechanism for configuring endpoints is via environment variables.\\n\\nThis demo uses an Elastic Cloud deployment as the destination for our observability data. To retrieve the endpoint information from Kibana\xae running in Elastic Cloud, navigate to the observability setup guides. Select the OpenTelemetry option to view the configuration details that should be supplied to the application.\\n\\n![apm agents image](/assets/images/elastic-opentelemetry-distribution-dotnet-applications/1-apm-agents.png)\\n\\nConfigure environment variables for the application either in launchSettings.json or in the environment where the application is running. 
The authorization header bearer token should be stored securely, in user secrets or a suitable key vault system.\\n\\nAt a minimum, we must configure two environment variables:\\n\\n- OTEL_EXPORTER_OTLP_ENDPOINT\\n\\n- OTLP_EXPORTER_OTLP_HEADERS\\n\\nIt is also highly recommended to configure at least a descriptive service name for the application using the OTEL_RESOURCE_ATTRIBUTES environment variable otherwise a generic default will be applied. For example:\\n\\n```bash\\n\\"OTEL_RESOURCE_ATTRIBUTES\\": \\"service.name=minimal-api-example\\"\\n```\\n\\nAdditional resource tags, such as version, can and should be added as appropriate. You can read more about the options for configuring resource attributes in the [OpenTelemetry .NET SDK documentation](https://opentelemetry.io/docs/languages/net/resources/).\\n\\nOnce configured, run the application and make an HTTP request to its root endpoint. A trace will be generated and exported to the configured OTLP endpoint.\\n\\nTo view the traces, you can use the Elastic APM Kibana UI. From the Kibana home page, visit the Observability area and from a trace under the APM \\\\> Traces page. After selecting a suitable time frame and choosing the trace named “GET /,” you will be able to explore one or more trace samples.\\n\\n![trace sample](/assets/images/elastic-opentelemetry-distribution-dotnet-applications/2-trace-sample.png)\\n\\nThe above trace demonstrates the built-in instrumentation collection provided by the OpenTelemetry SDK and the optional **OpenTelemetry.Instrumentation.AspNetCore** package that we added.\\n\\nIt’s important to highlight that we would see a different trace above if we had used the “vanilla” SDK without the Elastic distribution. The HTTP spans that appear in blue in the screenshot would not be shown. By default, the OpenTelemetry SDK does not enable HTTP instrumentation, and it would require additional code to configure the instrumentation of outbound HTTP requests. The Elastic distribution takes the opinion that HTTP spans should be captured and enables this feature by default.\\n\\nIt is also possible to add application-specific instrumentation to this application. Typically, this would require calling vendor-specific APIs, for example, the [tracer API](https://www.elastic.co/guide/en/apm/agent/dotnet/current/public-api.html#api-tracer-api) in Elastic APM Agent. A significant benefit of choosing OpenTelemetry is the capability to use vendor-neutral APIs to instrument code with no vendor lock-in. 
We can see that in action by updating the **API** class in the sample.\\n\\n```csharp\\ninternal static class Api\\n{\\n public static string ActivitySourceName = \\"CustomActivitySource\\";\\n private static readonly ActivitySource ActivitySource = new(ActivitySourceName);\\n\\n public static async Task HandleRoot(IHttpClientFactory httpClientFactory)\\n {\\n using var activity = ActivitySource.StartActivity(\\"DoingStuff\\", ActivityKind.Internal);\\n activity?.SetTag(\\"custom-tag\\", \\"TagValue\\");\\n\\n using var client = httpClientFactory.CreateClient();\\n\\n await Task.Delay(100);\\n var response = await client.GetAsync(\\"https://elastic.co\\"); // using this URL will require 2 redirects\\n await Task.Delay(50);\\n\\n if (response.StatusCode == System.Net.HttpStatusCode.OK)\\n {\\n activity?.SetStatus(ActivityStatusCode.Ok);\\n return Results.Ok();\\n }\\n\\n activity?.SetStatus(ActivityStatusCode.Error);\\n return Results.StatusCode(500);\\n }\\n}\\n```\\n\\nThe preceding code snippet defines a private static **ActivitySource** field inside the **Api** class. Inside the **HandleRoot** method, an **Activity** is started using the ActivitySource, and several tags are set. The **ActivitySource** and **Activity** types are defined in the .NET BCL (base class library) and are defined in the **System.Diagnostics** namespace. A using directive is required to use them.\\n\\n```csharp\\nusing System.Diagnostics;\\n```\\n\\nBy using the Activity APIs to instrument the above code, we are not tied to any specific vendor APM solution. To learn more about using the .NET APIs to instrument code in an OpenTelemetry native way, visit the [Microsoft Learn page covering distributed tracing instrumentation](https://learn.microsoft.com/en-us/dotnet/core/diagnostics/distributed-tracing-instrumentation-walkthroughs).\\n\\nThe last modification we must apply will instruct OpenTelemetry to observe spans from our application-specific **ActivitySource**. This is achieved by updating the registration of the OpenTelemetry components with the dependency injection framework.\\n\\n```csharp\\nbuilder.Services\\n .AddHttpClient()\\n .AddOpenTelemetry()\\n .WithTracing(t => t\\n .AddAspNetCoreInstrumentation()\\n .AddSource(Api.ActivitySourceName)); // <1>\\n```\\n\\n> \\\\<1\\\\> AddSource subscribes the OpenTelemetry SDK to spans (activities) produced by our application code.\\n\\nA new trace will be collected and exported after making these changes, rerunning the application, and requesting the root endpoint. The latest trace can be viewed in the Kibana observability UI.\\n\\n![timeline](/assets/images/elastic-opentelemetry-distribution-dotnet-applications/3-timeline.png)\\n\\nThe trace waterfall now includes the internal “DoingStuff” span produced by the instrumentation that we added to our application code. The HTTP spans still appear and are now child spans of the “DoingStuff” span.\\n\\nWe’re working on writing more thorough documentation to be published on elastic.co. 
Until then, you can find more information in our repository [readme](https://github.com/elastic/elastic-otel-dotnet/blob/main/README.md) and the [docs folder](https://github.com/elastic/elastic-otel-dotnet/tree/main/docs).\\n\\nAs the distribution is designed to extend the capabilities of the OpenTelemetry SDK with limited impact on the code used to register the SDK, we recommend visiting the [OpenTelemetry documentation for .NET](https://opentelemetry.io/docs/languages/net/) to learn about the instrumenting code and provide a more advanced configuration of the SDK.\\n\\n## What are the next steps?\\n\\nWe are very excited to expand our support of the OpenTelemetry community and contribute to its future within the .NET ecosystem. This is the compelling next step toward greater collaboration between all observability vendors to provide a rich ecosystem supporting developers on their journey to improved application observability with zero vendor lock-in.\\n\\nAt this stage, we strongly appreciate any feedback the .NET community and our customers can provide to guide the direction of our OpenTelemetry distribution. Please [try out our distribution](https://www.nuget.org/packages/Elastic.OpenTelemetry) and engage with us through our [GitHub repository](https://github.com/elastic/elastic-otel-dotnet).\\n\\nIn the coming weeks and months, we will focus on stabilizing the distribution\'s API and porting Elastic APM Agent features into the distribution. In parallel, we expect to start donating and contributing features to the broader OpenTelemetry community via the [OpenTelemetry GitHub repositories](https://github.com/open-telemetry/).\\n\\n_The release and timing of any features or functionality described in this post remain at Elastic\'s sole discretion. Any features or functionality not currently available may not be delivered on time or at all._\\n","code":"var Component=(()=>{var p=Object.create;var r=Object.defineProperty;var u=Object.getOwnPropertyDescriptor;var m=Object.getOwnPropertyNames;var g=Object.getPrototypeOf,y=Object.prototype.hasOwnProperty;var f=(n,e)=>()=>(e||n((e={exports:{}}).exports,e),e.exports),b=(n,e)=>{for(var i in e)r(n,i,{get:e[i],enumerable:!0})},s=(n,e,i,o)=>{if(e&&typeof e==\\"object\\"||typeof e==\\"function\\")for(let a of m(e))!y.call(n,a)&&a!==i&&r(n,a,{get:()=>e[a],enumerable:!(o=u(e,a))||o.enumerable});return n};var T=(n,e,i)=>(i=n!=null?p(g(n)):{},s(e||!n||!n.__esModule?r(i,\\"default\\",{value:n,enumerable:!0}):i,n)),w=n=>s(r({},\\"__esModule\\",{value:!0}),n);var c=f((S,l)=>{l.exports=_jsx_runtime});var A={};b(A,{default:()=>h,frontmatter:()=>v});var t=T(c()),v={title:\\"Introducing Elastic\'s OpenTelemetry SDK for .NET\\",slug:\\"elastic-opentelemetry-distribution-dotnet-applications\\",date:\\"2024-04-02\\",subtitle:\\"Adopting OpenTelemetry native solutions for observing .NET applications\\",description:\\"Today, we are excited to announce the alpha release of our new Elastic distribution of the OpenTelemetry SDK for .NET. 
In this post, we cover a few likely questions you may have about this new distribution and explain how to get started.\\",author:[{slug:\\"steve-gordon\\"},{slug:\\"martijn-laarman\\"}],image:\\"OTel-1.jpg\\",tags:[{slug:\\"net\\"},{slug:\\"opentelemetry\\"},{slug:\\"apm\\"}]};function d(n){let e={a:\\"a\\",blockquote:\\"blockquote\\",code:\\"code\\",em:\\"em\\",h2:\\"h2\\",h3:\\"h3\\",img:\\"img\\",li:\\"li\\",p:\\"p\\",pre:\\"pre\\",strong:\\"strong\\",ul:\\"ul\\",...n.components};return(0,t.jsxs)(t.Fragment,{children:[(0,t.jsxs)(e.p,{children:[\\"We are thrilled to announce the alpha release of our new \\",(0,t.jsx)(e.a,{href:\\"https://github.com/elastic/elastic-otel-dotnet/releases\\",rel:\\"nofollow\\",children:\\"Elastic\\\\xAE distribution of the OpenTelemetry SDK for .NET\\"}),\\". In this post, we cover a few reasonable questions you may have about this new distribution.\\"]}),`\\n`,(0,t.jsxs)(e.p,{children:[\\"Download the \\",(0,t.jsx)(e.a,{href:\\"https://www.nuget.org/packages/Elastic.OpenTelemetry\\",rel:\\"nofollow\\",children:\\"NuGet package\\"}),\\" today if you want to try out this early access release. We welcome all feedback and suggestions to help us enhance the distribution before its stable release.\\"]}),`\\n`,(0,t.jsxs)(e.p,{children:[(0,t.jsx)(e.a,{href:\\"https://www.elastic.co/blog/elastic-opentelemetry-sdk-distributions\\",rel:\\"nofollow\\",children:\\"Check out our announcement blog post\\"}),\\" to learn more about OpenTelemetry and our decision to introduce OpenTelemetry distributions.\\"]}),`\\n`,(0,t.jsx)(e.h2,{id:\\"the-elastic-net-opentelemetry-distribution\\",children:\\"The Elastic .NET OpenTelemetry distribution\\"}),`\\n`,(0,t.jsx)(e.p,{children:\\"With the alpha release of the Elastic distribution of the .NET OpenTelemetry SDK, we are embracing OpenTelemetry as the preferred and recommended choice for instrumenting .NET applications.\\"}),`\\n`,(0,t.jsxs)(e.p,{children:[\\"In .NET, the runtime base class libraries (BCL) include types designed for native OpenTelemetry instrumentation, such as \\",(0,t.jsx)(e.a,{href:\\"https://learn.microsoft.com/en-us/dotnet/api/system.diagnostics.activity\\",rel:\\"nofollow\\",children:\\"Activity\\"}),\\" and \\",(0,t.jsx)(e.a,{href:\\"https://learn.microsoft.com/en-us/dotnet/api/system.diagnostics.metrics.meter\\",rel:\\"nofollow\\",children:\\"Meter\\"}),\\", making adopting OpenTelemetry-native instrumentation even more convenient.\\"]}),`\\n`,(0,t.jsx)(e.p,{children:\\"The current alpha release of our distribution is consciously feature-limited. Our goal is to assess the fitness of the API design and ease of use, laying a solid foundation going forward. We acknowledge that it is likely not suited to all application scenarios, so while we welcome developers installing it to try it out, we don\\\\u2019t currently advise using it for production.\\"}),`\\n`,(0,t.jsx)(e.p,{children:\\"In subsequent releases, we plan to add more features as we move toward feature parity with the existing Elastic APM agent for .NET. Based on user feedback, we will refine the API and move toward a stable release. 
Until then, we may need to make some breaking API changes to support additional use cases.\\"}),`\\n`,(0,t.jsxs)(e.p,{children:[\\"The current alpha release supports installation in typical modern workloads such as \\",(0,t.jsx)(e.a,{href:\\"https://dotnet.microsoft.com/en-us/apps/aspnet\\",rel:\\"nofollow\\",children:\\"ASP.NET Core\\"}),\\" and \\",(0,t.jsx)(e.a,{href:\\"https://learn.microsoft.com/en-us/dotnet/core/extensions/workers\\",rel:\\"nofollow\\",children:\\"worker services\\"}),\\". It best supports modern .NET runtimes, .NET 6.0 and later. We\\\\u2019d love to hear about other scenarios you think we should focus on next.\\"]}),`\\n`,(0,t.jsx)(e.p,{children:\\"The types we introduce in the distribution are to support an easy switch from the \\\\u201Cvanilla\\\\u201D OpenTelemetry SDK with no (or minimal) code changes. We expect that for most circumstances, merely adding the NuGet package is all that is required to get started.\\"}),`\\n`,(0,t.jsx)(e.p,{children:\\"The initial alpha releases add very little on top of the \\\\u201Cvanilla\\\\u201D SDK from OpenTelemetry, but by adopting it early, you can shape its direction. We will deliver valuable enhancements to developers in subsequent releases.\\"}),`\\n`,(0,t.jsxs)(e.p,{children:[\\"If you\\\\u2019d like to follow the development of the distribution, the code is fully open source and \\",(0,t.jsx)(e.a,{href:\\"https://github.com/elastic/elastic-otel-dotnet\\",rel:\\"nofollow\\",children:\\"available on GitHub\\"}),\\". We encourage you to raise issues for bugs or usability pain points you encounter.\\"]}),`\\n`,(0,t.jsx)(e.h2,{id:\\"how-do-i-get-started\\",children:\\"How do I get started?\\"}),`\\n`,(0,t.jsx)(e.p,{children:\\"Getting started with the Elastic OpenTelemetry distribution is really easy. Simply add a reference to the Elastic OpenTelemetry NuGet package to your project. This can be achieved by adding a package reference to the project (csproj) file.\\"}),`\\n`,(0,t.jsx)(e.pre,{children:(0,t.jsx)(e.code,{className:\\"language-xml\\",children:`\\n`})}),`\\n`,(0,t.jsx)(e.p,{children:\\"After adding the package reference, you can use the Elastic OpenTelemetry distribution in your application. The distribution includes a transitive dependency on the OpenTelemetry SDK, so you do not need to add the OpenTelemetry SDK package to your project. Doing so will cause no harm and may be used to opt into newer SDK versions before the Elastic distribution references them.\\"}),`\\n`,(0,t.jsx)(e.p,{children:\\"The Elastic OpenTelemetry distribution is designed to be easy to use and integrate into your applications, including those that have previously used the OpenTelemetry SDK directly. When the OpenTelemetry SDK is already being used, the only required change is to add the Elastic.OpenTelemetry NuGet package to the project. 
Doing so will automatically switch to the opinionated configuration provided by the Elastic distribution.\\"}),`\\n`,(0,t.jsx)(e.h3,{id:\\"aspnet-core-example\\",children:\\"ASP.NET Core example\\"}),`\\n`,(0,t.jsxs)(e.p,{children:[\\"A common requirement is to instrument ASP.NET Core applications based on \\",(0,t.jsx)(e.strong,{children:\\"Microsoft.Extensions.Hosting\\"}),\\" libraries, which provide dependency injection via an \\",(0,t.jsx)(e.strong,{children:\\"IServiceProvider\\"}),\\".\\"]}),`\\n`,(0,t.jsx)(e.p,{children:\\"The OpenTelemetry SDK and the Elastic distribution include extension methods to enable observability features in your application by adding a few lines of code.\\"}),`\\n`,(0,t.jsx)(e.p,{children:\\"This example focuses on adding instrumentation to an ASP.NET Core minimal API application using the Elastic OpenTelemetry distribution. Similar steps can also be applied to instrument other ASP.NET Core workloads and host-based applications such as Worker Services.\\"}),`\\n`,(0,t.jsxs)(e.p,{children:[(0,t.jsx)(e.em,{children:\\"NOTE: This example assumes that we start with a new\\"}),\\" \\",(0,t.jsx)(e.a,{href:\\"https://learn.microsoft.com/en-us/aspnet/core/tutorials/min-web-api\\",rel:\\"nofollow\\",children:(0,t.jsx)(e.em,{children:\\"minimal API project\\"})}),\\" \\",(0,t.jsx)(e.em,{children:\\"created using project templates available with the\\"}),\\" \\",(0,t.jsx)(e.a,{href:\\"https://dotnet.microsoft.com/en-us/download/dotnet/8.0\\",rel:\\"nofollow\\",children:(0,t.jsx)(e.em,{children:\\".NET 8 SDK\\"})}),(0,t.jsx)(e.em,{children:\\". It also uses top-level statements inside a single Program.cs file.\\"})]}),`\\n`,(0,t.jsxs)(e.p,{children:[\\"Add the \\",(0,t.jsx)(e.strong,{children:\\"Elastic.OpenTelemetry\\"}),\\" package reference to the project (csproj) file.\\"]}),`\\n`,(0,t.jsx)(e.pre,{children:(0,t.jsx)(e.code,{className:\\"language-xml\\",children:`\\n`})}),`\\n`,(0,t.jsxs)(e.p,{children:[\\"To take advantage of the OpenTelemetry SDK instrumentation for ASP.NET Core, also add the \\",(0,t.jsx)(e.strong,{children:\\"OpenTelemetry.Instrumentation.AspNetCore\\"}),\\" NuGet package.\\"]}),`\\n`,(0,t.jsx)(e.pre,{children:(0,t.jsx)(e.code,{className:\\"language-xml\\",children:`\\n`})}),`\\n`,(0,t.jsx)(e.p,{children:\\"This package includes support to collect instrumentation (traces and metrics) for requests handled by ASP.NET Core endpoints.\\"}),`\\n`,(0,t.jsxs)(e.p,{children:[\\"Inside the \\",(0,t.jsx)(e.strong,{children:\\"Program.cs\\"}),\\" file of the ASP.NET Core application, add the following two using directives:\\"]}),`\\n`,(0,t.jsx)(e.pre,{children:(0,t.jsx)(e.code,{className:\\"language-csharp\\",children:`using OpenTelemetry;\\nusing OpenTelemetry.Trace;\\n`})}),`\\n`,(0,t.jsxs)(e.p,{children:[\\"The OpenTelemetry SDK includes extension methods on the \\",(0,t.jsx)(e.strong,{children:\\"IServiceCollection\\"}),\\" to enable and configure the trace, metric, and log providers. The Elastic distribution overrides the default SDK registration, adding several opinionated defaults.\\"]}),`\\n`,(0,t.jsxs)(e.p,{children:[\\"In the minimal API template, the \\",(0,t.jsx)(e.strong,{children:\\"WebApplicationBuilder\\"}),\\" exposes a \\",(0,t.jsx)(e.strong,{children:\\"Services\\"}),\\" property that can be used to register services with the dependency injection container. 
Ensure that the OpenTelemetry SDK is registered to enable tracing and metrics collection.\\"]}),`\\n`,(0,t.jsx)(e.pre,{children:(0,t.jsx)(e.code,{className:\\"language-csharp\\",children:`var builder = WebApplication.CreateBuilder(args);\\n\\nbuilder.Services\\n .AddHttpClient() // <1>\\n .AddOpenTelemetry() // <2>\\n .WithTracing(t => t.AddAspNetCoreInstrumentation()); // <3>\\n`})}),`\\n`,(0,t.jsxs)(e.blockquote,{children:[`\\n`,(0,t.jsxs)(e.p,{children:[\\"<1> AddHttpClient registers the IHttpClientFactory service with the dependency injection container. This is \\",(0,t.jsx)(e.em,{children:\\"not\\"}),\\" required to enable OpenTelemetry, but the example endpoint will use it to send an HTTP request.\\"]}),`\\n`,(0,t.jsx)(e.p,{children:\\"<2> AddOpenTelemetry registers the OpenTelemetry SDK with the dependency injection container. When available, the Elastic distribution will override this to add opinionated defaults.\\"}),`\\n`,(0,t.jsx)(e.p,{children:\\"<3> Configures OpenTelemetry tracing to collect tracing and metric data produced by ASP.NET Core.\\"}),`\\n`]}),`\\n`,(0,t.jsx)(e.p,{children:\\"With these limited changes to the Program.cs file, the application is now configured to use the OpenTelemetry SDK and the Elastic distribution to collect traces and metrics, which are exported via OTLP.\\"}),`\\n`,(0,t.jsxs)(e.p,{children:[\\"To demonstrate the tracing capabilities, we will define a single endpoint for the API via the \\",(0,t.jsx)(e.strong,{children:\\"WebApplication\\"}),\\".\\"]}),`\\n`,(0,t.jsx)(e.pre,{children:(0,t.jsx)(e.code,{className:\\"language-csharp\\",children:`var app = builder.Build();\\n\\napp.UseHttpsRedirection();\\n\\napp.MapGet(\\"/\\", (IHttpClientFactory httpClientFactory) =>\\n Api.HandleRoot(httpClientFactory)); // <1>\\n\\napp.Run();\\n`})}),`\\n`,(0,t.jsxs)(e.blockquote,{children:[`\\n`,(0,t.jsxs)(e.p,{children:[\\"<1> Maps an endpoint that handles requests to the application\'s root URL path. The handler will be supplied from a static class that we also need to add to the application. It accepts an \\",(0,t.jsx)(e.strong,{children:\\"IHttpClientFactory\\"}),\\" as a parameter, which will be injected from the dependency injection container at runtime and passed as an argument to the \\",(0,t.jsx)(e.strong,{children:\\"HandleRoot\\"}),\\" method.\\"]}),`\\n`]}),`\\n`,(0,t.jsx)(e.pre,{children:(0,t.jsx)(e.code,{className:\\"language-csharp\\",children:`\\nnamespace Example.Api\\n{\\n internal static class Api\\n {\\n public static async Task HandleRoot(IHttpClientFactory httpClientFactory)\\n {\\n using var client = httpClientFactory.CreateClient();\\n\\n await Task.Delay(100); // simulate work\\n var response = await client.GetAsync(\\"https://elastic.co\\"); // <1>\\n await Task.Delay(50); // simulate work\\n\\n return response.StatusCode == System.Net.HttpStatusCode.OK ? Results.Ok() : Results.StatusCode(500);\\n }\\n }\\n}\\n`})}),`\\n`,(0,t.jsxs)(e.blockquote,{children:[`\\n`,(0,t.jsx)(e.p,{children:\\"<1> This URL will require two redirects, allowing us to see multiple spans in the trace.\\"}),`\\n`]}),`\\n`,(0,t.jsxs)(e.p,{children:[\\"This static class includes a \\",(0,t.jsx)(e.strong,{children:\\"HandleRoot\\"}),\\" method that matches the signature for the endpoint handler delegate.\\"]}),`\\n`,(0,t.jsxs)(e.p,{children:[\\"After creating a \\",(0,t.jsx)(e.strong,{children:\\"HttpClient\\"}),\\" from the factory, it sends a GET request to the elastic.co website. 
Either side of the request is a delay, which is used here to simulate some business logic being executed. The method returns a suitable status code based on the result of the external HTTP request.\\"]}),`\\n`,(0,t.jsxs)(e.p,{children:[\\"If you\\\\u2019re following along, you will also need to include a using directive for the \\",(0,t.jsx)(e.strong,{children:\\"Example.Api\\"}),\\" namespace in your Program.cs file.\\"]}),`\\n`,(0,t.jsx)(e.pre,{children:(0,t.jsx)(e.code,{className:\\"language-csharp\\",children:`using Example.Api;\\n`})}),`\\n`,(0,t.jsx)(e.p,{children:\\"That is all of the code we require for now. The Elastic distribution will automatically enable the exporting of telemetry signals via the OTLP exporter. The OTLP exporter requires that endpoint(s) be configured. A common mechanism for configuring endpoints is via environment variables.\\"}),`\\n`,(0,t.jsx)(e.p,{children:\\"This demo uses an Elastic Cloud deployment as the destination for our observability data. To retrieve the endpoint information from Kibana\\\\xAE running in Elastic Cloud, navigate to the observability setup guides. Select the OpenTelemetry option to view the configuration details that should be supplied to the application.\\"}),`\\n`,(0,t.jsx)(e.p,{children:(0,t.jsx)(e.img,{src:\\"/assets/images/elastic-opentelemetry-distribution-dotnet-applications/1-apm-agents.png\\",alt:\\"apm agents image\\",width:\\"1215\\",height:\\"817\\"})}),`\\n`,(0,t.jsx)(e.p,{children:\\"Configure environment variables for the application either in launchSettings.json or in the environment where the application is running. The authorization header bearer token should be stored securely, in user secrets or a suitable key vault system.\\"}),`\\n`,(0,t.jsx)(e.p,{children:\\"At a minimum, we must configure two environment variables:\\"}),`\\n`,(0,t.jsxs)(e.ul,{children:[`\\n`,(0,t.jsxs)(e.li,{children:[`\\n`,(0,t.jsx)(e.p,{children:\\"OTEL_EXPORTER_OTLP_ENDPOINT\\"}),`\\n`]}),`\\n`,(0,t.jsxs)(e.li,{children:[`\\n`,(0,t.jsx)(e.p,{children:\\"OTLP_EXPORTER_OTLP_HEADERS\\"}),`\\n`]}),`\\n`]}),`\\n`,(0,t.jsx)(e.p,{children:\\"It is also highly recommended to configure at least a descriptive service name for the application using the OTEL_RESOURCE_ATTRIBUTES environment variable otherwise a generic default will be applied. For example:\\"}),`\\n`,(0,t.jsx)(e.pre,{children:(0,t.jsx)(e.code,{className:\\"language-bash\\",children:`\\"OTEL_RESOURCE_ATTRIBUTES\\": \\"service.name=minimal-api-example\\"\\n`})}),`\\n`,(0,t.jsxs)(e.p,{children:[\\"Additional resource tags, such as version, can and should be added as appropriate. You can read more about the options for configuring resource attributes in the \\",(0,t.jsx)(e.a,{href:\\"https://opentelemetry.io/docs/languages/net/resources/\\",rel:\\"nofollow\\",children:\\"OpenTelemetry .NET SDK documentation\\"}),\\".\\"]}),`\\n`,(0,t.jsx)(e.p,{children:\\"Once configured, run the application and make an HTTP request to its root endpoint. A trace will be generated and exported to the configured OTLP endpoint.\\"}),`\\n`,(0,t.jsx)(e.p,{children:\\"To view the traces, you can use the Elastic APM Kibana UI. From the Kibana home page, visit the Observability area and from a trace under the APM > Traces page. 
After selecting a suitable time frame and choosing the trace named \\\\u201CGET /,\\\\u201D you will be able to explore one or more trace samples.\\"}),`\\n`,(0,t.jsx)(e.p,{children:(0,t.jsx)(e.img,{src:\\"/assets/images/elastic-opentelemetry-distribution-dotnet-applications/2-trace-sample.png\\",alt:\\"trace sample\\",width:\\"848\\",height:\\"431\\"})}),`\\n`,(0,t.jsxs)(e.p,{children:[\\"The above trace demonstrates the built-in instrumentation collection provided by the OpenTelemetry SDK and the optional \\",(0,t.jsx)(e.strong,{children:\\"OpenTelemetry.Instrumentation.AspNetCore\\"}),\\" package that we added.\\"]}),`\\n`,(0,t.jsx)(e.p,{children:\\"It\\\\u2019s important to highlight that we would see a different trace above if we had used the \\\\u201Cvanilla\\\\u201D SDK without the Elastic distribution. The HTTP spans that appear in blue in the screenshot would not be shown. By default, the OpenTelemetry SDK does not enable HTTP instrumentation, and it would require additional code to configure the instrumentation of outbound HTTP requests. The Elastic distribution takes the opinion that HTTP spans should be captured and enables this feature by default.\\"}),`\\n`,(0,t.jsxs)(e.p,{children:[\\"It is also possible to add application-specific instrumentation to this application. Typically, this would require calling vendor-specific APIs, for example, the \\",(0,t.jsx)(e.a,{href:\\"https://www.elastic.co/guide/en/apm/agent/dotnet/current/public-api.html#api-tracer-api\\",rel:\\"nofollow\\",children:\\"tracer API\\"}),\\" in Elastic APM Agent. A significant benefit of choosing OpenTelemetry is the capability to use vendor-neutral APIs to instrument code with no vendor lock-in. We can see that in action by updating the \\",(0,t.jsx)(e.strong,{children:\\"API\\"}),\\" class in the sample.\\"]}),`\\n`,(0,t.jsx)(e.pre,{children:(0,t.jsx)(e.code,{className:\\"language-csharp\\",children:`internal static class Api\\n{\\n public static string ActivitySourceName = \\"CustomActivitySource\\";\\n private static readonly ActivitySource ActivitySource = new(ActivitySourceName);\\n\\n public static async Task HandleRoot(IHttpClientFactory httpClientFactory)\\n {\\n using var activity = ActivitySource.StartActivity(\\"DoingStuff\\", ActivityKind.Internal);\\n activity?.SetTag(\\"custom-tag\\", \\"TagValue\\");\\n\\n using var client = httpClientFactory.CreateClient();\\n\\n await Task.Delay(100);\\n var response = await client.GetAsync(\\"https://elastic.co\\"); // using this URL will require 2 redirects\\n await Task.Delay(50);\\n\\n if (response.StatusCode == System.Net.HttpStatusCode.OK)\\n {\\n activity?.SetStatus(ActivityStatusCode.Ok);\\n return Results.Ok();\\n }\\n\\n activity?.SetStatus(ActivityStatusCode.Error);\\n return Results.StatusCode(500);\\n }\\n}\\n`})}),`\\n`,(0,t.jsxs)(e.p,{children:[\\"The preceding code snippet defines a private static \\",(0,t.jsx)(e.strong,{children:\\"ActivitySource\\"}),\\" field inside the \\",(0,t.jsx)(e.strong,{children:\\"Api\\"}),\\" class. Inside the \\",(0,t.jsx)(e.strong,{children:\\"HandleRoot\\"}),\\" method, an \\",(0,t.jsx)(e.strong,{children:\\"Activity\\"}),\\" is started using the ActivitySource, and several tags are set. The \\",(0,t.jsx)(e.strong,{children:\\"ActivitySource\\"}),\\" and \\",(0,t.jsx)(e.strong,{children:\\"Activity\\"}),\\" types are defined in the .NET BCL (base class library) and are defined in the \\",(0,t.jsx)(e.strong,{children:\\"System.Diagnostics\\"}),\\" namespace. 
A using directive is required to use them.\\"]}),`\\n`,(0,t.jsx)(e.pre,{children:(0,t.jsx)(e.code,{className:\\"language-csharp\\",children:`using System.Diagnostics;\\n`})}),`\\n`,(0,t.jsxs)(e.p,{children:[\\"By using the Activity APIs to instrument the above code, we are not tied to any specific vendor APM solution. To learn more about using the .NET APIs to instrument code in an OpenTelemetry native way, visit the \\",(0,t.jsx)(e.a,{href:\\"https://learn.microsoft.com/en-us/dotnet/core/diagnostics/distributed-tracing-instrumentation-walkthroughs\\",rel:\\"nofollow\\",children:\\"Microsoft Learn page covering distributed tracing instrumentation\\"}),\\".\\"]}),`\\n`,(0,t.jsxs)(e.p,{children:[\\"The last modification we must apply will instruct OpenTelemetry to observe spans from our application-specific \\",(0,t.jsx)(e.strong,{children:\\"ActivitySource\\"}),\\". This is achieved by updating the registration of the OpenTelemetry components with the dependency injection framework.\\"]}),`\\n`,(0,t.jsx)(e.pre,{children:(0,t.jsx)(e.code,{className:\\"language-csharp\\",children:`builder.Services\\n .AddHttpClient()\\n .AddOpenTelemetry()\\n .WithTracing(t => t\\n .AddAspNetCoreInstrumentation()\\n .AddSource(Api.ActivitySourceName)); // <1>\\n`})}),`\\n`,(0,t.jsxs)(e.blockquote,{children:[`\\n`,(0,t.jsx)(e.p,{children:\\"<1> AddSource subscribes the OpenTelemetry SDK to spans (activities) produced by our application code.\\"}),`\\n`]}),`\\n`,(0,t.jsx)(e.p,{children:\\"A new trace will be collected and exported after making these changes, rerunning the application, and requesting the root endpoint. The latest trace can be viewed in the Kibana observability UI.\\"}),`\\n`,(0,t.jsx)(e.p,{children:(0,t.jsx)(e.img,{src:\\"/assets/images/elastic-opentelemetry-distribution-dotnet-applications/3-timeline.png\\",alt:\\"timeline\\",width:\\"850\\",height:\\"484\\"})}),`\\n`,(0,t.jsx)(e.p,{children:\\"The trace waterfall now includes the internal \\\\u201CDoingStuff\\\\u201D span produced by the instrumentation that we added to our application code. The HTTP spans still appear and are now child spans of the \\\\u201CDoingStuff\\\\u201D span.\\"}),`\\n`,(0,t.jsxs)(e.p,{children:[\\"We\\\\u2019re working on writing more thorough documentation to be published on elastic.co. Until then, you can find more information in our repository \\",(0,t.jsx)(e.a,{href:\\"https://github.com/elastic/elastic-otel-dotnet/blob/main/README.md\\",rel:\\"nofollow\\",children:\\"readme\\"}),\\" and the \\",(0,t.jsx)(e.a,{href:\\"https://github.com/elastic/elastic-otel-dotnet/tree/main/docs\\",rel:\\"nofollow\\",children:\\"docs folder\\"}),\\".\\"]}),`\\n`,(0,t.jsxs)(e.p,{children:[\\"As the distribution is designed to extend the capabilities of the OpenTelemetry SDK with limited impact on the code used to register the SDK, we recommend visiting the \\",(0,t.jsx)(e.a,{href:\\"https://opentelemetry.io/docs/languages/net/\\",rel:\\"nofollow\\",children:\\"OpenTelemetry documentation for .NET\\"}),\\" to learn about the instrumenting code and provide a more advanced configuration of the SDK.\\"]}),`\\n`,(0,t.jsx)(e.h2,{id:\\"what-are-the-next-steps\\",children:\\"What are the next steps?\\"}),`\\n`,(0,t.jsx)(e.p,{children:\\"We are very excited to expand our support of the OpenTelemetry community and contribute to its future within the .NET ecosystem. 
This is the compelling next step toward greater collaboration between all observability vendors to provide a rich ecosystem supporting developers on their journey to improved application observability with zero vendor lock-in.\\"}),`\\n`,(0,t.jsxs)(e.p,{children:[\\"At this stage, we strongly appreciate any feedback the .NET community and our customers can provide to guide the direction of our OpenTelemetry distribution. Please \\",(0,t.jsx)(e.a,{href:\\"https://www.nuget.org/packages/Elastic.OpenTelemetry\\",rel:\\"nofollow\\",children:\\"try out our distribution\\"}),\\" and engage with us through our \\",(0,t.jsx)(e.a,{href:\\"https://github.com/elastic/elastic-otel-dotnet\\",rel:\\"nofollow\\",children:\\"GitHub repository\\"}),\\".\\"]}),`\\n`,(0,t.jsxs)(e.p,{children:[\\"In the coming weeks and months, we will focus on stabilizing the distribution\'s API and porting Elastic APM Agent features into the distribution. In parallel, we expect to start donating and contributing features to the broader OpenTelemetry community via the \\",(0,t.jsx)(e.a,{href:\\"https://github.com/open-telemetry/\\",rel:\\"nofollow\\",children:\\"OpenTelemetry GitHub repositories\\"}),\\".\\"]}),`\\n`,(0,t.jsx)(e.p,{children:(0,t.jsx)(e.em,{children:\\"The release and timing of any features or functionality described in this post remain at Elastic\'s sole discretion. Any features or functionality not currently available may not be delivered on time or at all.\\"})})]})}function h(n={}){let{wrapper:e}=n.components||{};return e?(0,t.jsx)(e,{...n,children:(0,t.jsx)(d,{...n})}):d(n)}return w(A);})();\\n;return Component;"},"_id":"articles/elastic-opentelemetry-distribution-dotnet-applications.mdx","_raw":{"sourceFilePath":"articles/elastic-opentelemetry-distribution-dotnet-applications.mdx","sourceFileName":"elastic-opentelemetry-distribution-dotnet-applications.mdx","sourceFileDir":"articles","contentType":"mdx","flattenedPath":"articles/elastic-opentelemetry-distribution-dotnet-applications"},"type":"Article","imageUrl":"/assets/images/elastic-opentelemetry-distribution-dotnet-applications/OTel-1.jpg","readingTime":"13 min read","url":"/elastic-opentelemetry-distribution-dotnet-applications","headings":[{"level":2,"title":"The Elastic .NET OpenTelemetry distribution","href":"#the-elastic-net-opentelemetry-distribution"},{"level":2,"title":"How do I get started?","href":"#how-do-i-get-started"},{"level":3,"title":"ASP.NET Core example","href":"#aspnet-core-example"},{"level":2,"title":"What are the next steps?","href":"#what-are-the-next-steps"}]},{"title":"Introducing Elastic\'s OpenTelemetry Distribution for Node.js","slug":"elastic-opentelemetry-distribution-node-js","date":"2024-05-06","description":"Announcing the first alpha release of the Elastic OpenTelemetry Distribution for Node.js. See how easy it is to instrument your Node.js applications with OpenTelemetry in this blog post.","image":"Node-js.jpeg","author":[{"slug":"trent-mick","type":"Author","_raw":{}}],"tags":[{"slug":"opentelemetry","type":"Tag","_raw":{}}],"body":{"raw":"\\nWe are delighted to announce the alpha release of the [Elastic OpenTelemetry Distribution for Node.js](https://github.com/elastic/elastic-otel-node/tree/main/packages/opentelemetry-node#readme). 
This distribution is a light wrapper around the OpenTelemetry Node.js SDK that makes it easier to get started using OpenTelemetry to observe your Node.js applications.\\n\\n## Background\\n\\nElastic is standardizing on OpenTelemetry (OTel) for observability and security data collection. As part of that effort, we are [providing distributions of the OpenTelemetry Language SDKs](https://www.elastic.co/blog/elastic-opentelemetry-sdk-distributions). Our [Android](https://github.com/elastic/apm-agent-android#readme) and [iOS](https://github.com/elastic/apm-agent-ios#readme) SDKs have been OpenTelemetry-based from the start, and we have recently released alpha distributions for [Java](https://github.com/elastic/elastic-otel-java#readme) and [.NET](https://github.com/elastic/elastic-otel-dotnet#readme). The Elastic OpenTelemetry Distribution for Node.js is the latest addition.\\n\\n## Getting started\\n\\nTo get started with the Elastic OTel Distribution for Node.js (the \\"distro\\"), you need only install and load a single npm dependency (@elastic/opentelemetry-node). The distro sets up the collection of traces, metrics, and logs for a number of popular Node.js packages. It sends data to any OTLP endpoint you configure. This could be a standard OTel Collector or, as shown below, an Elastic Observability cloud deployment.\\n\\n```bash\\nnpm install --save @elastic/opentelemetry-node # (1) install the SDK\\n\\n# (2) configure it, for example:\\nexport OTEL_EXPORTER_OTLP_ENDPOINT=https://my-deployment.apm.us-west1.gcp.cloud.es.io\\nexport OTEL_EXPORTER_OTLP_HEADERS=\\"Authorization=Bearer ...REDACTED...\\"\\nexport OTEL_SERVICE_NAME=my-service\\n\\n# (3) load and start it\\nnode --require @elastic/opentelemetry-node my-service.js\\n```\\n\\n## A small example with Express and PostgreSQL\\n\\nFor a concrete example, let\'s look at a small Node.js \\"Shortlinks\\" service implemented using the [Express](https://expressjs.com/) web framework and the [pg PostgreSQL client package](https://node-postgres.com/). This service provides a POST / route for creating short links (a short name for a URL) and a GET /:shortname route for using them.\\n\\n![Recent shortlinks](/assets/images/elastic-opentelemetry-distribution-node-js/recent_shortlinks.png)\\n\\nThe git repository is [here](https://github.com/elastic/elastic-otel-node-example). The [README](https://github.com/elastic/elastic-otel-node-example#readme) shows how to create a free trial Elastic cloud deployment and get the appropriate OTEL\\\\_... config settings. Try it out (prerequisites are Docker and Node.js v20 or later):\\n\\n```bash\\ngit clone https://github.com/elastic/elastic-otel-node-example.git\\ncd elastic-otel-node-example\\nnpm install\\n\\ncp config.env.template config.env\\n# Edit OTEL_ values in \\"config.env\\" to point to your collection endpoint.\\n\\nnpm run db:start\\nnpm start\\n```\\n\\nThe only steps needed to set up observability are [these small changes](https://github.com/elastic/elastic-otel-node-example/blob/v1.0.0/package.json#L30-L33) to the \\"package.json\\" file and configuring a few standard OTEL\\\\_... 
environment variables.\\n\\n```json\\n// ...\\n \\"scripts\\": {\\n\\t\\"start\\": \\"node --env-file=./config.env -r @elastic/opentelemetry-node lib/app.js\\"\\n },\\n \\"dependencies\\": {\\n\\t\\"@elastic/opentelemetry-node\\": \\"*\\",\\n // ...\\n```\\n\\nThe result is an observable application using the industry-standard [OpenTelemetry](https://opentelemetry.io/) — offering high-quality instrumentation of many popular Node.js libraries, a portable API to avoid vendor lock-in, and an active community.\\n\\nUsing Elastic Observability, some out-of-the-box benefits you can expect are: rich trace viewing, Service maps, integrated metrics and log analysis, and more. The distro ships [host-metrics](https://github.com/open-telemetry/opentelemetry-js-contrib#readme) and Kibana provides a curated service metrics UI. There is out-of-the-box sending of logs for the popular [Winston](https://github.com/winstonjs/winston) and [Bunyan](https://github.com/trentm/node-bunyan) logging frameworks, with support planned for [Pino](https://getpino.io).\\n\\n![trace sample screenshot](/assets/images/elastic-opentelemetry-distribution-node-js/trace_sample.png)\\n\\n## What\'s next?\\n\\nElastic is committed to helping OpenTelemetry succeed and to helping our customers use OpenTelemetry effectively in their systems. Last year, we [donated ECS](https://opentelemetry.io/blog/2023/ecs-otel-semconv-convergence/) and continue to work on integrating it with OpenTelemetry Semantic Conventions. More recently, we are working on [donating our eBPF-based profiler](https://www.elastic.co/blog/elastic-donation-proposal-to-contribute-profiling-agent-to-opentelemetry) to OpenTelemetry. We contribute to many of the language SDKs and other OpenTelemetry projects.\\n\\nAs authors of the Node.js distribution, we are excited to work with the OpenTelemetry JavaScript community and to help make the JS API & SDK a more robust, featureful, and obvious choice for JavaScript observability. Having a distro gives us the flexibility to build features on top of the vanilla OTel SDK. Currently, some advantages of the distro include: single package for installation, easy auto-instrumentation with reasonable default configuration, ESM enabled by default, and automatic logs telemetry sending. We will certainly contribute features upstream to the OTel JavaScript project when possible and will include additional features in the distro when it makes more sense for them to be there.\\n\\nThe Elastic OpenTelemetry Distribution for Node.js is currently an alpha. Please [try it out](https://github.com/elastic/elastic-otel-node/blob/main/packages/opentelemetry-node/docs/getting-started.mdx) and let us know if it might work for you. Watch for the [latest releases here](https://github.com/elastic/elastic-otel-node/releases). You can engage with us on [the project issue tracker](https://github.com/elastic/elastic-otel-node/issues) or [Elastic\'s Node.js APM Discuss forum](https://discuss.elastic.co/tags/c/apm/nodejs).\\n\\n_The release and timing of any features or functionality described in this post remain at Elastic\'s sole discretion. 
Any features or functionality not currently available may not be delivered on time or at all._\\n","code":"var Component=(()=>{var p=Object.create;var r=Object.defineProperty;var m=Object.getOwnPropertyDescriptor;var u=Object.getOwnPropertyNames;var g=Object.getPrototypeOf,f=Object.prototype.hasOwnProperty;var y=(n,e)=>()=>(e||n((e={exports:{}}).exports,e),e.exports),b=(n,e)=>{for(var o in e)r(n,o,{get:e[o],enumerable:!0})},s=(n,e,o,a)=>{if(e&&typeof e==\\"object\\"||typeof e==\\"function\\")for(let i of u(e))!f.call(n,i)&&i!==o&&r(n,i,{get:()=>e[i],enumerable:!(a=m(e,i))||a.enumerable});return n};var w=(n,e,o)=>(o=n!=null?p(g(n)):{},s(e||!n||!n.__esModule?r(o,\\"default\\",{value:n,enumerable:!0}):o,n)),T=n=>s(r({},\\"__esModule\\",{value:!0}),n);var c=y((j,l)=>{l.exports=_jsx_runtime});var E={};b(E,{default:()=>h,frontmatter:()=>v});var t=w(c()),v={title:\\"Introducing Elastic\'s OpenTelemetry Distribution for Node.js\\",slug:\\"elastic-opentelemetry-distribution-node-js\\",date:\\"2024-05-06\\",description:\\"Announcing the first alpha release of the Elastic OpenTelemetry Distribution for Node.js. See how easy it is to instrument your Node.js applications with OpenTelemetry in this blog post.\\",author:[{slug:\\"trent-mick\\"}],image:\\"Node-js.jpeg\\",tags:[{slug:\\"opentelemetry\\"}]};function d(n){let e={a:\\"a\\",code:\\"code\\",em:\\"em\\",h2:\\"h2\\",img:\\"img\\",p:\\"p\\",pre:\\"pre\\",...n.components};return(0,t.jsxs)(t.Fragment,{children:[(0,t.jsxs)(e.p,{children:[\\"We are delighted to announce the alpha release of the \\",(0,t.jsx)(e.a,{href:\\"https://github.com/elastic/elastic-otel-node/tree/main/packages/opentelemetry-node#readme\\",rel:\\"nofollow\\",children:\\"Elastic OpenTelemetry Distribution for Node.js\\"}),\\". This distribution is a light wrapper around the OpenTelemetry Node.js SDK that makes it easier to get started using OpenTelemetry to observe your Node.js applications.\\"]}),`\\n`,(0,t.jsx)(e.h2,{id:\\"background\\",children:\\"Background\\"}),`\\n`,(0,t.jsxs)(e.p,{children:[\\"Elastic is standardizing on OpenTelemetry (OTel) for observability and security data collection. As part of that effort, we are \\",(0,t.jsx)(e.a,{href:\\"https://www.elastic.co/blog/elastic-opentelemetry-sdk-distributions\\",rel:\\"nofollow\\",children:\\"providing distributions of the OpenTelemetry Language SDKs\\"}),\\". Our \\",(0,t.jsx)(e.a,{href:\\"https://github.com/elastic/apm-agent-android#readme\\",rel:\\"nofollow\\",children:\\"Android\\"}),\\" and \\",(0,t.jsx)(e.a,{href:\\"https://github.com/elastic/apm-agent-ios#readme\\",rel:\\"nofollow\\",children:\\"iOS\\"}),\\" SDKs have been OpenTelemetry-based from the start, and we have recently released alpha distributions for \\",(0,t.jsx)(e.a,{href:\\"https://github.com/elastic/elastic-otel-java#readme\\",rel:\\"nofollow\\",children:\\"Java\\"}),\\" and \\",(0,t.jsx)(e.a,{href:\\"https://github.com/elastic/elastic-otel-dotnet#readme\\",rel:\\"nofollow\\",children:\\".NET\\"}),\\". The Elastic OpenTelemetry Distribution for Node.js is the latest addition.\\"]}),`\\n`,(0,t.jsx)(e.h2,{id:\\"getting-started\\",children:\\"Getting started\\"}),`\\n`,(0,t.jsx)(e.p,{children:\'To get started with the Elastic OTel Distribution for Node.js (the \\"distro\\"), you need only install and load a single npm dependency (@elastic/opentelemetry-node). The distro sets up the collection of traces, metrics, and logs for a number of popular Node.js packages. It sends data to any OTLP endpoint you configure. 
This could be a standard OTel Collector or, as shown below, an Elastic Observability cloud deployment.\'}),`\\n`,(0,t.jsx)(e.pre,{children:(0,t.jsx)(e.code,{className:\\"language-bash\\",children:`npm install --save @elastic/opentelemetry-node # (1) install the SDK\\n\\n# (2) configure it, for example:\\nexport OTEL_EXPORTER_OTLP_ENDPOINT=https://my-deployment.apm.us-west1.gcp.cloud.es.io\\nexport OTEL_EXPORTER_OTLP_HEADERS=\\"Authorization=Bearer ...REDACTED...\\"\\nexport OTEL_SERVICE_NAME=my-service\\n\\n# (3) load and start it\\nnode --require @elastic/opentelemetry-node my-service.js\\n`})}),`\\n`,(0,t.jsx)(e.h2,{id:\\"a-small-example-with-express-and-postgresql\\",children:\\"A small example with Express and PostgreSQL\\"}),`\\n`,(0,t.jsxs)(e.p,{children:[`For a concrete example, let\'s look at a small Node.js \\"Shortlinks\\" service implemented using the `,(0,t.jsx)(e.a,{href:\\"https://expressjs.com/\\",rel:\\"nofollow\\",children:\\"Express\\"}),\\" web framework and the \\",(0,t.jsx)(e.a,{href:\\"https://node-postgres.com/\\",rel:\\"nofollow\\",children:\\"pg PostgreSQL client package\\"}),\\". This service provides a POST / route for creating short links (a short name for a URL) and a GET /:shortname route for using them.\\"]}),`\\n`,(0,t.jsx)(e.p,{children:(0,t.jsx)(e.img,{src:\\"/assets/images/elastic-opentelemetry-distribution-node-js/recent_shortlinks.png\\",alt:\\"Recent shortlinks\\",width:\\"897\\",height:\\"557\\"})}),`\\n`,(0,t.jsxs)(e.p,{children:[\\"The git repository is \\",(0,t.jsx)(e.a,{href:\\"https://github.com/elastic/elastic-otel-node-example\\",rel:\\"nofollow\\",children:\\"here\\"}),\\". The \\",(0,t.jsx)(e.a,{href:\\"https://github.com/elastic/elastic-otel-node-example#readme\\",rel:\\"nofollow\\",children:\\"README\\"}),\\" shows how to create a free trial Elastic cloud deployment and get the appropriate OTEL_... config settings. Try it out (prerequisites are Docker and Node.js v20 or later):\\"]}),`\\n`,(0,t.jsx)(e.pre,{children:(0,t.jsx)(e.code,{className:\\"language-bash\\",children:`git clone https://github.com/elastic/elastic-otel-node-example.git\\ncd elastic-otel-node-example\\nnpm install\\n\\ncp config.env.template config.env\\n# Edit OTEL_ values in \\"config.env\\" to point to your collection endpoint.\\n\\nnpm run db:start\\nnpm start\\n`})}),`\\n`,(0,t.jsxs)(e.p,{children:[\\"The only steps needed to set up observability are \\",(0,t.jsx)(e.a,{href:\\"https://github.com/elastic/elastic-otel-node-example/blob/v1.0.0/package.json#L30-L33\\",rel:\\"nofollow\\",children:\\"these small changes\\"}),\' to the \\"package.json\\" file and configuring a few standard OTEL_... 
environment variables.\']}),`\\n`,(0,t.jsx)(e.pre,{children:(0,t.jsx)(e.code,{className:\\"language-json\\",children:`// ...\\n \\"scripts\\": {\\n\\t\\"start\\": \\"node --env-file=./config.env -r @elastic/opentelemetry-node lib/app.js\\"\\n },\\n \\"dependencies\\": {\\n\\t\\"@elastic/opentelemetry-node\\": \\"*\\",\\n // ...\\n`})}),`\\n`,(0,t.jsxs)(e.p,{children:[\\"The result is an observable application using the industry-standard \\",(0,t.jsx)(e.a,{href:\\"https://opentelemetry.io/\\",rel:\\"nofollow\\",children:\\"OpenTelemetry\\"}),\\" \\\\u2014 offering high-quality instrumentation of many popular Node.js libraries, a portable API to avoid vendor lock-in, and an active community.\\"]}),`\\n`,(0,t.jsxs)(e.p,{children:[\\"Using Elastic Observability, some out-of-the-box benefits you can expect are: rich trace viewing, Service maps, integrated metrics and log analysis, and more. The distro ships \\",(0,t.jsx)(e.a,{href:\\"https://github.com/open-telemetry/opentelemetry-js-contrib#readme\\",rel:\\"nofollow\\",children:\\"host-metrics\\"}),\\" and Kibana provides a curated service metrics UI. There is out-of-the-box sending of logs for the popular \\",(0,t.jsx)(e.a,{href:\\"https://github.com/winstonjs/winston\\",rel:\\"nofollow\\",children:\\"Winston\\"}),\\" and \\",(0,t.jsx)(e.a,{href:\\"https://github.com/trentm/node-bunyan\\",rel:\\"nofollow\\",children:\\"Bunyan\\"}),\\" logging frameworks, with support planned for \\",(0,t.jsx)(e.a,{href:\\"https://getpino.io\\",rel:\\"nofollow\\",children:\\"Pino\\"}),\\".\\"]}),`\\n`,(0,t.jsx)(e.p,{children:(0,t.jsx)(e.img,{src:\\"/assets/images/elastic-opentelemetry-distribution-node-js/trace_sample.png\\",alt:\\"trace sample screenshot\\",width:\\"1600\\",height:\\"1049\\"})}),`\\n`,(0,t.jsx)(e.h2,{id:\\"whats-next\\",children:\\"What\'s next?\\"}),`\\n`,(0,t.jsxs)(e.p,{children:[\\"Elastic is committed to helping OpenTelemetry succeed and to helping our customers use OpenTelemetry effectively in their systems. Last year, we \\",(0,t.jsx)(e.a,{href:\\"https://opentelemetry.io/blog/2023/ecs-otel-semconv-convergence/\\",rel:\\"nofollow\\",children:\\"donated ECS\\"}),\\" and continue to work on integrating it with OpenTelemetry Semantic Conventions. More recently, we are working on \\",(0,t.jsx)(e.a,{href:\\"https://www.elastic.co/blog/elastic-donation-proposal-to-contribute-profiling-agent-to-opentelemetry\\",rel:\\"nofollow\\",children:\\"donating our eBPF-based profiler\\"}),\\" to OpenTelemetry. We contribute to many of the language SDKs and other OpenTelemetry projects.\\"]}),`\\n`,(0,t.jsx)(e.p,{children:\\"As authors of the Node.js distribution, we are excited to work with the OpenTelemetry JavaScript community and to help make the JS API & SDK a more robust, featureful, and obvious choice for JavaScript observability. Having a distro gives us the flexibility to build features on top of the vanilla OTel SDK. Currently, some advantages of the distro include: single package for installation, easy auto-instrumentation with reasonable default configuration, ESM enabled by default, and automatic logs telemetry sending. We will certainly contribute features upstream to the OTel JavaScript project when possible and will include additional features in the distro when it makes more sense for them to be there.\\"}),`\\n`,(0,t.jsxs)(e.p,{children:[\\"The Elastic OpenTelemetry Distribution for Node.js is currently an alpha. 
Please \\",(0,t.jsx)(e.a,{href:\\"https://github.com/elastic/elastic-otel-node/blob/main/packages/opentelemetry-node/docs/getting-started.mdx\\",rel:\\"nofollow\\",children:\\"try it out\\"}),\\" and let us know if it might work for you. Watch for the \\",(0,t.jsx)(e.a,{href:\\"https://github.com/elastic/elastic-otel-node/releases\\",rel:\\"nofollow\\",children:\\"latest releases here\\"}),\\". You can engage with us on \\",(0,t.jsx)(e.a,{href:\\"https://github.com/elastic/elastic-otel-node/issues\\",rel:\\"nofollow\\",children:\\"the project issue tracker\\"}),\\" or \\",(0,t.jsx)(e.a,{href:\\"https://discuss.elastic.co/tags/c/apm/nodejs\\",rel:\\"nofollow\\",children:\\"Elastic\'s Node.js APM Discuss forum\\"}),\\".\\"]}),`\\n`,(0,t.jsx)(e.p,{children:(0,t.jsx)(e.em,{children:\\"The release and timing of any features or functionality described in this post remain at Elastic\'s sole discretion. Any features or functionality not currently available may not be delivered on time or at all.\\"})})]})}function h(n={}){let{wrapper:e}=n.components||{};return e?(0,t.jsx)(e,{...n,children:(0,t.jsx)(d,{...n})}):d(n)}return T(E);})();\\n;return Component;"},"_id":"articles/elastic-opentelemetry-distribution-node-js.mdx","_raw":{"sourceFilePath":"articles/elastic-opentelemetry-distribution-node-js.mdx","sourceFileName":"elastic-opentelemetry-distribution-node-js.mdx","sourceFileDir":"articles","contentType":"mdx","flattenedPath":"articles/elastic-opentelemetry-distribution-node-js"},"type":"Article","imageUrl":"/assets/images/elastic-opentelemetry-distribution-node-js/Node-js.jpeg","readingTime":"4 min read","url":"/elastic-opentelemetry-distribution-node-js","headings":[{"level":2,"title":"Background","href":"#background"},{"level":2,"title":"Getting started","href":"#getting-started"},{"level":2,"title":"A small example with Express and PostgreSQL","href":"#a-small-example-with-express-and-postgresql"},{"level":2,"title":"What\'s next?","href":"#whats-next"}]},{"title":"Introducing Elastic\'s distribution of OpenTelemetry PHP","slug":"elastic-opentelemetry-distribution-php","date":"2024-09-16","description":"Announcing the first alpha release of the Elastic distribution of OpenTelemetry PHP. See how easy it is to instrument your PHP applications with OpenTelemetry in this blog post.","image":"php.jpg","author":[{"slug":"pawel-filipczak","type":"Author","_raw":{}}],"tags":[{"slug":"opentelemetry","type":"Tag","_raw":{}},{"slug":"apm","type":"Tag","_raw":{}},{"slug":"php","type":"Tag","_raw":{}},{"slug":"instrumentation","type":"Tag","_raw":{}}],"body":{"raw":"\\nWe’re excited to introduce the first alpha release of [Elastic Distribution for OpenTelemetry PHP](https://github.com/elastic/elastic-otel-php). In this post, you’ll learn how to easily install and set up monitoring for your PHP applications.\\n\\n## Background\\n\\nElastic is standardizing on OpenTelemetry (OTel) for observability and security data collection. As part of that effort, we are [providing distributions of the OpenTelemetry Language SDKs](https://www.elastic.co/blog/elastic-opentelemetry-sdk-distributions). 
Our [Android](https://github.com/elastic/apm-agent-android#readme) and [iOS](https://github.com/elastic/apm-agent-ios#readme) SDKs have been OpenTelemetry-based from the start, and we have recently released alpha distributions for [Java](https://github.com/elastic/elastic-otel-java#readme), [.NET](https://github.com/elastic/elastic-otel-dotnet#readme), [Node.js](https://github.com/elastic/elastic-otel-node#readme) and [Python](https://github.com/elastic/elastic-otel-python#readme). The Elastic distribution of OpenTelemetry PHP is the latest addition.\\n\\n## Getting started\\n\\nTo install Elastic Distribution for OpenTelemetry PHP for your application, download the appropriate package for your Linux distribution from [https://github.com/elastic/elastic-otel-php/releases](https://github.com/elastic/elastic-otel-php/releases).\\n\\nCurrently, we support packages for systems using DEB, RPM, and APK package managers for x86_64 and ARM64 processors.\\n\\nFor DEB-based systems, run the following command:\\n```bash\\ndpkg -i .deb\\n```\\n\\nFor RPM-based systems, run the following command:\\n```bash\\nrpm -ivh .rpm\\n```\\n\\nFor APK-based systems (Alpine), run the following command:\\n```bash\\napk add --allow-untrusted .apk\\n```\\n\\nThe package installer will automatically detect the installed PHP versions and update the configuration, so the monitoring extension will be available with the next process restart (you need to restart the processes to load the new php.ini configuration).\\nSome environment variables are needed to provide the necessary configuration for instrumenting your services. These mainly concern the destination of your traces and the identification of your service. You’ll also need to provide the authorization headers for authentication with Elastic Observability Cloud and the Elastic Cloud endpoint where the data is sent.\\n\\n```bash\\nexport OTEL_EXPORTER_OTLP_HEADERS=\\"Authorization=\\"\\nexport OTEL_EXPORTER_OTLP_ENDPOINT=\\n```\\n\\nwhere\\n* `OTEL_EXPORTER_OTLP_ENDPOINT`: The full URL of the endpoint where data will be sent.\\n* `OTEL_EXPORTER_OTLP_HEADERS`: A comma-separated list of `key=value` pairs that will be added to the headers of every request. This is typically used for authentication information.\\n\\nAfter restarting the application, you should see insights into the monitored applications in Kibana, such as service maps and trace views. In the example below, you can see trace details from the Aimeos application created using the Laravel framework.\\n\\n![Aimeos trace example](/assets/images/elastic-opentelemetry-distribution-php/traces-laravel.png)\\n\\nBelow is an example of a Slim application using HttpAsyncClient:\\n\\n![Slim and HttpAsyncClient trace example](/assets/images/elastic-opentelemetry-distribution-php/traces-slim.png)\\n\\n## What\'s next?\\n\\nIn this alpha version, we support all modern PHP versions from 8.0 to 8.3 inclusive, providing instrumentation for PHP code, including popular frameworks like Laravel, Slim, and HttpAsyncClient, as well as native extensions such as PDO. In future releases, we plan to introduce additional features supported by OpenTelemetry, along with Elastic APM-exclusive features like Inferred Spans.\\n\\nStay tuned!\\n\\n\\nElastic is committed to helping OpenTelemetry succeed and to helping our customers use OpenTelemetry effectively in their systems. 
Last year, we [donated ECS](https://opentelemetry.io/blog/2023/ecs-otel-semconv-convergence/) and continue to work on integrating it with OpenTelemetry Semantic Conventions. More recently, we are working on [donating our eBPF-based profiler](https://www.elastic.co/blog/elastic-donation-proposal-to-contribute-profiling-agent-to-opentelemetry) to OpenTelemetry. We contribute to many of the language SDKs and other OpenTelemetry projects.\\n\\n\\n\\nAs authors of the PHP distribution, we are excited to work with the OpenTelemetry PHP community and to help make the PHP SDK a more robust, featureful, and obvious choice for PHP observability. Having a distro gives us the flexibility to build features on top of the vanilla OTel SDK. Currently, some advantages of the distro include: fully automatic installation and full auto-instrumentation. We will certainly contribute features upstream to the OTel PHP project when possible and will include additional features in the distro when it makes more sense for them to be there.\\n\\nThe Elastic OpenTelemetry Distribution of PHP is currently an alpha. Please [try it out](https://github.com/elastic/elastic-otel-php/blob/main/docs/get-started.md) and let us know if it might work for you. Watch for the [latest releases here](https://github.com/elastic/elastic-otel-php/releases). You can engage with us on [the project issue tracker](https://github.com/elastic/elastic-otel-php/issues) or [Elastic\'s PHP APM Discuss forum](https://discuss.elastic.co/tags/c/apm/php).\\n\\n\\n_The release and timing of any features or functionality described in this post remain at Elastic\'s sole discretion. Any features or functionality not currently available may not be delivered on time or at all._\\n","code":"var Component=(()=>{var p=Object.create;var o=Object.defineProperty;var u=Object.getOwnPropertyDescriptor;var m=Object.getOwnPropertyNames;var f=Object.getPrototypeOf,g=Object.prototype.hasOwnProperty;var y=(i,e)=>()=>(e||i((e={exports:{}}).exports,e),e.exports),w=(i,e)=>{for(var n in e)o(i,n,{get:e[n],enumerable:!0})},r=(i,e,n,l)=>{if(e&&typeof e==\\"object\\"||typeof e==\\"function\\")for(let a of m(e))!g.call(i,a)&&a!==n&&o(i,a,{get:()=>e[a],enumerable:!(l=u(e,a))||l.enumerable});return i};var b=(i,e,n)=>(n=i!=null?p(f(i)):{},r(e||!i||!i.__esModule?o(n,\\"default\\",{value:i,enumerable:!0}):n,i)),P=i=>r(o({},\\"__esModule\\",{value:!0}),i);var c=y((E,s)=>{s.exports=_jsx_runtime});var v={};w(v,{default:()=>d,frontmatter:()=>T});var t=b(c()),T={title:\\"Introducing Elastic\'s distribution of OpenTelemetry PHP\\",slug:\\"elastic-opentelemetry-distribution-php\\",date:\\"2024-09-16\\",description:\\"Announcing the first alpha release of the Elastic distribution of OpenTelemetry PHP. See how easy it is to instrument your PHP applications with OpenTelemetry in this blog post.\\",author:[{slug:\\"pawel-filipczak\\"}],image:\\"php.jpg\\",tags:[{slug:\\"opentelemetry\\"},{slug:\\"apm\\"},{slug:\\"php\\"},{slug:\\"instrumentation\\"}]};function h(i){let e={a:\\"a\\",code:\\"code\\",em:\\"em\\",h2:\\"h2\\",img:\\"img\\",li:\\"li\\",p:\\"p\\",pre:\\"pre\\",ul:\\"ul\\",...i.components};return(0,t.jsxs)(t.Fragment,{children:[(0,t.jsxs)(e.p,{children:[\\"We\\\\u2019re excited to introduce the first alpha release of \\",(0,t.jsx)(e.a,{href:\\"https://github.com/elastic/elastic-otel-php\\",rel:\\"nofollow\\",children:\\"Elastic Distribution for OpenTelemetry PHP\\"}),\\". 
In this post, you\\\\u2019ll learn how to easily install and set up monitoring for your PHP applications.\\"]}),`\\n`,(0,t.jsx)(e.h2,{id:\\"background\\",children:\\"Background\\"}),`\\n`,(0,t.jsxs)(e.p,{children:[\\"Elastic is standardizing on OpenTelemetry (OTel) for observability and security data collection. As part of that effort, we are \\",(0,t.jsx)(e.a,{href:\\"https://www.elastic.co/blog/elastic-opentelemetry-sdk-distributions\\",rel:\\"nofollow\\",children:\\"providing distributions of the OpenTelemetry Language SDKs\\"}),\\". Our \\",(0,t.jsx)(e.a,{href:\\"https://github.com/elastic/apm-agent-android#readme\\",rel:\\"nofollow\\",children:\\"Android\\"}),\\" and \\",(0,t.jsx)(e.a,{href:\\"https://github.com/elastic/apm-agent-ios#readme\\",rel:\\"nofollow\\",children:\\"iOS\\"}),\\" SDKs have been OpenTelemetry-based from the start, and we have recently released alpha distributions for \\",(0,t.jsx)(e.a,{href:\\"https://github.com/elastic/elastic-otel-java#readme\\",rel:\\"nofollow\\",children:\\"Java\\"}),\\", \\",(0,t.jsx)(e.a,{href:\\"https://github.com/elastic/elastic-otel-dotnet#readme\\",rel:\\"nofollow\\",children:\\".NET\\"}),\\", \\",(0,t.jsx)(e.a,{href:\\"https://github.com/elastic/elastic-otel-node#readme\\",rel:\\"nofollow\\",children:\\"Node.js\\"}),\\" and \\",(0,t.jsx)(e.a,{href:\\"https://github.com/elastic/elastic-otel-python#readme\\",rel:\\"nofollow\\",children:\\"Python\\"}),\\". The Elastic distribution of OpenTelemetry PHP is the latest addition.\\"]}),`\\n`,(0,t.jsx)(e.h2,{id:\\"getting-started\\",children:\\"Getting started\\"}),`\\n`,(0,t.jsxs)(e.p,{children:[\\"To install Elastic Distribution for OpenTelemetry PHP for your application, download the appropriate package for your Linux distribution from \\",(0,t.jsx)(e.a,{href:\\"https://github.com/elastic/elastic-otel-php/releases\\",rel:\\"nofollow\\",children:\\"https://github.com/elastic/elastic-otel-php/releases\\"}),\\".\\"]}),`\\n`,(0,t.jsx)(e.p,{children:\\"Currently, we support packages for systems using DEB, RPM, and APK package managers for x86_64 and ARM64 processors.\\"}),`\\n`,(0,t.jsx)(e.p,{children:\\"For DEB-based systems, run the following command:\\"}),`\\n`,(0,t.jsx)(e.pre,{children:(0,t.jsx)(e.code,{className:\\"language-bash\\",children:`dpkg -i .deb\\n`})}),`\\n`,(0,t.jsx)(e.p,{children:\\"For RPM-based systems, run the following command:\\"}),`\\n`,(0,t.jsx)(e.pre,{children:(0,t.jsx)(e.code,{className:\\"language-bash\\",children:`rpm -ivh .rpm\\n`})}),`\\n`,(0,t.jsx)(e.p,{children:\\"For APK-based systems (Alpine), run the following command:\\"}),`\\n`,(0,t.jsx)(e.pre,{children:(0,t.jsx)(e.code,{className:\\"language-bash\\",children:`apk add --allow-untrusted .apk\\n`})}),`\\n`,(0,t.jsx)(e.p,{children:`The package installer will automatically detect the installed PHP versions and update the configuration, so the monitoring extension will be available with the next process restart (you need to restart the processes to load the new php.ini configuration).\\nSome environment variables are needed to provide the necessary configuration for instrumenting your services. These mainly concern the destination of your traces and the identification of your service. 
You\\\\u2019ll also need to provide the authorization headers for authentication with Elastic Observability Cloud and the Elastic Cloud endpoint where the data is sent.`}),`\\n`,(0,t.jsx)(e.pre,{children:(0,t.jsx)(e.code,{className:\\"language-bash\\",children:`export OTEL_EXPORTER_OTLP_HEADERS=\\"Authorization=\\"\\nexport OTEL_EXPORTER_OTLP_ENDPOINT=\\n`})}),`\\n`,(0,t.jsx)(e.p,{children:\\"where\\"}),`\\n`,(0,t.jsxs)(e.ul,{children:[`\\n`,(0,t.jsxs)(e.li,{children:[(0,t.jsx)(e.code,{children:\\"OTEL_EXPORTER_OTLP_ENDPOINT\\"}),\\": The full URL of the endpoint where data will be sent.\\"]}),`\\n`,(0,t.jsxs)(e.li,{children:[(0,t.jsx)(e.code,{children:\\"OTEL_EXPORTER_OTLP_HEADERS\\"}),\\": A comma-separated list of \\",(0,t.jsx)(e.code,{children:\\"key=value\\"}),\\" pairs that will be added to the headers of every request. This is typically used for authentication information.\\"]}),`\\n`]}),`\\n`,(0,t.jsx)(e.p,{children:\\"After restarting the application, you should see insights into the monitored applications in Kibana, such as service maps and trace views. In the example below, you can see trace details from the Aimeos application created using the Laravel framework.\\"}),`\\n`,(0,t.jsx)(e.p,{children:(0,t.jsx)(e.img,{src:\\"/assets/images/elastic-opentelemetry-distribution-php/traces-laravel.png\\",alt:\\"Aimeos trace example\\",width:\\"1312\\",height:\\"1759\\"})}),`\\n`,(0,t.jsx)(e.p,{children:\\"Below is an example of a Slim application using HttpAsyncClient:\\"}),`\\n`,(0,t.jsx)(e.p,{children:(0,t.jsx)(e.img,{src:\\"/assets/images/elastic-opentelemetry-distribution-php/traces-slim.png\\",alt:\\"Slim and HttpAsyncClient trace example\\",width:\\"1395\\",height:\\"885\\"})}),`\\n`,(0,t.jsx)(e.h2,{id:\\"whats-next\\",children:\\"What\'s next?\\"}),`\\n`,(0,t.jsx)(e.p,{children:\\"In this alpha version, we support all modern PHP versions from 8.0 to 8.3 inclusive, providing instrumentation for PHP code, including popular frameworks like Laravel, Slim, and HttpAsyncClient, as well as native extensions such as PDO. In future releases, we plan to introduce additional features supported by OpenTelemetry, along with Elastic APM-exclusive features like Inferred Spans.\\"}),`\\n`,(0,t.jsx)(e.p,{children:\\"Stay tuned!\\"}),`\\n`,(0,t.jsxs)(e.p,{children:[\\"Elastic is committed to helping OpenTelemetry succeed and to helping our customers use OpenTelemetry effectively in their systems. Last year, we \\",(0,t.jsx)(e.a,{href:\\"https://opentelemetry.io/blog/2023/ecs-otel-semconv-convergence/\\",rel:\\"nofollow\\",children:\\"donated ECS\\"}),\\" and continue to work on integrating it with OpenTelemetry Semantic Conventions. More recently, we are working on \\",(0,t.jsx)(e.a,{href:\\"https://www.elastic.co/blog/elastic-donation-proposal-to-contribute-profiling-agent-to-opentelemetry\\",rel:\\"nofollow\\",children:\\"donating our eBPF-based profiler\\"}),\\" to OpenTelemetry. We contribute to many of the language SDKs and other OpenTelemetry projects.\\"]}),`\\n`,(0,t.jsx)(e.p,{children:\\"As authors of the PHP distribution, we are excited to work with the OpenTelemetry PHP community and to help make the PHP SDK a more robust, featureful, and obvious choice for PHP observability. Having a distro gives us the flexibility to build features on top of the vanilla OTel SDK. Currently, some advantages of the distro include: fully automatic installation and full auto-instrumentation. 
We will certainly contribute features upstream to the OTel PHP project when possible and will include additional features in the distro when it makes more sense for them to be there.\\"}),`\\n`,(0,t.jsxs)(e.p,{children:[\\"The Elastic OpenTelemetry Distribution of PHP is currently an alpha. Please \\",(0,t.jsx)(e.a,{href:\\"https://github.com/elastic/elastic-otel-php/blob/main/docs/get-started.md\\",rel:\\"nofollow\\",children:\\"try it out\\"}),\\" and let us know if it might work for you. Watch for the \\",(0,t.jsx)(e.a,{href:\\"https://github.com/elastic/elastic-otel-php/releases\\",rel:\\"nofollow\\",children:\\"latest releases here\\"}),\\". You can engage with us on \\",(0,t.jsx)(e.a,{href:\\"https://github.com/elastic/elastic-otel-php/issues\\",rel:\\"nofollow\\",children:\\"the project issue tracker\\"}),\\" or \\",(0,t.jsx)(e.a,{href:\\"https://discuss.elastic.co/tags/c/apm/php\\",rel:\\"nofollow\\",children:\\"Elastic\'s PHP APM Discuss forum\\"}),\\".\\"]}),`\\n`,(0,t.jsx)(e.p,{children:(0,t.jsx)(e.em,{children:\\"The release and timing of any features or functionality described in this post remain at Elastic\'s sole discretion. Any features or functionality not currently available may not be delivered on time or at all.\\"})})]})}function d(i={}){let{wrapper:e}=i.components||{};return e?(0,t.jsx)(e,{...i,children:(0,t.jsx)(h,{...i})}):h(i)}return P(v);})();\\n;return Component;"},"_id":"articles/elastic-opentelemetry-distribution-php.mdx","_raw":{"sourceFilePath":"articles/elastic-opentelemetry-distribution-php.mdx","sourceFileName":"elastic-opentelemetry-distribution-php.mdx","sourceFileDir":"articles","contentType":"mdx","flattenedPath":"articles/elastic-opentelemetry-distribution-php"},"type":"Article","imageUrl":"/assets/images/elastic-opentelemetry-distribution-php/php.jpg","readingTime":"4 min read","url":"/elastic-opentelemetry-distribution-php","headings":[{"level":2,"title":"Background","href":"#background"},{"level":2,"title":"Getting started","href":"#getting-started"},{"level":2,"title":"What\'s next?","href":"#whats-next"}]},{"title":"LLM Observability with Elastic, OpenLIT and OpenTelemetry","slug":"elastic-opentelemetry-langchain-openlit-tracing","date":"2024-08-29","description":"Langchain applications are growing in use. The ability to build out RAG-based applications, simple AI Assistants, and more is becoming the norm. Observing these applications is even harder. Given the various options that are out there, this blog shows how to use OpenTelemetry instrumentation with the OpenLIT instrumentation library to ingest traces into Elastic Observability APM.","image":"elastic-openlit-tracing.jpg","author":[{"slug":"bahubali-shetti","type":"Author","_raw":{}},{"slug":"aman-agarwal","type":"Author","_raw":{}}],"tags":[{"slug":"opentelemetry","type":"Tag","_raw":{}},{"slug":"apm","type":"Tag","_raw":{}},{"slug":"python","type":"Tag","_raw":{}},{"slug":"instrumentation","type":"Tag","_raw":{}},{"slug":"azure-openai","type":"Tag","_raw":{}}],"body":{"raw":"\\nThe realm of technology is evolving rapidly, and Large Language Models (LLMs) are at the forefront of this transformation. From chat bots to intelligent application copilots, LLMs are becoming increasingly sophisticated. As these applications grow more complex, ensuring their reliability and performance is paramount. 
This is where observability steps in, aided by OpenTelemetry and Elastic through the [OpenLIT](https://github.com/openlit/openlit) instrumentation library.\xa0\\n\\nOpenLIT is an open-source Observability and Evaluation tool that helps take your LLM apps from playground to debug to production. With OpenLIT, you get the ability to choose from a [range of Integrations](https://docs.openlit.io/latest/integrations/introduction) (across LLMs, VectorDBs, frameworks, and GPUs) to start tracking LLM performance, usage, and costs without hassle. In this blog, we will look at tracking OpenAI and LangChain to send telemetry to an OpenTelemetry-compatible endpoint like Elastic.\\n\\n[Elastic supports OpenTelemetry natively](https://www.elastic.co/guide/en/observability/current/apm-open-telemetry.html): it can take telemetry directly from the application (via the OpenTelemetry SDKs) or through a native OTel collector. No special agents are needed. Additionally, [Elastic\'s EDOT](https://www.elastic.co/observability-labs/blog/elastic-distributions-opentelemetry) provides a supported set of OTel SDKs and an OTel Collector. In this blog, we will connect our application directly to Elastic without a collector for simplicity.\\n\\n\\n## Why Observability Matters for LLM Applications\\n\\nMonitoring LLM applications is crucial for several reasons.\\n\\n1. It’s vital to keep track of how often LLMs are being used for usage and cost tracking.\\n\\n2. Latency is important to track since the response time from the model can vary based on the inputs passed to the LLM.\\n\\n3. Rate limiting is a common challenge, particularly for external LLMs, as applications depend more on these external API calls. When rate limits are hit, it can hinder these applications from performing their essential functions using these LLMs.\\n\\nBy keeping a close eye on these aspects, you can not only save costs but also avoid hitting request limits, ensuring your LLM applications perform optimally.\\n\\n\\n## What are the signals that you should be looking at?\\n\\nUsing Large Language Models (LLMs) in applications differs from traditional machine learning (ML) models. Primarily, LLMs are often accessed through external API calls instead of being run locally or in-house. It is crucial to capture the sequence of events (using traces), especially in a RAG-based application where there can be events before and after LLM usage. Also, analyzing the aggregated data (through metrics) to get a quick overview of requests, tokens, and cost is important for optimizing performance and managing costs. Here are the key signals to monitor:\\n\\n\\n### Traces\\n\\n**Request Metadata**: This is important in the context of LLMs, given the variety of parameters (like temperature and top\\\\_p) that can drastically affect both the response quality and the cost. Specific aspects to monitor are:\\n\\n1. Temperature: Indicates the level of creativity or randomness desired from the model’s outputs. Varying this parameter can significantly impact the nature of the generated content.\\n\\n2. top_p: Decides how selective the model is by choosing from a certain percentage of most likely words. A high “top_p” value means the model considers a wider range of words, making the text more varied.\\n\\n3. Model Name or Version: Essential for tracking over time, as updates to the LLM might affect performance or response characteristics.\\n\\n4. 
Prompt Details: The exact inputs sent to the LLM, which, unlike in-house ML models where inputs might be more controlled and homogeneous, can vary wildly and affect output complexity and cost implications.\\n\\n**Response Metadata**: Given the API-based interaction with LLMs, tracking the specifics of the response is key for cost management and quality assessment:\\n\\n1. Tokens: Directly impacts cost and is a measure of response length and complexity.\\n\\n2. Cost: Critical for budgeting, as API-based costs can scale with the number of requests and the complexity of each request.\\n\\n3. Completion Details: Similar to the prompt details but from the response perspective, providing insights into the model’s output characteristics and potential areas of inefficiency or unexpected cost.\\n\\n\\n### Metrics\\n\\n**Request Volume**: The total number of requests made to the LLM service. This helps in understanding the demand patterns and identifying any anomaly in usage, such as sudden spikes or drops.\\n\\n**Request Duration**: The time it takes for a request to be processed and a response to be received from the LLM. This includes network latency and the time the LLM takes to generate a response, providing insights into the performance and reliability of the LLM service.\\n\\n**Costs and Tokens Counters**: Keeping track of the total cost accrued and tokens consumed over time is essential for budgeting and cost optimization strategies. Monitoring these metrics can alert you to unexpected increases that may indicate inefficient use of the LLM or the need for optimization.\\n\\n\\n## Implementing Automatic Instrumentation with OpenLIT\\n\\n[OpenLIT](https://openlit.io/) automates telemetry data capture, simplifying the process for developers. Here’s a step-by-step guide to setting it up:\\n\\n**1. Install the OpenLIT SDK**:\\n\\nFirst, you must install the following package:\xa0\\n\\n```bash\\npip install openlit\\n```\\n\\n**Note:** OpenLIT currently supports Python, a popular language for Generative AI. The team is also working on expanding support to JavaScript soon.\\n\\n**2. Get your Elastic APM Credentials**\\n\\n1. Sign in to your [Elastic cloud account](https://cloud.elastic.co).\\n\\n2. Open the side navigation and click on APM under Observability.\\n\\n3. Make sure the APM Server is running\\n\\n![LangChainChat App in Elastic APM](/assets/images/elastic-opentelemetry-langchain-openlit-tracing/LangChainAppOTelAPMsetup.png)\\n\\n1. In the APM Agents section, Select OpenTelemetry and directly jump to Step 5 (Configure OpenTelemetry in your application):\\n\\n2. Copy and save the configuration value for `OTEL_EXPORTER_OTLP_ENDPOINT` and `OTEL_EXPORTER_OTLP_HEADERS`\\n\\n**3. Set Environment Variables**:\\n\\nOpenTelemetry Environment variables for Elastic can be set as follows in linux (or in the code). [Elastic OTel Documentation](https://www.elastic.co/guide/en/observability/current/apm-open-telemetry-direct.html)\\n\\n```bash\\nexport OTEL_EXPORTER_OTLP_ENDPOINT=\\"YOUR_ELASTIC_APM_OTLP_URL\\"\\nexport OTEL_EXPORTER_OTLP_HEADERS=\\"YOUR_ELASTIC_APM_AUTH\\"\\n```\\n\\n**Note:** Make sure to replace the space after Bearer with %20: \\n\\n`OTEL_EXPORTER_OTLP_HEADERS=“Authorization=Bearer%20[APIKEY]”`\\n\\n**4. 
Initialize the SDK**:\\n\\nYou will need to add the following to the LLM Application code.\\n\\n```python\\nimport openlit\\nopenlit.init()\\n```\\nOptionally, you can customize the application name and environment by setting the `application_name` and `environment` attributes when initializing OpenLIT in your application. These variables configure the OTel attributes `service.name` and `deployment.environment`, respectively. For more details on other configuration settings, check out the [OpenLIT GitHub Repository](https://github.com/openlit/openlit/tree/main/sdk/python#configuration).\\n\\n```python\\nopenlit.init(application_name=\\"YourAppName\\",environment=\\"Production\\")\\n```\\n\\nThe most popular libraries in GenAI are OpenAI (for accessing LLMs) and Langchain (for orchestrating steps). An example instrumentation of a Langchain and OpenAI based LLM Application will look like:\\n\\n```python\\nimport getpass\\nimport os\\nfrom langchain_openai import ChatOpenAI\\nfrom langchain_core.messages import HumanMessage, SystemMessage\\nimport openlit \\n\\n# Auto-instruments LLM and VectorDB calls, sending OTel traces and metrics to the configured endpoint\\nopenlit.init()\\n\\nos.environ[\\"OPENAI_API_KEY\\"] = getpass.getpass()\\nmodel = ChatOpenAI(model=\\"gpt-4\\")\\nmessages = [\\n SystemMessage(content=\\"Translate the following from English into Italian\\"),\\n HumanMessage(content=\\"hi!\\"),\\n]\\nmodel.invoke(messages)\\n```\\n\\n## Visualizing Data with Kibana\\n\\nOnce your LLM application is instrumented, visualizing the collected data is the next step. Follow the below steps to import a pre-built Kibana dashboard to get yourself started:\\n\\n1. Copy the dashboard NDJSON provided [here](https://docs.openlit.io/latest/connections/elastic#dashboard) and save it in a file with an extension `.ndjson`.\\n\\n2. Log into your Elastic Instance.\\n\\n3. Go to Stack Management > Saved Objects.\\n\\n4. Click Import and upload your file containing the dashboard NDJSON.\\n\\n5. Click Import and you should have the dashboard available.\\n\\n![Elastic-dashboard-1](/assets/images/elastic-opentelemetry-langchain-openlit-tracing/elastic-dashboard-1.jpg)\\n\\n![Elastic-dashboard-2](/assets/images/elastic-opentelemetry-langchain-openlit-tracing/elastic-dashboard-2.jpg)\\n\\nThe dashboard provides an in-depth overview of system metrics through eight key areas: Total Successful Requests, Request Duration Distribution, Request Rates, Usage Cost and Tokens, Top GenAI Models, GenAI Requests by Platform and Environment, Token Consumption vs. Cost. These metrics collectively help identify peak usage times, latency issues, rate limits, and resource allocation, facilitating performance tuning and cost management. This comprehensive breakdown aids in understanding LLM performance, ensuring consistent operation across environments, budget needs, and troubleshooting issues, ultimately optimizing overall system efficiency.\\n\\nAlso, you can see OpenTelemetry Traces from OpenLIT in Elastic APM, letting you look into each LLM request in detail. 
This setup ensures better system efficiency by helping with model performance checks, smooth running across environments, budget planning, and troubleshooting.\\n\\n![Elastic-dashboard-3](/assets/images/elastic-opentelemetry-langchain-openlit-tracing/elastic-dashboard-3.jpg)\\n\\n![Elastic-dashboard-4](/assets/images/elastic-opentelemetry-langchain-openlit-tracing/elastic-dashboard-4.jpg)\\n\\n## Conclusion\\n\\nObservability is crucial for the efficient operation of LLM applications. OpenTelemetry\'s open standards and extensive support, combined with [Elastic\'s APM](https://www.elastic.co/observability/application-performance-monitoring), [AIOps](https://www.elastic.co/observability/aiops), and [analytics](https://www.elastic.co/observability/log-monitoring), and [OpenLIT\'s](https://docs.openlit.io/latest/introduction) powerful and easy auto-instrumentation for 20+ GenAI tools from LLMs to VectorDBs, enable complete visibility into LLM performance.\xa0\\n\\nHopefully, this provides an easy-to-understand walk-through of instrumenting Langchain with OpenTelemetry and OpenLIT, and shows how easy it is to send traces into Elastic.\\n\\n**Additional resources for OpenTelemetry with Elastic:**\\n\\n- [Independence with OpenTelemetry on Elastic](https://www.elastic.co/blog/opentelemetry-observability)\\n\\n- [Modern observability and security on Kubernetes with Elastic and OpenTelemetry](https://www.elastic.co/blog/implementing-kubernetes-observability-security-opentelemetry)\\n\\n- [3 models for logging with OpenTelemetry and Elastic](https://www.elastic.co/blog/3-models-logging-opentelemetry-elastic)\\n\\n- [Adding free and open Elastic APM as part of your Elastic Observability deployment](https://www.elastic.co/blog/adding-free-and-open-elastic-apm-as-part-of-your-elastic-observability-deployment)\\n\\n- [Monitor OpenAI API and GPT models with OpenTelemetry and Elastic](https://www.elastic.co/blog/monitor-openai-api-gpt-models-opentelemetry-elastic)\\n\\n- [Futureproof your observability platform with OpenTelemetry and Elastic](https://www.elastic.co/virtual-events/future-proof-your-observability-platform-with-opentelemetry-and-elastic)\\n\\n- Instrumentation resources:\\n\\n - Python: [Auto-instrumentation](https://www.elastic.co/blog/auto-instrumentation-of-python-applications-opentelemetry), [Manual instrumentation](https://www.elastic.co/blog/manual-instrumentation-of-python-applications-opentelemetry)\\n\\n - Java: [Auto-instrumentation](https://www.elastic.co/blog/auto-instrumentation-of-java-applications-opentelemetry), [Manual instrumentation](https://www.elastic.co/blog/manual-instrumentation-of-java-applications-opentelemetry)\\n\\n - Node.js: [Auto-instrumentation](https://www.elastic.co/blog/auto-instrument-nodejs-applications-opentelemetry), [Manual instrumentation](https://www.elastic.co/blog/manual-instrumentation-of-nodejs-applications-opentelemetry)\\n\\n - .NET: [Auto-instrumentation](https://www.elastic.co/blog/auto-instrumentation-of-net-applications-opentelemetry), [Manual instrumentation](https://www.elastic.co/blog/manual-instrumentation-of-net-applications-opentelemetry)\\n","code":"var Component=(()=>{var d=Object.create;var a=Object.defineProperty;var m=Object.getOwnPropertyDescriptor;var u=Object.getOwnPropertyNames;var g=Object.getPrototypeOf,f=Object.prototype.hasOwnProperty;var y=(t,e)=>()=>(e||t((e={exports:{}}).exports,e),e.exports),w=(t,e)=>{for(var i in e)a(t,i,{get:e[i],enumerable:!0})},r=(t,e,i,l)=>{if(e&&typeof e==\\"object\\"||typeof 
e==\\"function\\")for(let o of u(e))!f.call(t,o)&&o!==i&&a(t,o,{get:()=>e[o],enumerable:!(l=m(e,o))||l.enumerable});return t};var b=(t,e,i)=>(i=t!=null?d(g(t)):{},r(e||!t||!t.__esModule?a(i,\\"default\\",{value:t,enumerable:!0}):i,t)),L=t=>r(a({},\\"__esModule\\",{value:!0}),t);var c=y((M,s)=>{s.exports=_jsx_runtime});var T={};w(T,{default:()=>p,frontmatter:()=>v});var n=b(c()),v={title:\\"LLM Observability with Elastic, OpenLIT and OpenTelemetry\\",slug:\\"elastic-opentelemetry-langchain-openlit-tracing\\",date:\\"2024-08-29\\",description:\\"Langchain applications are growing in use. The ability to build out RAG-based applications, simple AI Assistants, and more is becoming the norm. Observing these applications is even harder. Given the various options that are out there, this blog shows how to use OpenTelemetry instrumentation with the OpenLIT instrumentation library to ingest traces into Elastic Observability APM.\\",author:[{slug:\\"bahubali-shetti\\"},{slug:\\"aman-agarwal\\"}],image:\\"elastic-openlit-tracing.jpg\\",tags:[{slug:\\"opentelemetry\\"},{slug:\\"apm\\"},{slug:\\"python\\"},{slug:\\"instrumentation\\"},{slug:\\"azure-openai\\"}]};function h(t){let e={a:\\"a\\",code:\\"code\\",h2:\\"h2\\",h3:\\"h3\\",img:\\"img\\",li:\\"li\\",ol:\\"ol\\",p:\\"p\\",pre:\\"pre\\",strong:\\"strong\\",ul:\\"ul\\",...t.components};return(0,n.jsxs)(n.Fragment,{children:[(0,n.jsxs)(e.p,{children:[\\"The realm of technology is evolving rapidly, and Large Language Models (LLMs) are at the forefront of this transformation. From chat bots to intelligent application copilots, LLMs are becoming increasingly sophisticated. As these applications grow more complex, ensuring their reliability and performance is paramount. This is where observability steps in, aided by OpenTelemetry and Elastic through the \\",(0,n.jsx)(e.a,{href:\\"https://github.com/openlit/openlit\\",rel:\\"nofollow\\",children:\\"OpenLIT\\"}),\\" instrumentation library.\\\\xA0\\"]}),`\\n`,(0,n.jsxs)(e.p,{children:[\\"OpenLIT is an open-source Observability and Evaluation tool that helps take your LLM apps from playground to debug to production. With OpenLIT, you get the ability to choose from a \\",(0,n.jsx)(e.a,{href:\\"https://docs.openlit.io/latest/integrations/introduction\\",rel:\\"nofollow\\",children:\\"range of Integrations\\"}),\\" (across LLMs, VectorDBs, frameworks, and GPUs) to start tracking LLM performance, usage, and costs without hassle. In this blog, we will look at tracking OpenAI and LangChain to send telemetry to an OpenTelemetry-compatible endpoint like Elastic.\\"]}),`\\n`,(0,n.jsxs)(e.p,{children:[(0,n.jsx)(e.a,{href:\\"https://www.elastic.co/guide/en/observability/current/apm-open-telemetry.html\\",rel:\\"nofollow\\",children:\\"Elastic supports OpenTelemetry natively\\"}),\\": it can take telemetry directly from the application (via the OpenTelemetry SDKs) or through a native OTel collector. No special agents are needed. Additionally, \\",(0,n.jsx)(e.a,{href:\\"https://www.elastic.co/observability-labs/blog/elastic-distributions-opentelemetry\\",rel:\\"nofollow\\",children:\\"Elastic\'s EDOT\\"}),\\" provides a supported set of OTel SDKs and an OTel Collector. 
In this blog, we will connect our application directly to Elastic without a collector for simplicity.\\"]}),`\\n`,(0,n.jsx)(e.h2,{id:\\"why-observability-matters-for-llm-applications\\",children:\\"Why Observability Matters for LLM Applications\\"}),`\\n`,(0,n.jsx)(e.p,{children:\\"Monitoring LLM applications is crucial for several reasons.\\"}),`\\n`,(0,n.jsxs)(e.ol,{children:[`\\n`,(0,n.jsxs)(e.li,{children:[`\\n`,(0,n.jsx)(e.p,{children:\\"It\\\\u2019s vital to keep track of how often LLMs are being used for usage and cost tracking.\\"}),`\\n`]}),`\\n`,(0,n.jsxs)(e.li,{children:[`\\n`,(0,n.jsx)(e.p,{children:\\"Latency is important to track since the response time from the model can vary based on the inputs passed to the LLM.\\"}),`\\n`]}),`\\n`,(0,n.jsxs)(e.li,{children:[`\\n`,(0,n.jsx)(e.p,{children:\\"Rate limiting is a common challenge, particularly for external LLMs, as applications depend more on these external API calls. When rate limits are hit, it can hinder these applications from performing their essential functions using these LLMs.\\"}),`\\n`]}),`\\n`]}),`\\n`,(0,n.jsx)(e.p,{children:\\"By keeping a close eye on these aspects, you can not only save costs but also avoid hitting request limits, ensuring your LLM applications perform optimally.\\"}),`\\n`,(0,n.jsx)(e.h2,{id:\\"what-are-the-signals-that-you-should-be-looking-at\\",children:\\"What are the signals that you should be looking at?\\"}),`\\n`,(0,n.jsx)(e.p,{children:\\"Using Large Language Models (LLMs) in applications differs from traditional machine learning (ML) models. Primarily, LLMs are often accessed through external API calls instead of being run locally or in-house. It is crucial to capture the sequence of events (using traces), especially in a RAG-based application where there can be events before and after LLM usage. Also, analyzing the aggregated data (through metrics) to get a quick overview of requests, tokens, and cost is important for optimizing performance and managing costs. Here are the key signals to monitor:\\"}),`\\n`,(0,n.jsx)(e.h3,{id:\\"traces\\",children:\\"Traces\\"}),`\\n`,(0,n.jsxs)(e.p,{children:[(0,n.jsx)(e.strong,{children:\\"Request Metadata\\"}),\\": This is important in the context of LLMs, given the variety of parameters (like temperature and top_p) that can drastically affect both the response quality and the cost. Specific aspects to monitor are:\\"]}),`\\n`,(0,n.jsxs)(e.ol,{children:[`\\n`,(0,n.jsxs)(e.li,{children:[`\\n`,(0,n.jsx)(e.p,{children:\\"Temperature: Indicates the level of creativity or randomness desired from the model\\\\u2019s outputs. Varying this parameter can significantly impact the nature of the generated content.\\"}),`\\n`]}),`\\n`,(0,n.jsxs)(e.li,{children:[`\\n`,(0,n.jsx)(e.p,{children:\\"top_p: Decides how selective the model is by choosing from a certain percentage of most likely words. 
A high \\\\u201Ctop_p\\\\u201D value means the model considers a wider range of words, making the text more varied.\\"}),`\\n`]}),`\\n`,(0,n.jsxs)(e.li,{children:[`\\n`,(0,n.jsx)(e.p,{children:\\"Model Name or Version: Essential for tracking over time, as updates to the LLM might affect performance or response characteristics.\\"}),`\\n`]}),`\\n`,(0,n.jsxs)(e.li,{children:[`\\n`,(0,n.jsx)(e.p,{children:\\"Prompt Details: The exact inputs sent to the LLM, which, unlike in-house ML models where inputs might be more controlled and homogeneous, can vary wildly and affect output complexity and cost implications.\\"}),`\\n`]}),`\\n`]}),`\\n`,(0,n.jsxs)(e.p,{children:[(0,n.jsx)(e.strong,{children:\\"Response Metadata\\"}),\\": Given the API-based interaction with LLMs, tracking the specifics of the response is key for cost management and quality assessment:\\"]}),`\\n`,(0,n.jsxs)(e.ol,{children:[`\\n`,(0,n.jsxs)(e.li,{children:[`\\n`,(0,n.jsx)(e.p,{children:\\"Tokens: Directly impacts cost and is a measure of response length and complexity.\\"}),`\\n`]}),`\\n`,(0,n.jsxs)(e.li,{children:[`\\n`,(0,n.jsx)(e.p,{children:\\"Cost: Critical for budgeting, as API-based costs can scale with the number of requests and the complexity of each request.\\"}),`\\n`]}),`\\n`,(0,n.jsxs)(e.li,{children:[`\\n`,(0,n.jsx)(e.p,{children:\\"Completion Details: Similar to the prompt details but from the response perspective, providing insights into the model\\\\u2019s output characteristics and potential areas of inefficiency or unexpected cost.\\"}),`\\n`]}),`\\n`]}),`\\n`,(0,n.jsx)(e.h3,{id:\\"metrics\\",children:\\"Metrics\\"}),`\\n`,(0,n.jsxs)(e.p,{children:[(0,n.jsx)(e.strong,{children:\\"Request Volume\\"}),\\": The total number of requests made to the LLM service. This helps in understanding the demand patterns and identifying any anomaly in usage, such as sudden spikes or drops.\\"]}),`\\n`,(0,n.jsxs)(e.p,{children:[(0,n.jsx)(e.strong,{children:\\"Request Duration\\"}),\\": The time it takes for a request to be processed and a response to be received from the LLM. This includes network latency and the time the LLM takes to generate a response, providing insights into the performance and reliability of the LLM service.\\"]}),`\\n`,(0,n.jsxs)(e.p,{children:[(0,n.jsx)(e.strong,{children:\\"Costs and Tokens Counters\\"}),\\": Keeping track of the total cost accrued and tokens consumed over time is essential for budgeting and cost optimization strategies. Monitoring these metrics can alert you to unexpected increases that may indicate inefficient use of the LLM or the need for optimization.\\"]}),`\\n`,(0,n.jsx)(e.h2,{id:\\"implementing-automatic-instrumentation-with-openlit\\",children:\\"Implementing Automatic Instrumentation with OpenLIT\\"}),`\\n`,(0,n.jsxs)(e.p,{children:[(0,n.jsx)(e.a,{href:\\"https://openlit.io/\\",rel:\\"nofollow\\",children:\\"OpenLIT\\"}),\\" automates telemetry data capture, simplifying the process for developers. Here\\\\u2019s a step-by-step guide to setting it up:\\"]}),`\\n`,(0,n.jsxs)(e.p,{children:[(0,n.jsx)(e.strong,{children:\\"1. Install the OpenLIT SDK\\"}),\\":\\"]}),`\\n`,(0,n.jsx)(e.p,{children:\\"First, you must install the following package:\\\\xA0\\"}),`\\n`,(0,n.jsx)(e.pre,{children:(0,n.jsx)(e.code,{className:\\"language-bash\\",children:`pip install openlit\\n`})}),`\\n`,(0,n.jsxs)(e.p,{children:[(0,n.jsx)(e.strong,{children:\\"Note:\\"}),\\" OpenLIT currently supports Python, a popular language for Generative AI. 
The team is also working on expanding support to JavaScript soon.\\"]}),`\\n`,(0,n.jsx)(e.p,{children:(0,n.jsx)(e.strong,{children:\\"2. Get your Elastic APM Credentials\\"})}),`\\n`,(0,n.jsxs)(e.ol,{children:[`\\n`,(0,n.jsxs)(e.li,{children:[`\\n`,(0,n.jsxs)(e.p,{children:[\\"Sign in to your \\",(0,n.jsx)(e.a,{href:\\"https://cloud.elastic.co\\",rel:\\"nofollow\\",children:\\"Elastic cloud account\\"}),\\".\\"]}),`\\n`]}),`\\n`,(0,n.jsxs)(e.li,{children:[`\\n`,(0,n.jsx)(e.p,{children:\\"Open the side navigation and click on APM under Observability.\\"}),`\\n`]}),`\\n`,(0,n.jsxs)(e.li,{children:[`\\n`,(0,n.jsx)(e.p,{children:\\"Make sure the APM Server is running\\"}),`\\n`]}),`\\n`]}),`\\n`,(0,n.jsx)(e.p,{children:(0,n.jsx)(e.img,{src:\\"/assets/images/elastic-opentelemetry-langchain-openlit-tracing/LangChainAppOTelAPMsetup.png\\",alt:\\"LangChainChat App in Elastic APM\\",width:\\"963\\",height:\\"895\\"})}),`\\n`,(0,n.jsxs)(e.ol,{children:[`\\n`,(0,n.jsxs)(e.li,{children:[`\\n`,(0,n.jsx)(e.p,{children:\\"In the APM Agents section, Select OpenTelemetry and directly jump to Step 5 (Configure OpenTelemetry in your application):\\"}),`\\n`]}),`\\n`,(0,n.jsxs)(e.li,{children:[`\\n`,(0,n.jsxs)(e.p,{children:[\\"Copy and save the configuration value for \\",(0,n.jsx)(e.code,{children:\\"OTEL_EXPORTER_OTLP_ENDPOINT\\"}),\\" and \\",(0,n.jsx)(e.code,{children:\\"OTEL_EXPORTER_OTLP_HEADERS\\"})]}),`\\n`]}),`\\n`]}),`\\n`,(0,n.jsxs)(e.p,{children:[(0,n.jsx)(e.strong,{children:\\"3. Set Environment Variables\\"}),\\":\\"]}),`\\n`,(0,n.jsxs)(e.p,{children:[\\"OpenTelemetry Environment variables for Elastic can be set as follows in linux (or in the code). \\",(0,n.jsx)(e.a,{href:\\"https://www.elastic.co/guide/en/observability/current/apm-open-telemetry-direct.html\\",rel:\\"nofollow\\",children:\\"Elastic OTel Documentation\\"})]}),`\\n`,(0,n.jsx)(e.pre,{children:(0,n.jsx)(e.code,{className:\\"language-bash\\",children:`export OTEL_EXPORTER_OTLP_ENDPOINT=\\"YOUR_ELASTIC_APM_OTLP_URL\\"\\nexport OTEL_EXPORTER_OTLP_HEADERS=\\"YOUR_ELASTIC_APM_AUTH\\"\\n`})}),`\\n`,(0,n.jsxs)(e.p,{children:[(0,n.jsx)(e.strong,{children:\\"Note:\\"}),\\" Make sure to replace the space after Bearer with %20:\\"]}),`\\n`,(0,n.jsx)(e.p,{children:(0,n.jsx)(e.code,{children:\\"OTEL_EXPORTER_OTLP_HEADERS=\\\\u201CAuthorization=Bearer%20[APIKEY]\\\\u201D\\"})}),`\\n`,(0,n.jsxs)(e.p,{children:[(0,n.jsx)(e.strong,{children:\\"4. Initialize the SDK\\"}),\\":\\"]}),`\\n`,(0,n.jsx)(e.p,{children:\\"You will need to add the following to the LLM Application code.\\"}),`\\n`,(0,n.jsx)(e.pre,{children:(0,n.jsx)(e.code,{className:\\"language-python\\",children:`import openlit\\nopenlit.init()\\n`})}),`\\n`,(0,n.jsxs)(e.p,{children:[\\"Optionally, you can customize the application name and environment by setting the \\",(0,n.jsx)(e.code,{children:\\"application_name\\"}),\\" and \\",(0,n.jsx)(e.code,{children:\\"environment\\"}),\\" attributes when initializing OpenLIT in your application. These variables configure the OTel attributes \\",(0,n.jsx)(e.code,{children:\\"service.name\\"}),\\" and \\",(0,n.jsx)(e.code,{children:\\"deployment.environment\\"}),\\", respectively. 
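\\"]}),`\\n`,(0,n.jsx)(e.p,{children:\\"If you would rather keep this configuration out of the code, the same two attributes can also be supplied through the standard OpenTelemetry resource-attributes environment variable. A small sketch (our illustration, reusing the example names above, and assuming the mapping just described):\\"}),`\\n`,(0,n.jsx)(e.pre,{children:(0,n.jsx)(e.code,{className:\\"language-python\\",children:`import os\\n\\n# Assumed equivalence per the mapping described above:\\n# application_name -> service.name, environment -> deployment.environment\\nos.environ[\\"OTEL_RESOURCE_ATTRIBUTES\\"] = \\"service.name=YourAppName,deployment.environment=Production\\"\\n`})}),`\\n`,(0,n.jsxs)(e.p,{children:[\\"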
For more details on other configuration settings, check out the \\",(0,n.jsx)(e.a,{href:\\"https://github.com/openlit/openlit/tree/main/sdk/python#configuration\\",rel:\\"nofollow\\",children:\\"OpenLIT GitHub Repository\\"}),\\".\\"]}),`\\n`,(0,n.jsx)(e.pre,{children:(0,n.jsx)(e.code,{className:\\"language-python\\",children:`openlit.init(application_name=\\"YourAppName\\",environment=\\"Production\\")\\n`})}),`\\n`,(0,n.jsx)(e.p,{children:\\"The most popular libraries in GenAI are OpenAI (for accessing LLMs) and Langchain (for orchestrating steps). An example instrumentation of a Langchain and OpenAI based LLM Application will look like:\\"}),`\\n`,(0,n.jsx)(e.pre,{children:(0,n.jsx)(e.code,{className:\\"language-python\\",children:`import getpass\\nimport os\\nfrom langchain_openai import ChatOpenAI\\nfrom langchain_core.messages import HumanMessage, SystemMessage\\nimport openlit \\n\\n# Auto-instruments LLM and VectorDB calls, sending OTel traces and metrics to the configured endpoint\\nopenlit.init()\\n\\nos.environ[\\"OPENAI_API_KEY\\"] = getpass.getpass()\\nmodel = ChatOpenAI(model=\\"gpt-4\\")\\nmessages = [\\n SystemMessage(content=\\"Translate the following from English into Italian\\"),\\n HumanMessage(content=\\"hi!\\"),\\n]\\nmodel.invoke(messages)\\n`})}),`\\n`,(0,n.jsx)(e.h2,{id:\\"visualizing-data-with-kibana\\",children:\\"Visualizing Data with Kibana\\"}),`\\n`,(0,n.jsx)(e.p,{children:\\"Once your LLM application is instrumented, visualizing the collected data is the next step. Follow the below steps to import a pre-built Kibana dashboard to get yourself started:\\"}),`\\n`,(0,n.jsxs)(e.ol,{children:[`\\n`,(0,n.jsxs)(e.li,{children:[`\\n`,(0,n.jsxs)(e.p,{children:[\\"Copy the dashboard NDJSON provided \\",(0,n.jsx)(e.a,{href:\\"https://docs.openlit.io/latest/connections/elastic#dashboard\\",rel:\\"nofollow\\",children:\\"here\\"}),\\" and save it in a file with an extension \\",(0,n.jsx)(e.code,{children:\\".ndjson\\"}),\\".\\"]}),`\\n`]}),`\\n`,(0,n.jsxs)(e.li,{children:[`\\n`,(0,n.jsx)(e.p,{children:\\"Log into your Elastic Instance.\\"}),`\\n`]}),`\\n`,(0,n.jsxs)(e.li,{children:[`\\n`,(0,n.jsx)(e.p,{children:\\"Go to Stack Management > Saved Objects.\\"}),`\\n`]}),`\\n`,(0,n.jsxs)(e.li,{children:[`\\n`,(0,n.jsx)(e.p,{children:\\"Click Import and upload your file containing the dashboard NDJSON.\\"}),`\\n`]}),`\\n`,(0,n.jsxs)(e.li,{children:[`\\n`,(0,n.jsx)(e.p,{children:\\"Click Import and you should have the dashboard available.\\"}),`\\n`]}),`\\n`]}),`\\n`,(0,n.jsx)(e.p,{children:(0,n.jsx)(e.img,{src:\\"/assets/images/elastic-opentelemetry-langchain-openlit-tracing/elastic-dashboard-1.jpg\\",alt:\\"Elastic-dashboard-1\\",width:\\"3002\\",height:\\"1710\\"})}),`\\n`,(0,n.jsx)(e.p,{children:(0,n.jsx)(e.img,{src:\\"/assets/images/elastic-opentelemetry-langchain-openlit-tracing/elastic-dashboard-2.jpg\\",alt:\\"Elastic-dashboard-2\\",width:\\"3004\\",height:\\"1726\\"})}),`\\n`,(0,n.jsx)(e.p,{children:\\"The dashboard provides an in-depth overview of system metrics through eight key areas: Total Successful Requests, Request Duration Distribution, Request Rates, Usage Cost and Tokens, Top GenAI Models, GenAI Requests by Platform and Environment, Token Consumption vs. Cost. These metrics collectively help identify peak usage times, latency issues, rate limits, and resource allocation, facilitating performance tuning and cost management. 
This comprehensive breakdown aids in understanding LLM performance, ensuring consistent operation across environments, budget needs, and troubleshooting issues, ultimately optimizing overall system efficiency.\\"}),`\\n`,(0,n.jsx)(e.p,{children:\\"Also, you can see OpenTelemetry Traces from OpenLIT in Elastic APM, letting you look into each LLM request in detail. This setup ensures better system efficiency by helping with model performance checks, smooth running across environments, budget planning, and troubleshooting.\\"}),`\\n`,(0,n.jsx)(e.p,{children:(0,n.jsx)(e.img,{src:\\"/assets/images/elastic-opentelemetry-langchain-openlit-tracing/elastic-dashboard-3.jpg\\",alt:\\"Elastic-dashboard-3\\",width:\\"3004\\",height:\\"1726\\"})}),`\\n`,(0,n.jsx)(e.p,{children:(0,n.jsx)(e.img,{src:\\"/assets/images/elastic-opentelemetry-langchain-openlit-tracing/elastic-dashboard-4.jpg\\",alt:\\"Elastic-dashboard-4\\",width:\\"3024\\",height:\\"1732\\"})}),`\\n`,(0,n.jsx)(e.h2,{id:\\"conclusion\\",children:\\"Conclusion\\"}),`\\n`,(0,n.jsxs)(e.p,{children:[\\"Observability is crucial for the efficient operation of LLM applications. OpenTelemetry\'s open standards and extensive support, combined with \\",(0,n.jsx)(e.a,{href:\\"https://www.elastic.co/observability/application-performance-monitoring\\",rel:\\"nofollow\\",children:\\"Elastic\'s APM\\"}),\\", \\",(0,n.jsx)(e.a,{href:\\"https://www.elastic.co/observability/aiops\\",rel:\\"nofollow\\",children:\\"AIOps\\"}),\\", and \\",(0,n.jsx)(e.a,{href:\\"https://www.elastic.co/observability/log-monitoring\\",rel:\\"nofollow\\",children:\\"analytics\\"}),\\" and \\",(0,n.jsx)(e.a,{href:\\"https://docs.openlit.io/latest/introduction\\",rel:\\"nofollow\\",children:\\"OpenLIT\'s\\"}),\\" powerful and easy auto-instrumentation for 20+ GenAI tools from LLMs to VectorDBs, enable complete visibility into LLM performance.\\\\xA0\\"]}),`\\n`,(0,n.jsx)(e.p,{children:\\"Hopefully, this provides an easy-to-understand walk-through of instrumenting Langchain with OpenTelemetry and OpenLit and how easy it is to send traces into Elastic.\\"}),`\\n`,(0,n.jsx)(e.p,{children:(0,n.jsx)(e.strong,{children:\\"Additional resources for OpenTelemetry with Elastic:\\"})}),`\\n`,(0,n.jsxs)(e.ul,{children:[`\\n`,(0,n.jsxs)(e.li,{children:[`\\n`,(0,n.jsx)(e.p,{children:(0,n.jsx)(e.a,{href:\\"https://www.elastic.co/blog/opentelemetry-observability\\",rel:\\"nofollow\\",children:\\"Independence with OpenTelemetry on Elastic\\"})}),`\\n`]}),`\\n`,(0,n.jsxs)(e.li,{children:[`\\n`,(0,n.jsx)(e.p,{children:(0,n.jsx)(e.a,{href:\\"https://www.elastic.co/blog/implementing-kubernetes-observability-security-opentelemetry\\",rel:\\"nofollow\\",children:\\"Modern observability and security on Kubernetes with Elastic and OpenTelemetry\\"})}),`\\n`]}),`\\n`,(0,n.jsxs)(e.li,{children:[`\\n`,(0,n.jsx)(e.p,{children:(0,n.jsx)(e.a,{href:\\"https://www.elastic.co/blog/3-models-logging-opentelemetry-elastic\\",rel:\\"nofollow\\",children:\\"3 models for logging with OpenTelemetry and Elastic\\"})}),`\\n`]}),`\\n`,(0,n.jsxs)(e.li,{children:[`\\n`,(0,n.jsx)(e.p,{children:(0,n.jsx)(e.a,{href:\\"https://www.elastic.co/blog/adding-free-and-open-elastic-apm-as-part-of-your-elastic-observability-deployment\\",rel:\\"nofollow\\",children:\\"Adding free and open Elastic APM as part of your Elastic Observability 
deployment\\"})}),`\\n`]}),`\\n`,(0,n.jsxs)(e.li,{children:[`\\n`,(0,n.jsx)(e.p,{children:(0,n.jsx)(e.a,{href:\\"https://www.elastic.co/blog/monitor-openai-api-gpt-models-opentelemetry-elastic\\",rel:\\"nofollow\\",children:\\"Monitor OpenAI API and GPT models with OpenTelemetry and Elastic\\"})}),`\\n`]}),`\\n`,(0,n.jsxs)(e.li,{children:[`\\n`,(0,n.jsxs)(e.p,{children:[\\"Futureproof\\",(0,n.jsx)(e.a,{href:\\"https://www.elastic.co/virtual-events/future-proof-your-observability-platform-with-opentelemetry-and-elastic\\",rel:\\"nofollow\\",children:\\" your observability platform with OpenTelemetry and Elastic\\"})]}),`\\n`]}),`\\n`,(0,n.jsxs)(e.li,{children:[`\\n`,(0,n.jsx)(e.p,{children:\\"Instrumentation resources:\\"}),`\\n`,(0,n.jsxs)(e.ul,{children:[`\\n`,(0,n.jsxs)(e.li,{children:[`\\n`,(0,n.jsxs)(e.p,{children:[\\"Python: \\",(0,n.jsx)(e.a,{href:\\"https://www.elastic.co/blog/auto-instrumentation-of-python-applications-opentelemetry\\",rel:\\"nofollow\\",children:\\"Auto-instrumentation\\"}),\\", \\",(0,n.jsx)(e.a,{href:\\"https://www.elastic.co/blog/manual-instrumentation-of-python-applications-opentelemetry\\",rel:\\"nofollow\\",children:\\"Manual instrumentation\\"})]}),`\\n`]}),`\\n`,(0,n.jsxs)(e.li,{children:[`\\n`,(0,n.jsxs)(e.p,{children:[\\"Java: \\",(0,n.jsx)(e.a,{href:\\"https://www.elastic.co/blog/auto-instrumentation-of-java-applications-opentelemetry\\",rel:\\"nofollow\\",children:\\"Auto-instrumentation\\"}),\\", \\",(0,n.jsx)(e.a,{href:\\"https://www.elastic.co/blog/manual-instrumentation-of-java-applications-opentelemetry\\",rel:\\"nofollow\\",children:\\"Manual instrumentation\\\\xA0\\"})]}),`\\n`]}),`\\n`,(0,n.jsxs)(e.li,{children:[`\\n`,(0,n.jsxs)(e.p,{children:[\\"Node.js: \\",(0,n.jsx)(e.a,{href:\\"https://www.elastic.co/blog/auto-instrument-nodejs-applications-opentelemetry\\",rel:\\"nofollow\\",children:\\"Auto-instrumentation\\"}),\\", \\",(0,n.jsx)(e.a,{href:\\"https://www.elastic.co/blog/manual-instrumentation-of-nodejs-applications-opentelemetry\\",rel:\\"nofollow\\",children:\\"Manual instrumentation\\"})]}),`\\n`]}),`\\n`,(0,n.jsxs)(e.li,{children:[`\\n`,(0,n.jsxs)(e.p,{children:[\\".NET: \\",(0,n.jsx)(e.a,{href:\\"https://www.elastic.co/blog/auto-instrumentation-of-net-applications-opentelemetry\\",rel:\\"nofollow\\",children:\\"Auto-instrumentation\\"}),\\", \\",(0,n.jsx)(e.a,{href:\\"https://www.elastic.co/blog/manual-instrumentation-of-net-applications-opentelemetry\\",rel:\\"nofollow\\",children:\\"Manual instrumentation\\"})]}),`\\n`]}),`\\n`]}),`\\n`]}),`\\n`]})]})}function p(t={}){let{wrapper:e}=t.components||{};return e?(0,n.jsx)(e,{...t,children:(0,n.jsx)(h,{...t})}):h(t)}return L(T);})();\\n;return Component;"},"_id":"articles/elastic-opentelemetry-langchain-openlit-tracing.mdx","_raw":{"sourceFilePath":"articles/elastic-opentelemetry-langchain-openlit-tracing.mdx","sourceFileName":"elastic-opentelemetry-langchain-openlit-tracing.mdx","sourceFileDir":"articles","contentType":"mdx","flattenedPath":"articles/elastic-opentelemetry-langchain-openlit-tracing"},"type":"Article","imageUrl":"/assets/images/elastic-opentelemetry-langchain-openlit-tracing/elastic-openlit-tracing.jpg","readingTime":"8 min read","url":"/elastic-opentelemetry-langchain-openlit-tracing","headings":[{"level":2,"title":"Why Observability Matters for LLM Applications","href":"#why-observability-matters-for-llm-applications"},{"level":2,"title":"What are the signals that you should be looking 
at?","href":"#what-are-the-signals-that-you-should-be-looking-at"},{"level":3,"title":"Traces","href":"#traces"},{"level":3,"title":"Metrics","href":"#metrics"},{"level":2,"title":"Implementing Automatic Instrumentation with OpenLIT","href":"#implementing-automatic-instrumentation-with-openlit"},{"level":2,"title":"Visualizing Data with Kibana","href":"#visualizing-data-with-kibana"},{"level":2,"title":"Conclusion","href":"#conclusion"}]},{"title":"Observing Langchain applications with Elastic, OpenTelemetry, and Langtrace","slug":"elastic-opentelemetry-langchain-tracing-langtrace","date":"2024-09-02","description":"Langchain applications are growing in use. The ability to build out RAG-based applications, simple AI Assistants, and more is becoming the norm. Observing these applications is even harder. Given the various options that are out there, this blog shows how to use OpenTelemetry instrumentation with Langtrace and ingest it into Elastic Observability APM","image":"elastic-langtrace.jpg","author":[{"slug":"bahubali-shetti","type":"Author","_raw":{}},{"slug":"karthik-kalyanaraman","type":"Author","_raw":{}},{"slug":"yemi-adejumobi","type":"Author","_raw":{}}],"tags":[{"slug":"opentelemetry","type":"Tag","_raw":{}},{"slug":"apm","type":"Tag","_raw":{}},{"slug":"python","type":"Tag","_raw":{}},{"slug":"instrumentation","type":"Tag","_raw":{}},{"slug":"azure-openai","type":"Tag","_raw":{}}],"body":{"raw":"\\nAs AI-driven applications become increasingly complex, the need for robust tools to monitor and optimize their performance is more critical than ever. LangChain has rapidly emerged as a crucial framework in the AI development landscape, particularly for building applications powered by large language models (LLMs). As its adoption has soared among developers, the need for effective debugging and performance optimization tools has become increasingly apparent. One such essential tool is the ability to obtain and analyze traces from Langchain applications. Tracing provides invaluable insights into the execution flow, helping developers understand and improve their AI-driven systems.\xa0[Elastic Observability\'s APM](https://www.elastic.co/observability/application-performance-monitoring) provides an ability to trace your Langchain apps with OpenTelemetry, but you need third-party libraries.\\n\\nThere are several options to trace for Langchain. [Langtrace](https://docs.langtrace.ai/introduction) is one such option. Langtrace is an [open-source](https://github.com/Scale3-Labs/langtrace) observability software that lets you capture, debug and analyze traces and metrics from all your applications. Langtrace automatically captures traces from LLM APIs/inferences, Vector Databases, and LLM-based Frameworks. Langtrace stands out due to its seamless integration with popular LLM frameworks and its ability to provide deep insights into complex AI workflows without requiring extensive manual instrumentation.\\n\\nLangtrace has an SDK, a lightweight library that can be installed and imported into your project to collect traces. The traces are OpenTelemetry-based and can be exported to Elastic without using a Langtrace API key.\\n\\nOpenTelemetry (OTel) is now broadly accepted as the industry standard for tracing. 
As one of the major Cloud Native Computing Foundation (CNCF) projects, with as many commits as Kubernetes, it is gaining momentum, with major ISVs and cloud providers delivering support for the framework.\xa0\\n\\nMoreover, many LangChain-based applications will have multiple components beyond just LLM interactions, so using OpenTelemetry with LangChain is essential.\xa0\\n\\nThis blog will cover how you can use the Langtrace SDK to trace a simple LangChain chat app that connects to Azure OpenAI, performs a search with DuckDuckGoSearch, and exports the output to Elastic.\\n\\n\\n# Pre-requisites:\\n\\n- An Elastic Cloud account — [sign up now](https://cloud.elastic.co/), and become familiar with [Elastic’s OpenTelemetry configuration](https://www.elastic.co/guide/en/observability/current/apm-open-telemetry.html)\\n\\n- Have a LangChain app to instrument\\n\\n- Be familiar with using [OpenTelemetry’s Python SDK](https://opentelemetry.io/docs/languages/python/libraries/)\xa0\\n\\n- An account on your favorite LLM (Azure OpenAI), with API keys\\n\\n- The application we used in this blog, called `langchainChat`, can be found in [GitHub langchainChat](https://github.com/elastic/observability-examples/tree/main/langchainChat). It is built using Azure OpenAI and DuckDuckGo, but you can easily modify it for your LLM and search of choice.\\n\\n# App Overview and output in Elastic:\\n\\nTo showcase the combined power of Langtrace and Elastic, we created a simple LangChain app that performs the following steps:\\n\\n1. Takes customer input on the command line. (Queries)\\n\\n2. Sends these to the Azure OpenAI LLM via LangChain.\\n\\n3. Utilizes chain tools to perform a search using DuckDuckGo.\\n\\n4. The LLM processes the search results and returns the relevant information to the user.\\n\\nHere is a sample interaction:\\n\\n![Chat Interaction](/assets/images/elastic-opentelemetry-langchain-tracing-langtrace/langchainchat-cli.png)\\n\\nHere is what the service view looks like after we ran a few queries. \\n\\n![Service Overview](/assets/images/elastic-opentelemetry-langchain-tracing-langtrace/langchainchat-overview.png)\\n\\nAs you can see, Elastic Observability’s APM recognizes the LangChain app and also shows the average latency, throughput, and transactions. Our average latency is 30s since it takes that long for humans to type the query (twice).\\n\\n\\nYou can also select other tabs to see dependencies, errors, metrics, and more. One interesting part of Elastic APM is that universal profiling (eBPF) output can also be analyzed for this service. Here is what our service’s dependency is (Azure OpenAI) with its average latency, throughput, and failed transactions:\\n\\n![Dependencies](/assets/images/elastic-opentelemetry-langchain-tracing-langtrace/langchainchat-dependency.png)\\n\\n![Dependency-metric](/assets/images/elastic-opentelemetry-langchain-tracing-langtrace/langchainchat-dependency-metrics.png)\\n\\nWe see Azure OpenAI takes on average 4s to give us the results.\\n\\nIf we drill into transactions and look at the trace for our queries on Taylor Swift and Pittsburgh Steelers, we can see both queries and their corresponding spans.\\n\\n![Trace for two queries](/assets/images/elastic-opentelemetry-langchain-tracing-langtrace/langchainchat-trace.png)\\n\\nIn this trace:\\n\\n1. The user makes a query\\n\\n2. Azure OpenAI is called, but it uses a tool (DuckDuckGo) to obtain some results\\n\\n3. Azure OpenAI reviews and returns a summary to the end user\\n\\n4. Repeats for another query\\n\\n
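For orientation, the chain being traced is shaped roughly like the following sketch (our illustration with assumed model and tool wiring, not the exact code from the repo):\\n\\n```python\\nfrom langchain_openai import AzureChatOpenAI\\nfrom langchain_community.tools import DuckDuckGoSearchRun\\n\\n# Assumed deployment name; the real app reads its Azure settings from the environment\\nllm = AzureChatOpenAI(azure_deployment=\\"gpt-4\\")\\nsearch = DuckDuckGoSearchRun()\\n\\n# Let the model decide when to call the DuckDuckGo tool\\nllm_with_tools = llm.bind_tools([search])\\nreply = llm_with_tools.invoke(\\"What did the Pittsburgh Steelers do last week?\\")\\n```\\n\\n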
We noticed that the other long span (other than Azure OpenAI) is DuckDuckGo (\\~1000ms). We can individually look at the span and review the data:\\n\\n![Span details](/assets/images/elastic-opentelemetry-langchain-tracing-langtrace/langchainchat-tools-span.png)\\n\\n# Configuration:\\n\\nHow do we make all this show up in Elastic? Let\'s go over the steps:\\n\\n\\n## OpenTelemetry Configuration\\n\\nTo leverage the full capabilities of OpenTelemetry with Langtrace and Elastic, we need to configure the SDK to generate traces and properly set up Elastic’s endpoint and authorization. Detailed instructions can be found in the [OpenTelemetry Auto-Instrumentation setup documentation](https://opentelemetry.io/docs/zero-code/python/#setup).\\n\\n\\n### OpenTelemetry Environment variables:\\n\\nFor Elastic, you can set the following OpenTelemetry environment variables either in your Linux/Mac environment or directly in the code:\\n\\n```bash\\nOTEL_EXPORTER_OTLP_ENDPOINT=12345.apm.us-west-2.aws.cloud.es.io:443\\nOTEL_EXPORTER_OTLP_HEADERS=\\"Authorization=Bearer%20ZZZZZZZ\\"\\nOTEL_RESOURCE_ATTRIBUTES=\\"service.name=langchainChat,service.version=1.0,deployment.environment=production\\"\\n```\\n\\nIn this setup:\\n\\n- **OTEL_EXPORTER_OTLP_ENDPOINT** is configured to send traces to Elastic.\\n\\n- **OTEL_EXPORTER_OTLP_HEADERS** provides the necessary authorization for the Elastic APM server.\\n\\n- **OTEL_RESOURCE_ATTRIBUTES** defines key attributes like the service name, version, and deployment environment.\\n\\nThese values can be easily obtained from Elastic’s APM configuration screen under the OpenTelemetry section.\\n\\n![Span details](/assets/images/elastic-opentelemetry-langchain-tracing-langtrace/langchainchat-OTelAPMsetup.png)\\n\\n**Note: No agent is required; the OTLP trace messages are sent directly to Elastic’s APM server, simplifying the setup process.**\\n\\n\\n## Langtrace Library:\\n\\nOpenTelemetry\'s auto-instrumentation can be extended to trace additional frameworks using instrumentation packages. For this blog post, you will need to install the Langtrace Python SDK:\\n\\n```bash\\npip install langtrace-python-sdk\\n```\\n\\nAfter installation, you can add the following code to your project:\\n\\n```python\\nfrom opentelemetry.exporter.otlp.proto.grpc.trace_exporter import OTLPSpanExporter\\n\\nfrom langtrace_python_sdk import langtrace, with_langtrace_root_span\\n```\\n\\n## Instrumentation:\\n\\nOnce the necessary libraries are installed and the environment variables are configured, you can use auto-instrumentation to trace your application. For example, run the following command to instrument your LangChain application with Elastic:\\n\\n```bash\\nopentelemetry-instrument python langtrace-elastic-demo.py\\n```\\n\\n![Trace for two queries](/assets/images/elastic-opentelemetry-langchain-tracing-langtrace/langchainchat-trace.png)\\n\\nThe Langtrace OpenTelemetry library correctly captures the flow with minimal manual instrumentation, apart from integrating the OpenTelemetry library. Additionally, the LLM spans captured by Langtrace also include useful metadata such as token counts, model hyper-parameter settings, etc. Note that the generated spans follow the OTEL GenAI semantics described [here](https://opentelemetry.io/docs/specs/semconv/attributes-registry/gen-ai/).\\n\\n
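To make that concrete, here is the flavor of attributes you can expect on the LLM spans (illustrative values only; the exact names follow the evolving GenAI semantic conventions linked above):\\n\\n```python\\n# Illustrative only -- not output copied from the app\\nspan_attributes = {\\n    \\"gen_ai.system\\": \\"azure_openai\\",\\n    \\"gen_ai.request.model\\": \\"gpt-4\\",\\n    \\"gen_ai.usage.prompt_tokens\\": 42,\\n    \\"gen_ai.usage.completion_tokens\\": 128,\\n}\\n```\\n\\n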
In summary, the instrumentation process involves:\\n\\n1. Capturing customer input from the command line (Queries).\\n\\n2. Sending these queries to the Azure OpenAI LLM via LangChain.\\n\\n3. Utilizing chain tools, such as DuckDuckGo, to perform searches.\\n\\n4. The LLM processes the results and returns the relevant information to the user.\\n\\n\\n# Conclusion\\n\\nBy combining the power of [Langtrace](https://langtrace.ai/) with Elastic, developers can achieve unparalleled visibility into their LangChain applications, ensuring optimized performance and quicker debugging. This powerful combination simplifies the complex task of monitoring AI-driven systems, enabling you to focus on what truly matters—delivering value to your users. Throughout this blog, we\'ve covered the following essential steps and concepts:\\n\\n- How to manually instrument Langchain with OpenTelemetry\\n\\n- How to properly initialize OpenTelemetry and add a custom span\\n\\n- How to easily set the OTLP ENDPOINT and OTLP HEADERS with Elastic without the need for a collector\\n\\n- How to view and analyze traces in Elastic Observability APM\\n\\nThese steps provide a clear and actionable guide for developers looking to integrate robust tracing capabilities into their LangChain applications.\\n\\nWe hope this guide makes understanding and implementing OpenTelemetry tracing for LangChain simple, ensuring seamless integration with Elastic.\\n\\n\\n**Additional resources for OpenTelemetry with Elastic:**\\n\\n- [Independence with OpenTelemetry on Elastic](https://www.elastic.co/blog/opentelemetry-observability)\\n\\n- [Modern observability and security on Kubernetes with Elastic and OpenTelemetry](https://www.elastic.co/blog/implementing-kubernetes-observability-security-opentelemetry)\\n\\n- [3 models for logging with OpenTelemetry and Elastic](https://www.elastic.co/blog/3-models-logging-opentelemetry-elastic)\\n\\n- [Adding free and open Elastic APM as part of your Elastic Observability deployment](https://www.elastic.co/blog/adding-free-and-open-elastic-apm-as-part-of-your-elastic-observability-deployment)\\n\\n- [Monitor OpenAI API and GPT models with OpenTelemetry and Elastic](https://www.elastic.co/blog/monitor-openai-api-gpt-models-opentelemetry-elastic)\\n\\n- Future-proof[ your observability platform with OpenTelemetry and Elastic](https://www.elastic.co/virtual-events/future-proof-your-observability-platform-with-opentelemetry-and-elastic)\\n\\n- Instrumentation resources:\\n\\n - Python: [Auto-instrumentation](https://www.elastic.co/blog/auto-instrumentation-of-python-applications-opentelemetry), [Manual instrumentation](https://www.elastic.co/blog/manual-instrumentation-of-python-applications-opentelemetry)\\n\\n - Java: [Auto-instrumentation](https://www.elastic.co/blog/auto-instrumentation-of-java-applications-opentelemetry), [Manual instrumentation\xa0](https://www.elastic.co/blog/manual-instrumentation-of-java-applications-opentelemetry)\\n\\n - Node.js: [Auto-instrumentation](https://www.elastic.co/blog/auto-instrument-nodejs-applications-opentelemetry), [Manual instrumentation](https://www.elastic.co/blog/manual-instrumentation-of-nodejs-applications-opentelemetry)\\n\\n - .NET: [Auto-instrumentation](https://www.elastic.co/blog/auto-instrumentation-of-net-applications-opentelemetry), [Manual instrumentation](https://www.elastic.co/blog/manual-instrumentation-of-net-applications-opentelemetry)\\n\\n- [Elastic APM - Langtrace AI Docs](https://docs.langtrace.ai/supported-integrations/observability-tools/elastic)\xa0\\n","code":"var Component=(()=>{var d=Object.create;var r=Object.defineProperty;var 
u=Object.getOwnPropertyDescriptor;var g=Object.getOwnPropertyNames;var m=Object.getPrototypeOf,y=Object.prototype.hasOwnProperty;var w=(t,e)=>()=>(e||t((e={exports:{}}).exports,e),e.exports),f=(t,e)=>{for(var i in e)r(t,i,{get:e[i],enumerable:!0})},o=(t,e,i,l)=>{if(e&&typeof e==\\"object\\"||typeof e==\\"function\\")for(let a of g(e))!y.call(t,a)&&a!==i&&r(t,a,{get:()=>e[a],enumerable:!(l=u(e,a))||l.enumerable});return t};var b=(t,e,i)=>(i=t!=null?d(m(t)):{},o(e||!t||!t.__esModule?r(i,\\"default\\",{value:t,enumerable:!0}):i,t)),v=t=>o(r({},\\"__esModule\\",{value:!0}),t);var c=w((A,s)=>{s.exports=_jsx_runtime});var T={};f(T,{default:()=>p,frontmatter:()=>L});var n=b(c()),L={title:\\"Observing Langchain applications with Elastic, OpenTelemetry, and Langtrace\\",slug:\\"elastic-opentelemetry-langchain-tracing-langtrace\\",date:\\"2024-09-02\\",description:\\"Langchain applications are growing in use. The ability to build out RAG-based applications, simple AI Assistants, and more is becoming the norm. Observing these applications is even harder. Given the various options that are out there, this blog shows how to use OpenTelemetry instrumentation with Langtrace and ingest it into Elastic Observability APM\\",author:[{slug:\\"bahubali-shetti\\"},{slug:\\"karthik-kalyanaraman\\"},{slug:\\"yemi-adejumobi\\"}],image:\\"elastic-langtrace.jpg\\",tags:[{slug:\\"opentelemetry\\"},{slug:\\"apm\\"},{slug:\\"python\\"},{slug:\\"instrumentation\\"},{slug:\\"azure-openai\\"}]};function h(t){let e={a:\\"a\\",code:\\"code\\",h1:\\"h1\\",h2:\\"h2\\",h3:\\"h3\\",img:\\"img\\",li:\\"li\\",ol:\\"ol\\",p:\\"p\\",pre:\\"pre\\",strong:\\"strong\\",ul:\\"ul\\",...t.components};return(0,n.jsxs)(n.Fragment,{children:[(0,n.jsxs)(e.p,{children:[\\"As AI-driven applications become increasingly complex, the need for robust tools to monitor and optimize their performance is more critical than ever. LangChain has rapidly emerged as a crucial framework in the AI development landscape, particularly for building applications powered by large language models (LLMs). As its adoption has soared among developers, the need for effective debugging and performance optimization tools has become increasingly apparent. One such essential tool is the ability to obtain and analyze traces from Langchain applications. Tracing provides invaluable insights into the execution flow, helping developers understand and improve their AI-driven systems.\\\\xA0\\",(0,n.jsx)(e.a,{href:\\"https://www.elastic.co/observability/application-performance-monitoring\\",rel:\\"nofollow\\",children:\\"Elastic Observability\'s APM\\"}),\\" provides an ability to trace your Langchain apps with OpenTelemetry, but you need third-party libraries.\\"]}),`\\n`,(0,n.jsxs)(e.p,{children:[\\"There are several options to trace for Langchain. \\",(0,n.jsx)(e.a,{href:\\"https://docs.langtrace.ai/introduction\\",rel:\\"nofollow\\",children:\\"Langtrace\\"}),\\" is one such option. Langtrace is an \\",(0,n.jsx)(e.a,{href:\\"https://github.com/Scale3-Labs/langtrace\\",rel:\\"nofollow\\",children:\\"open-source\\"}),\\" observability software that lets you capture, debug and analyze traces and metrics from all your applications. Langtrace automatically captures traces from LLM APIs/inferences, Vector Databases, and LLM-based Frameworks. 
Langtrace stands out due to its seamless integration with popular LLM frameworks and its ability to provide deep insights into complex AI workflows without requiring extensive manual instrumentation.\\"]}),`\\n`,(0,n.jsx)(e.p,{children:\\"Langtrace has an SDK, a lightweight library that can be installed and imported into your project to collect traces. The traces are OpenTelemetry-based and can be exported to Elastic without using a Langtrace API key.\\"}),`\\n`,(0,n.jsx)(e.p,{children:\\"OpenTelemetry (OTel) is now broadly accepted as the industry standard for tracing. As one of the major Cloud Native Computing Foundation (CNCF) projects, with as many commits as Kubernetes, it is gaining momentum, with major ISVs and cloud providers delivering support for the framework.\\\\xA0\\"}),`\\n`,(0,n.jsx)(e.p,{children:\\"Moreover, many LangChain-based applications will have multiple components beyond just LLM interactions, so using OpenTelemetry with LangChain is essential.\\\\xA0\\"}),`\\n`,(0,n.jsx)(e.p,{children:\\"This blog will cover how you can use the Langtrace SDK to trace a simple LangChain chat app that connects to Azure OpenAI, performs a search with DuckDuckGoSearch, and exports the output to Elastic.\\"}),`\\n`,(0,n.jsx)(e.h1,{id:\\"pre-requisites\\",children:\\"Pre-requisites:\\"}),`\\n`,(0,n.jsxs)(e.ul,{children:[`\\n`,(0,n.jsxs)(e.li,{children:[`\\n`,(0,n.jsxs)(e.p,{children:[\\"An Elastic Cloud account \\\\u2014 \\",(0,n.jsx)(e.a,{href:\\"https://cloud.elastic.co/\\",rel:\\"nofollow\\",children:\\"sign up now\\"}),\\", and become familiar with \\",(0,n.jsx)(e.a,{href:\\"https://www.elastic.co/guide/en/observability/current/apm-open-telemetry.html\\",rel:\\"nofollow\\",children:\\"Elastic\\\\u2019s OpenTelemetry configuration\\"})]}),`\\n`]}),`\\n`,(0,n.jsxs)(e.li,{children:[`\\n`,(0,n.jsx)(e.p,{children:\\"Have a LangChain app to instrument\\"}),`\\n`]}),`\\n`,(0,n.jsxs)(e.li,{children:[`\\n`,(0,n.jsxs)(e.p,{children:[\\"Be familiar with using \\",(0,n.jsx)(e.a,{href:\\"https://opentelemetry.io/docs/languages/python/libraries/\\",rel:\\"nofollow\\",children:\\"OpenTelemetry\\\\u2019s Python SDK\\"}),\\"\\\\xA0\\"]}),`\\n`]}),`\\n`,(0,n.jsxs)(e.li,{children:[`\\n`,(0,n.jsx)(e.p,{children:\\"An account on your favorite LLM (Azure OpenAI), with API keys\\"}),`\\n`]}),`\\n`,(0,n.jsxs)(e.li,{children:[`\\n`,(0,n.jsxs)(e.p,{children:[\\"The application we used in this blog, called \\",(0,n.jsx)(e.code,{children:\\"langchainChat\\"}),\\", can be found in \\",(0,n.jsx)(e.a,{href:\\"https://github.com/elastic/observability-examples/tree/main/langchainChat\\",rel:\\"nofollow\\",children:\\"GitHub langchainChat\\"}),\\". It is built using Azure OpenAI and DuckDuckGo, but you can easily modify it for your LLM and search of choice.\\"]}),`\\n`]}),`\\n`]}),`\\n`,(0,n.jsx)(e.h1,{id:\\"app-overview-and-output-in-elastic\\",children:\\"App Overview and output in Elastic:\\"}),`\\n`,(0,n.jsx)(e.p,{children:\\"To showcase the combined power of Langtrace and Elastic, we created a simple LangChain app that performs the following steps:\\"}),`\\n`,(0,n.jsxs)(e.ol,{children:[`\\n`,(0,n.jsxs)(e.li,{children:[`\\n`,(0,n.jsx)(e.p,{children:\\"Takes customer input on the command line. (Queries)\\"}),`\\n`]}),`\\n`,(0,n.jsxs)(e.li,{children:[`\\n`,(0,n.jsx)(e.p,{children:\\"Sends these to the Azure OpenAI LLM via LangChain.\\"}),`\\n`]}),`\\n`,(0,n.jsxs)(e.li,{children:[`\\n`,(0,n.jsx)(e.p,{children:\\"Utilizes chain tools to perform a search using DuckDuckGo.\\"}),`\\n`]}),`\\n`,(0,n.jsxs)(e.li,{children:[`\\n`,(0,n.jsx)(e.p,{children:\\"The LLM processes the search results and returns the relevant information to the user.\\"}),`\\n`]}),`\\n`]}),
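`\\n`,(0,n.jsx)(e.p,{children:\\"For orientation, the chain being traced is shaped roughly like the following sketch (our illustration with assumed model and tool wiring, not the exact code from the repo):\\"}),`\\n`,(0,n.jsx)(e.pre,{children:(0,n.jsx)(e.code,{className:\\"language-python\\",children:`from langchain_openai import AzureChatOpenAI\\nfrom langchain_community.tools import DuckDuckGoSearchRun\\n\\n# Assumed deployment name; the real app reads its Azure settings from the environment\\nllm = AzureChatOpenAI(azure_deployment=\\"gpt-4\\")\\nsearch = DuckDuckGoSearchRun()\\n\\n# Let the model decide when to call the DuckDuckGo tool\\nllm_with_tools = llm.bind_tools([search])\\nreply = llm_with_tools.invoke(\\"What did the Pittsburgh Steelers do last week?\\")\\n`})}),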
`\\n`,(0,n.jsx)(e.p,{children:\\"Here is a sample interaction:\\"}),`\\n`,(0,n.jsx)(e.p,{children:(0,n.jsx)(e.img,{src:\\"/assets/images/elastic-opentelemetry-langchain-tracing-langtrace/langchainchat-cli.png\\",alt:\\"Chat Interaction\\",width:\\"1655\\",height:\\"1246\\"})}),`\\n`,(0,n.jsx)(e.p,{children:\\"Here is what the service view looks like after we ran a few queries.\\"}),`\\n`,(0,n.jsx)(e.p,{children:(0,n.jsx)(e.img,{src:\\"/assets/images/elastic-opentelemetry-langchain-tracing-langtrace/langchainchat-overview.png\\",alt:\\"Service Overview\\",width:\\"1777\\",height:\\"811\\"})}),`\\n`,(0,n.jsx)(e.p,{children:\\"As you can see, Elastic Observability\\\\u2019s APM recognizes the LangChain app and also shows the average latency, throughput, and transactions. Our average latency is 30s since it takes that long for humans to type the query (twice).\\"}),`\\n`,(0,n.jsx)(e.p,{children:\\"You can also select other tabs to see dependencies, errors, metrics, and more. One interesting part of Elastic APM is that universal profiling (eBPF) output can also be analyzed for this service. Here is what our service\\\\u2019s dependency is (Azure OpenAI) with its average latency, throughput, and failed transactions:\\"}),`\\n`,(0,n.jsx)(e.p,{children:(0,n.jsx)(e.img,{src:\\"/assets/images/elastic-opentelemetry-langchain-tracing-langtrace/langchainchat-dependency.png\\",alt:\\"Dependencies\\",width:\\"1813\\",height:\\"642\\"})}),`\\n`,(0,n.jsx)(e.p,{children:(0,n.jsx)(e.img,{src:\\"/assets/images/elastic-opentelemetry-langchain-tracing-langtrace/langchainchat-dependency-metrics.png\\",alt:\\"Dependency-metric\\",width:\\"1614\\",height:\\"576\\"})}),`\\n`,(0,n.jsx)(e.p,{children:\\"We see Azure OpenAI takes on average 4s to give us the results.\\"}),`\\n`,(0,n.jsx)(e.p,{children:\\"If we drill into transactions and look at the trace for our queries on Taylor Swift and Pittsburgh Steelers, we can see both queries and their corresponding spans.\\"}),`\\n`,(0,n.jsx)(e.p,{children:(0,n.jsx)(e.img,{src:\\"/assets/images/elastic-opentelemetry-langchain-tracing-langtrace/langchainchat-trace.png\\",alt:\\"Trace for two queries\\",width:\\"1816\\",height:\\"1325\\"})}),`\\n`,(0,n.jsx)(e.p,{children:\\"In this trace:\\"}),`\\n`,(0,n.jsxs)(e.ol,{children:[`\\n`,(0,n.jsxs)(e.li,{children:[`\\n`,(0,n.jsx)(e.p,{children:\\"The user makes a query\\"}),`\\n`]}),`\\n`,(0,n.jsxs)(e.li,{children:[`\\n`,(0,n.jsx)(e.p,{children:\\"Azure OpenAI is called, but it uses a tool (DuckDuckGo) to obtain some results\\"}),`\\n`]}),`\\n`,(0,n.jsxs)(e.li,{children:[`\\n`,(0,n.jsx)(e.p,{children:\\"Azure OpenAI reviews and returns a summary to the end user\\"}),`\\n`]}),`\\n`,(0,n.jsxs)(e.li,{children:[`\\n`,(0,n.jsx)(e.p,{children:\\"Repeats for another query\\"}),`\\n`]}),`\\n`]}),`\\n`,(0,n.jsx)(e.p,{children:\\"We noticed that the other long span (other than Azure OpenAI) is DuckDuckGo (~1000ms). 
We can individually look at the span and review the data:\\"}),`\\n`,(0,n.jsx)(e.p,{children:(0,n.jsx)(e.img,{src:\\"/assets/images/elastic-opentelemetry-langchain-tracing-langtrace/langchainchat-tools-span.png\\",alt:\\"Span details\\",width:\\"1819\\",height:\\"1325\\"})}),`\\n`,(0,n.jsx)(e.h1,{id:\\"configuration\\",children:\\"Configuration:\\"}),`\\n`,(0,n.jsx)(e.p,{children:\\"How do we make all this show up in Elastic? Let\'s go over the steps:\\"}),`\\n`,(0,n.jsx)(e.h2,{id:\\"opentelemetry-configuration\\",children:\\"OpenTelemetry Configuration\\"}),`\\n`,(0,n.jsxs)(e.p,{children:[\\"To leverage the full capabilities of OpenTelemetry with Langtrace and Elastic, we need to configure the SDK to generate traces and properly set up Elastic\\\\u2019s endpoint and authorization. Detailed instructions can be found in the \\",(0,n.jsx)(e.a,{href:\\"https://opentelemetry.io/docs/zero-code/python/#setup\\",rel:\\"nofollow\\",children:\\"OpenTelemetry Auto-Instrumentation setup documentation\\"}),\\".\\"]}),`\\n`,(0,n.jsx)(e.h3,{id:\\"opentelemetry-environment-variables\\",children:\\"OpenTelemetry Environment variables:\\"}),`\\n`,(0,n.jsx)(e.p,{children:\\"For Elastic, you can set the following OpenTelemetry environment variables either in your Linux/Mac environment or directly in the code:\\"}),`\\n`,(0,n.jsx)(e.pre,{children:(0,n.jsx)(e.code,{className:\\"language-bash\\",children:`OTEL_EXPORTER_OTLP_ENDPOINT=12345.apm.us-west-2.aws.cloud.es.io:443\\nOTEL_EXPORTER_OTLP_HEADERS=\\"Authorization=Bearer%20ZZZZZZZ\\"\\nOTEL_RESOURCE_ATTRIBUTES=\\"service.name=langchainChat,service.version=1.0,deployment.environment=production\\"\\n`})}),`\\n`,(0,n.jsx)(e.p,{children:\\"In this setup:\\"}),`\\n`,(0,n.jsxs)(e.ul,{children:[`\\n`,(0,n.jsxs)(e.li,{children:[`\\n`,(0,n.jsxs)(e.p,{children:[(0,n.jsx)(e.strong,{children:\\"OTEL_EXPORTER_OTLP_ENDPOINT\\"}),\\" is configured to send traces to Elastic.\\"]}),`\\n`]}),`\\n`,(0,n.jsxs)(e.li,{children:[`\\n`,(0,n.jsxs)(e.p,{children:[(0,n.jsx)(e.strong,{children:\\"OTEL_EXPORTER_OTLP_HEADERS\\"}),\\" provides the necessary authorization for the Elastic APM server.\\"]}),`\\n`]}),`\\n`,(0,n.jsxs)(e.li,{children:[`\\n`,(0,n.jsxs)(e.p,{children:[(0,n.jsx)(e.strong,{children:\\"OTEL_RESOURCE_ATTRIBUTES\\"}),\\" define key attributes like the service name, version, and deployment environment.\\"]}),`\\n`]}),`\\n`]}),`\\n`,(0,n.jsx)(e.p,{children:\\"These values can be easily obtained from Elastic\\\\u2019s APM configuration screen under the OpenTelemetry section.\\"}),`\\n`,(0,n.jsx)(e.p,{children:(0,n.jsx)(e.img,{src:\\"/assets/images/elastic-opentelemetry-langchain-tracing-langtrace/langchainchat-OTelAPMsetup.png\\",alt:\\"Span details\\",width:\\"963\\",height:\\"895\\"})}),`\\n`,(0,n.jsx)(e.p,{children:(0,n.jsx)(e.strong,{children:\\"Note: No agent is required; the OTLP trace messages are sent directly to Elastic\\\\u2019s APM server, simplifying the setup process.\\"})}),`\\n`,(0,n.jsx)(e.h2,{id:\\"langtrace-library\\",children:\\"Langtrace Library:\\"}),`\\n`,(0,n.jsx)(e.p,{children:\\"OpenTelemetry\'s auto-instrumentation can be extended to trace additional frameworks using instrumentation packages. 
For this blog post, you will need to install the Langtrace Python SDK:\\"}),`\\n`,(0,n.jsx)(e.pre,{children:(0,n.jsx)(e.code,{className:\\"language-bash\\",children:`pip install langtrace-python-sdk\\n`})}),`\\n`,(0,n.jsx)(e.p,{children:\\"After installation, you can add the following code to your project:\\"}),`\\n`,(0,n.jsx)(e.pre,{children:(0,n.jsx)(e.code,{className:\\"language-python\\",children:`from opentelemetry.exporter.otlp.proto.grpc.trace_exporter import OTLPSpanExporter\\n\\nfrom langtrace_python_sdk import langtrace, with_langtrace_root_span\\n`})}),`\\n`,(0,n.jsx)(e.h2,{id:\\"instrumentation\\",children:\\"Instrumentation:\\"}),`\\n`,(0,n.jsx)(e.p,{children:\\"Once the necessary libraries are installed and the environment variables are configured, you can use auto-instrumentation to trace your application. For example, run the following command to instrument your LangChain application with Elastic:\\"}),`\\n`,(0,n.jsx)(e.pre,{children:(0,n.jsx)(e.code,{className:\\"language-bash\\",children:`opentelemetry-instrument python langtrace-elastic-demo.py\\n`})}),`\\n`,(0,n.jsx)(e.p,{children:(0,n.jsx)(e.img,{src:\\"/assets/images/elastic-opentelemetry-langchain-tracing-langtrace/langchainchat-trace.png\\",alt:\\"Trace for two queries\\",width:\\"1816\\",height:\\"1325\\"})}),`\\n`,(0,n.jsxs)(e.p,{children:[\\"The Langtrace OpenTelemetry library correctly captures the flow with minimal manual instrumentation, apart from integrating the OpenTelemetry library. Additionally, the LLM spans captured by Langtrace also include useful metadata such as token counts, model hyper-parameter settings, etc. Note that the generated spans follow the OTEL GenAI semantics described \\",(0,n.jsx)(e.a,{href:\\"https://opentelemetry.io/docs/specs/semconv/attributes-registry/gen-ai/\\",rel:\\"nofollow\\",children:\\"here\\"}),\\".\\"]}),`\\n`,(0,n.jsx)(e.p,{children:\\"In summary, the instrumentation process involves:\\"}),`\\n`,(0,n.jsxs)(e.ol,{children:[`\\n`,(0,n.jsxs)(e.li,{children:[`\\n`,(0,n.jsx)(e.p,{children:\\"Capturing customer input from the command line (Queries).\\"}),`\\n`]}),`\\n`,(0,n.jsxs)(e.li,{children:[`\\n`,(0,n.jsx)(e.p,{children:\\"Sending these queries to the Azure OpenAI LLM via LangChain.\\"}),`\\n`]}),`\\n`,(0,n.jsxs)(e.li,{children:[`\\n`,(0,n.jsx)(e.p,{children:\\"Utilizing chain tools, such as DuckDuckGo, to perform searches.\\"}),`\\n`]}),`\\n`,(0,n.jsxs)(e.li,{children:[`\\n`,(0,n.jsx)(e.p,{children:\\"The LLM processes the results and returns the relevant information to the user.\\"}),`\\n`]}),`\\n`]}),`\\n`,(0,n.jsx)(e.h1,{id:\\"conclusion\\",children:\\"Conclusion\\"}),`\\n`,(0,n.jsxs)(e.p,{children:[\\"By combining the power of \\",(0,n.jsx)(e.a,{href:\\"https://langtrace.ai/\\",rel:\\"nofollow\\",children:\\"Langtrace\\"}),\\" with Elastic, developers can achieve unparalleled visibility into their LangChain applications, ensuring optimized performance and quicker debugging. This powerful combination simplifies the complex task of monitoring AI-driven systems, enabling you to focus on what truly matters\\\\u2014delivering value to your users.\\"]}),
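`\\n`,(0,n.jsx)(e.p,{children:\\"As a compact recap, the moving parts from this article fit together like this (values are the placeholders from the sections above):\\"}),`\\n`,(0,n.jsx)(e.pre,{children:(0,n.jsx)(e.code,{className:\\"language-bash\\",children:`pip install langtrace-python-sdk\\nexport OTEL_EXPORTER_OTLP_ENDPOINT=\\"12345.apm.us-west-2.aws.cloud.es.io:443\\"\\nexport OTEL_EXPORTER_OTLP_HEADERS=\\"Authorization=Bearer%20ZZZZZZZ\\"\\nexport OTEL_RESOURCE_ATTRIBUTES=\\"service.name=langchainChat,service.version=1.0,deployment.environment=production\\"\\nopentelemetry-instrument python langtrace-elastic-demo.py\\n`})}),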
`\\n`,(0,n.jsx)(e.p,{children:\\"Throughout this blog, we\'ve covered the following essential steps and concepts:\\"}),`\\n`,(0,n.jsxs)(e.ul,{children:[`\\n`,(0,n.jsxs)(e.li,{children:[`\\n`,(0,n.jsx)(e.p,{children:\\"How to manually instrument Langchain with OpenTelemetry\\"}),`\\n`]}),`\\n`,(0,n.jsxs)(e.li,{children:[`\\n`,(0,n.jsx)(e.p,{children:\\"How to properly initialize OpenTelemetry and add a custom span\\"}),`\\n`]}),`\\n`,(0,n.jsxs)(e.li,{children:[`\\n`,(0,n.jsx)(e.p,{children:\\"How to easily set the OTLP ENDPOINT and OTLP HEADERS with Elastic without the need for a collector\\"}),`\\n`]}),`\\n`,(0,n.jsxs)(e.li,{children:[`\\n`,(0,n.jsx)(e.p,{children:\\"How to view and analyze traces in Elastic Observability APM\\"}),`\\n`]}),`\\n`]}),`\\n`,(0,n.jsx)(e.p,{children:\\"These steps provide a clear and actionable guide for developers looking to integrate robust tracing capabilities into their LangChain applications.\\"}),`\\n`,(0,n.jsx)(e.p,{children:\\"We hope this guide makes understanding and implementing OpenTelemetry tracing for LangChain simple, ensuring seamless integration with Elastic.\\"}),`\\n`,(0,n.jsx)(e.p,{children:(0,n.jsx)(e.strong,{children:\\"Additional resources for OpenTelemetry with Elastic:\\"})}),`\\n`,(0,n.jsxs)(e.ul,{children:[`\\n`,(0,n.jsxs)(e.li,{children:[`\\n`,(0,n.jsx)(e.p,{children:(0,n.jsx)(e.a,{href:\\"https://www.elastic.co/blog/opentelemetry-observability\\",rel:\\"nofollow\\",children:\\"Independence with OpenTelemetry on Elastic\\"})}),`\\n`]}),`\\n`,(0,n.jsxs)(e.li,{children:[`\\n`,(0,n.jsx)(e.p,{children:(0,n.jsx)(e.a,{href:\\"https://www.elastic.co/blog/implementing-kubernetes-observability-security-opentelemetry\\",rel:\\"nofollow\\",children:\\"Modern observability and security on Kubernetes with Elastic and OpenTelemetry\\"})}),`\\n`]}),`\\n`,(0,n.jsxs)(e.li,{children:[`\\n`,(0,n.jsx)(e.p,{children:(0,n.jsx)(e.a,{href:\\"https://www.elastic.co/blog/3-models-logging-opentelemetry-elastic\\",rel:\\"nofollow\\",children:\\"3 models for logging with OpenTelemetry and Elastic\\"})}),`\\n`]}),`\\n`,(0,n.jsxs)(e.li,{children:[`\\n`,(0,n.jsx)(e.p,{children:(0,n.jsx)(e.a,{href:\\"https://www.elastic.co/blog/adding-free-and-open-elastic-apm-as-part-of-your-elastic-observability-deployment\\",rel:\\"nofollow\\",children:\\"Adding free and open Elastic APM as part of your Elastic Observability deployment\\"})}),`\\n`]}),`\\n`,(0,n.jsxs)(e.li,{children:[`\\n`,(0,n.jsx)(e.p,{children:(0,n.jsx)(e.a,{href:\\"https://www.elastic.co/blog/monitor-openai-api-gpt-models-opentelemetry-elastic\\",rel:\\"nofollow\\",children:\\"Monitor OpenAI API and GPT models with OpenTelemetry and Elastic\\"})}),`\\n`]}),`\\n`,(0,n.jsxs)(e.li,{children:[`\\n`,(0,n.jsxs)(e.p,{children:[\\"Future-proof\\",(0,n.jsx)(e.a,{href:\\"https://www.elastic.co/virtual-events/future-proof-your-observability-platform-with-opentelemetry-and-elastic\\",rel:\\"nofollow\\",children:\\" your observability platform with OpenTelemetry and Elastic\\"})]}),`\\n`]}),`\\n`,(0,n.jsxs)(e.li,{children:[`\\n`,(0,n.jsx)(e.p,{children:\\"Instrumentation resources:\\"}),`\\n`,(0,n.jsxs)(e.ul,{children:[`\\n`,(0,n.jsxs)(e.li,{children:[`\\n`,(0,n.jsxs)(e.p,{children:[\\"Python: \\",(0,n.jsx)(e.a,{href:\\"https://www.elastic.co/blog/auto-instrumentation-of-python-applications-opentelemetry\\",rel:\\"nofollow\\",children:\\"Auto-instrumentation\\"}),\\", \\",(0,n.jsx)(e.a,{href:\\"https://www.elastic.co/blog/manual-instrumentation-of-python-applications-opentelemetry\\",rel:\\"nofollow\\",children:\\"Manual 
instrumentation\\"})]}),`\\n`]}),`\\n`,(0,n.jsxs)(e.li,{children:[`\\n`,(0,n.jsxs)(e.p,{children:[\\"Java: \\",(0,n.jsx)(e.a,{href:\\"https://www.elastic.co/blog/auto-instrumentation-of-java-applications-opentelemetry\\",rel:\\"nofollow\\",children:\\"Auto-instrumentation\\"}),\\", \\",(0,n.jsx)(e.a,{href:\\"https://www.elastic.co/blog/manual-instrumentation-of-java-applications-opentelemetry\\",rel:\\"nofollow\\",children:\\"Manual instrumentation\\\\xA0\\"})]}),`\\n`]}),`\\n`,(0,n.jsxs)(e.li,{children:[`\\n`,(0,n.jsxs)(e.p,{children:[\\"Node.js: \\",(0,n.jsx)(e.a,{href:\\"https://www.elastic.co/blog/auto-instrument-nodejs-applications-opentelemetry\\",rel:\\"nofollow\\",children:\\"Auto-instrumentation\\"}),\\", \\",(0,n.jsx)(e.a,{href:\\"https://www.elastic.co/blog/manual-instrumentation-of-nodejs-applications-opentelemetry\\",rel:\\"nofollow\\",children:\\"Manual instrumentation\\"})]}),`\\n`]}),`\\n`,(0,n.jsxs)(e.li,{children:[`\\n`,(0,n.jsxs)(e.p,{children:[\\".NET: \\",(0,n.jsx)(e.a,{href:\\"https://www.elastic.co/blog/auto-instrumentation-of-net-applications-opentelemetry\\",rel:\\"nofollow\\",children:\\"Auto-instrumentation\\"}),\\", \\",(0,n.jsx)(e.a,{href:\\"https://www.elastic.co/blog/manual-instrumentation-of-net-applications-opentelemetry\\",rel:\\"nofollow\\",children:\\"Manual instrumentation\\"})]}),`\\n`]}),`\\n`]}),`\\n`]}),`\\n`,(0,n.jsxs)(e.li,{children:[`\\n`,(0,n.jsxs)(e.p,{children:[(0,n.jsx)(e.a,{href:\\"https://docs.langtrace.ai/supported-integrations/observability-tools/elastic\\",rel:\\"nofollow\\",children:\\"Elastic APM - Langtrace AI Docs\\"}),\\"\\\\xA0\\"]}),`\\n`]}),`\\n`]})]})}function p(t={}){let{wrapper:e}=t.components||{};return e?(0,n.jsx)(e,{...t,children:(0,n.jsx)(h,{...t})}):h(t)}return v(T);})();\\n;return Component;"},"_id":"articles/elastic-opentelemetry-langchain-tracing-langtrace.mdx","_raw":{"sourceFilePath":"articles/elastic-opentelemetry-langchain-tracing-langtrace.mdx","sourceFileName":"elastic-opentelemetry-langchain-tracing-langtrace.mdx","sourceFileDir":"articles","contentType":"mdx","flattenedPath":"articles/elastic-opentelemetry-langchain-tracing-langtrace"},"type":"Article","imageUrl":"/assets/images/elastic-opentelemetry-langchain-tracing-langtrace/elastic-langtrace.jpg","readingTime":"8 min read","url":"/elastic-opentelemetry-langchain-tracing-langtrace","headings":[{"level":2,"title":"OpenTelemetry Configuration","href":"#opentelemetry-configuration"},{"level":3,"title":"OpenTelemetry Environment variables:","href":"#opentelemetry-environment-variables"},{"level":2,"title":"Langtrace Library:","href":"#langtrace-library"},{"level":2,"title":"Instrumentation:","href":"#instrumentation"}]},{"title":"Tracing LangChain apps with Elastic, OpenLLMetry, and OpenTelemetry","slug":"elastic-opentelemetry-langchain-tracing","date":"2024-08-02","description":"LangChain applications are growing in use. The ability to build out RAG-based applications, simple AI Assistants, and more is becoming the norm. Observing these applications is even harder. 
Given the various options that are out there, this blog shows how to use OpenTelemetry instrumentation with OpenLLMetry and ingest it into Elastic Observability APM","image":"LangChainBlogMainImage.png","author":[{"slug":"bahubali-shetti","type":"Author","_raw":{}}],"tags":[{"slug":"opentelemetry","type":"Tag","_raw":{}},{"slug":"apm","type":"Tag","_raw":{}},{"slug":"python","type":"Tag","_raw":{}},{"slug":"instrumentation","type":"Tag","_raw":{}},{"slug":"azure-openai","type":"Tag","_raw":{}}],"body":{"raw":"\\nLangChain has rapidly emerged as a crucial framework in the AI development landscape, particularly for building applications powered by large language models (LLMs). As its adoption has soared among developers, the need for effective debugging and performance optimization tools has become increasingly apparent. One such essential tool is the ability to obtain and analyze traces from LangChain applications. Tracing provides invaluable insights into the execution flow, helping developers understand and improve their AI-driven systems.\xa0\\n\\nThere are several options to trace for LangChain. One is Langsmith, ideal for detailed tracing and a complete breakdown of requests to large language models (LLMs). However, it is specific to Langchain. OpenTelemetry (OTel) is now broadly accepted as the industry standard for tracing. As one of the major Cloud Native Computing Foundation (CNCF) projects, with as many commits as Kubernetes, it is gaining momentum, with major ISVs and cloud providers delivering support for the framework.\xa0\\n\\nMoreover, many LangChain-based applications will have multiple components beyond just LLM interactions, so using OpenTelemetry with LangChain is essential. OpenLLMetry is an available option for tracing Langchain apps in addition to Langsmith.\\n\\nThis blog will show how you can get LangChain tracing into Elastic using the OpenLLMetry library `opentelemetry-instrumentation-langchain`.\\n\\n# Pre-requisites:\\n\\n- An Elastic Cloud account — [sign up now](https://cloud.elastic.co/), and become familiar with [Elastic’s OpenTelemetry configuration](https://www.elastic.co/guide/en/observability/current/apm-open-telemetry.html)\\n\\n- Have a LangChain app to instrument\\n\\n- Be familiar with using [OpenTelemetry’s Python SDK](https://opentelemetry.io/docs/languages/python/libraries/)\xa0\\n\\n- An account on your favorite LLM, with API keys\\n\\n\\n# Overview\\n\\nTo highlight tracing, I created a simple LangChain app that does the following:\\n\\n1. Takes customer input on the command line. (Queries)\\n\\n2. Sends these to the Azure OpenAI LLM via LangChain.\\n\\n3. Chain tools are set up to search with Tavily\xa0\\n\\n4. The LLM uses the output and returns the relevant information to the user.\\n\\n![Chat Interaction](/assets/images/elastic-opentelemetry-langchain-tracing/LangChainAppCLI.png)\\n\\n![LangChainChat App in Elastic APM](/assets/images/elastic-opentelemetry-langchain-tracing/LangChainAppInAPM.png)\\n\\nAs you can see, Elastic Observability’s APM recognizes the LangChain App and also shows the full trace (done with manual instrumentation):\\n\\n![LangChainChat App in Elastic APM](/assets/images/elastic-opentelemetry-langchain-tracing/LangChainAutoIntrument.png)\\n\\nAs the above image shows:\\n\\n1. The user makes a query\\n2. Azure OpenAI is called, but it uses a tool (Tavily) to obtain some results\\n3. 
\\n\\nThe code was manually instrumented, but auto-instrumentation can also be used.\\n\\n# OpenTelemetry Configuration\\n\\n\\nTo use OpenTelemetry, we need to configure the SDK to generate traces and to set Elastic’s endpoint and authorization. Instructions can be found in the [OpenTelemetry auto-instrumentation setup documentation](https://opentelemetry.io/docs/zero-code/python/#setup).\\n\\n\\n## OpenTelemetry Environment variables:\\n\\nOpenTelemetry environment variables for Elastic can be set as follows on Linux (or in the code).\\n\\n```bash\\nOTEL_EXPORTER_OTLP_ENDPOINT=12345.apm.us-west-2.aws.cloud.es.io:443\\nOTEL_EXPORTER_OTLP_HEADERS=\\"Authorization=Bearer%20ZZZZZZZ\\"\\nOTEL_RESOURCE_ATTRIBUTES=\\"service.name=langchainChat,service.version=1.0,deployment.environment=production\\"\\n```\\n\\nAs you can see, `OTEL_EXPORTER_OTLP_ENDPOINT` is set to Elastic, and the corresponding authorization header is also provided. These can be easily obtained from Elastic’s APM configuration screen under OpenTelemetry.\\n\\n![LangChainChat App in Elastic APM](/assets/images/elastic-opentelemetry-langchain-tracing/LangChainAppOTelAPMsetup.png)\\n\\n**Note: No agent is needed; we simply send the OTLP trace messages directly to Elastic’s APM server.**\xa0\\n\\n## OpenLLMetry Library:\\n\\nOpenTelemetry\'s auto-instrumentation can be extended to trace other frameworks via instrumentation packages.\\n\\nFirst, you must install the following package:\xa0\\n\\n`pip install opentelemetry-instrumentation-langchain`\\n\\nThis library was developed by OpenLLMetry.\xa0\\n\\nThen you will need to add the following to the code.\\n\\n```python\\nfrom opentelemetry.instrumentation.langchain import LangchainInstrumentor\\nLangchainInstrumentor().instrument()\\n```\\n\\n## Instrumentation\\n\\nOnce the libraries are added and the environment variables are set, you can use auto-instrumentation. With auto-instrumentation, run the following:\\n\\n```bash\\nopentelemetry-instrument python tavilyAzureApp.py\\n```\\n\\n![LangChainChat App in Elastic APM](/assets/images/elastic-opentelemetry-langchain-tracing/LangChainAutoIntrument.png)\\n\\n\\nThe OpenLLMetry library pulls out the flow correctly, with no manual instrumentation beyond adding the OpenLLMetry library itself:\\n\\n1. Takes customer input (queries) on the command line.\\n\\n2. Sends these to the Azure OpenAI LLM via a LangChain chain.\\n\\n3. The chain’s tools are set to use search with Tavily.\xa0\\n\\n4. The LLM uses the tool’s output and returns the relevant information to the user.\\n\\n### Manual-instrumentation\\n\\nIf you want to get more details out of the application, you will need to manually instrument. To get more traces, follow my [Python instrumentation guide](https://www.elastic.co/observability-labs/blog/manual-instrumentation-python-apps-opentelemetry). This guide will walk you through setting up the necessary OpenTelemetry pieces. Additionally, you can look at the OTel documentation on [instrumenting in Python](https://opentelemetry.io/docs/languages/python/instrumentation/).\\n\\nNote that the env variables `OTEL_EXPORTER_OTLP_HEADERS` and `OTEL_EXPORTER_OTLP_ENDPOINT` are set as noted in the section above. You can also set `OTEL_RESOURCE_ATTRIBUTES`.\\n\\n\\nOnce you follow the steps in either guide and initialize the tracer, you essentially just add a span where you want to get more details. 
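\\n\\nFor reference, initializing the tracer typically looks something like the following. This is a minimal sketch, assuming the opentelemetry-sdk and opentelemetry-exporter-otlp packages are installed and that the OTLP environment variables are exported as shown earlier; it is not the article’s exact code:\\n\\n```python\\n# Minimal tracer bootstrap (sketch): the OTLP exporter reads the endpoint\\n# and authorization header from the environment variables set above.\\nfrom opentelemetry import trace\\nfrom opentelemetry.sdk.resources import Resource\\nfrom opentelemetry.sdk.trace import TracerProvider\\nfrom opentelemetry.sdk.trace.export import BatchSpanProcessor\\nfrom opentelemetry.exporter.otlp.proto.grpc.trace_exporter import OTLPSpanExporter\\n\\nresource = Resource.create({\\"service.name\\": \\"langchainChat\\"})\\nprovider = TracerProvider(resource=resource)\\nprovider.add_span_processor(BatchSpanProcessor(OTLPSpanExporter()))\\ntrace.set_tracer_provider(provider)\\n```\\n\\n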
In the example below, only one line of code is added for span initialization. \\n\\n\\nNote the placement of the `with tracer.start_as_current_span(\\"getting user query\\") as span:` line below.\\n\\n```python\\nimport asyncio\\n\\nfrom opentelemetry import trace\\n\\n# Creates a tracer from the global tracer provider\\ntracer = trace.get_tracer(\\"newsQuery\\")\\n\\nasync def chat_interface():\\n    print(\\"Welcome to the AI Chat Interface!\\")\\n    print(\\"Type \'quit\' to exit the chat.\\")\\n\\n    # The single added line: one span wraps the whole chat loop\\n    with tracer.start_as_current_span(\\"getting user query\\") as span:\\n        while True:\\n            user_input = input(\\"\\\\nYou: \\").strip()\\n\\n            if user_input.lower() == \'quit\':\\n                print(\\"Thank you for chatting. Goodbye!\\")\\n                break\\n\\n            print(\\"AI: Thinking...\\")\\n            try:\\n                # chain is the LangChain chain defined elsewhere in the app\\n                result = await chain.ainvoke({\\"query\\": user_input})\\n                print(f\\"AI: {result.content}\\")\\n            except Exception as e:\\n                print(f\\"An error occurred: {str(e)}\\")\\n\\n\\nif __name__ == \\"__main__\\":\\n    asyncio.run(chat_interface())\\n```\\n\\nAs you can see, with manual instrumentation, we get the following trace:\\n\\n![LangChainChat App in Elastic APM](/assets/images/elastic-opentelemetry-langchain-tracing/LangChainAppManualTrace.png)\\n\\nThis calls out when we enter our query function, `async def chat_interface()`.\\n\\n\\n# Conclusion\\n\\nIn this blog, we discussed the following:\\n\\n- How to manually instrument LangChain with OpenTelemetry\\n\\n- How to properly initialize OpenTelemetry and add a custom span\\n\\n- How to easily set the OTLP endpoint and OTLP headers for Elastic without the need for a collector\\n\\n- How to see traces in Elastic Observability APM\\n\\nHopefully, this provides an easy-to-understand walk-through of instrumenting LangChain with OpenTelemetry and how easy it is to send traces into Elastic.\\n\\n**Additional resources for OpenTelemetry with Elastic:**\\n\\n- [Independence with OpenTelemetry on Elastic](https://www.elastic.co/blog/opentelemetry-observability)\\n\\n- [Modern observability and security on Kubernetes with Elastic and OpenTelemetry](https://www.elastic.co/blog/implementing-kubernetes-observability-security-opentelemetry)\\n\\n- [3 models for logging with OpenTelemetry and Elastic](https://www.elastic.co/blog/3-models-logging-opentelemetry-elastic)\\n\\n- [Adding free and open Elastic APM as part of your Elastic Observability deployment](https://www.elastic.co/blog/adding-free-and-open-elastic-apm-as-part-of-your-elastic-observability-deployment)\\n\\n- [Monitor OpenAI API and GPT models with OpenTelemetry and Elastic](https://www.elastic.co/blog/monitor-openai-api-gpt-models-opentelemetry-elastic)\\n\\n- [Futureproof your observability platform with OpenTelemetry and Elastic](https://www.elastic.co/virtual-events/future-proof-your-observability-platform-with-opentelemetry-and-elastic)\\n\\n- Instrumentation resources:\\n\\n  - Python: [Auto-instrumentation](https://www.elastic.co/blog/auto-instrumentation-of-python-applications-opentelemetry), [Manual instrumentation](https://www.elastic.co/blog/manual-instrumentation-of-python-applications-opentelemetry)\\n\\n  - Java: [Auto-instrumentation](https://www.elastic.co/blog/auto-instrumentation-of-java-applications-opentelemetry), [Manual instrumentation](https://www.elastic.co/blog/manual-instrumentation-of-java-applications-opentelemetry)\\n\\n  - Node.js: [Auto-instrumentation](https://www.elastic.co/blog/auto-instrument-nodejs-applications-opentelemetry), [Manual instrumentation](https://www.elastic.co/blog/manual-instrumentation-of-nodejs-applications-opentelemetry)\\n\\n  - .NET: 
[Auto-instrumentation](https://www.elastic.co/blog/auto-instrumentation-of-net-applications-opentelemetry), [Manual instrumentation](https://www.elastic.co/blog/manual-instrumentation-of-net-applications-opentelemetry)\\n\\n\\nAlso log into [cloud.elastic.co](https://cloud.elastic.co) to try out Elastic with a free trial.\\n\\n\\n","code":"var Component=(()=>{var p=Object.create;var r=Object.defineProperty;var u=Object.getOwnPropertyDescriptor;var m=Object.getOwnPropertyNames;var g=Object.getPrototypeOf,y=Object.prototype.hasOwnProperty;var w=(t,e)=>()=>(e||t((e={exports:{}}).exports,e),e.exports),f=(t,e)=>{for(var i in e)r(t,i,{get:e[i],enumerable:!0})},o=(t,e,i,l)=>{if(e&&typeof e==\\"object\\"||typeof e==\\"function\\")for(let a of m(e))!y.call(t,a)&&a!==i&&r(t,a,{get:()=>e[a],enumerable:!(l=u(e,a))||l.enumerable});return t};var b=(t,e,i)=>(i=t!=null?p(g(t)):{},o(e||!t||!t.__esModule?r(i,\\"default\\",{value:t,enumerable:!0}):i,t)),L=t=>o(r({},\\"__esModule\\",{value:!0}),t);var c=w((A,s)=>{s.exports=_jsx_runtime});var v={};f(v,{default:()=>d,frontmatter:()=>T});var n=b(c()),T={title:\\"Tracing LangChain apps with Elastic, OpenLLMetry, and OpenTelemetry\\",slug:\\"elastic-opentelemetry-langchain-tracing\\",date:\\"2024-08-02\\",description:\\"LangChain applications are growing in use. The ability to build out RAG-based applications, simple AI Assistants, and more is becoming the norm. Observing these applications is even harder. Given the various options that are out there, this blog shows how to use OpenTelemetry instrumentation with OpenLLMetry and ingest it into Elastic Observability APM\\",author:[{slug:\\"bahubali-shetti\\"}],image:\\"LangChainBlogMainImage.png\\",tags:[{slug:\\"opentelemetry\\"},{slug:\\"apm\\"},{slug:\\"python\\"},{slug:\\"instrumentation\\"},{slug:\\"azure-openai\\"}]};function h(t){let e={a:\\"a\\",code:\\"code\\",h1:\\"h1\\",h2:\\"h2\\",h3:\\"h3\\",img:\\"img\\",li:\\"li\\",ol:\\"ol\\",p:\\"p\\",pre:\\"pre\\",strong:\\"strong\\",ul:\\"ul\\",...t.components};return(0,n.jsxs)(n.Fragment,{children:[(0,n.jsx)(e.p,{children:\\"LangChain has rapidly emerged as a crucial framework in the AI development landscape, particularly for building applications powered by large language models (LLMs). As its adoption has soared among developers, the need for effective debugging and performance optimization tools has become increasingly apparent. One such essential tool is the ability to obtain and analyze traces from LangChain applications. Tracing provides invaluable insights into the execution flow, helping developers understand and improve their AI-driven systems.\\\\xA0\\"}),`\\n`,(0,n.jsx)(e.p,{children:\\"There are several options to trace for LangChain. One is Langsmith, ideal for detailed tracing and a complete breakdown of requests to large language models (LLMs). However, it is specific to Langchain. OpenTelemetry (OTel) is now broadly accepted as the industry standard for tracing. As one of the major Cloud Native Computing Foundation (CNCF) projects, with as many commits as Kubernetes, it is gaining support from major ISVs and cloud providers delivering support for the framework.\\\\xA0\\"}),`\\n`,(0,n.jsx)(e.p,{children:\\"Hence, many LangChain-based applications will have multiple components beyond just LLM interactions. Using OpenTelemetry with LangChain is essential. 
OpenLLMetry is an available option for tracing Langchain apps in addition to Langsmith.\\"}),`\\n`,(0,n.jsxs)(e.p,{children:[\\"This blog will show how you can get LangChain tracing into Elastic using the OpenLLMetry library \\",(0,n.jsx)(e.code,{children:\\"opentelemetry-instrumentation-langchain\\"}),\\".\\"]}),`\\n`,(0,n.jsxs)(e.h1,{id:\\"pre-requisites\\",children:[\\"Pre-requisites:\\",(0,n.jsx)(\\"a\\",{id:\\"pre-requisites\\"})]}),`\\n`,(0,n.jsxs)(e.ul,{children:[`\\n`,(0,n.jsxs)(e.li,{children:[`\\n`,(0,n.jsxs)(e.p,{children:[\\"An Elastic Cloud account \\\\u2014 \\",(0,n.jsx)(e.a,{href:\\"https://cloud.elastic.co/\\",rel:\\"nofollow\\",children:\\"sign up now\\"}),\\", and become familiar with \\",(0,n.jsx)(e.a,{href:\\"https://www.elastic.co/guide/en/observability/current/apm-open-telemetry.html\\",rel:\\"nofollow\\",children:\\"Elastic\\\\u2019s OpenTelemetry configuration\\"})]}),`\\n`]}),`\\n`,(0,n.jsxs)(e.li,{children:[`\\n`,(0,n.jsx)(e.p,{children:\\"Have a LangChain app to instrument\\"}),`\\n`]}),`\\n`,(0,n.jsxs)(e.li,{children:[`\\n`,(0,n.jsxs)(e.p,{children:[\\"Be familiar with using \\",(0,n.jsx)(e.a,{href:\\"https://opentelemetry.io/docs/languages/python/libraries/\\",rel:\\"nofollow\\",children:\\"OpenTelemetry\\\\u2019s Python SDK\\"}),\\"\\\\xA0\\"]}),`\\n`]}),`\\n`,(0,n.jsxs)(e.li,{children:[`\\n`,(0,n.jsx)(e.p,{children:\\"An account on your favorite LLM, with API keys\\"}),`\\n`]}),`\\n`]}),`\\n`,(0,n.jsx)(e.h1,{id:\\"overview\\",children:\\"Overview\\"}),`\\n`,(0,n.jsx)(e.p,{children:\\"In highlighting tracing I created a simple LangChain app that does the following:\\"}),`\\n`,(0,n.jsxs)(e.ol,{children:[`\\n`,(0,n.jsxs)(e.li,{children:[`\\n`,(0,n.jsx)(e.p,{children:\\"Takes customer input on the command line. (Queries)\\"}),`\\n`]}),`\\n`,(0,n.jsxs)(e.li,{children:[`\\n`,(0,n.jsx)(e.p,{children:\\"Sends these to the Azure OpenAI LLM via a LangChain.\\"}),`\\n`]}),`\\n`,(0,n.jsxs)(e.li,{children:[`\\n`,(0,n.jsx)(e.p,{children:\\"Chain tools are set to use the search with Tavily\\\\xA0\\"}),`\\n`]}),`\\n`,(0,n.jsxs)(e.li,{children:[`\\n`,(0,n.jsx)(e.p,{children:\\"The LLM uses the output which returns the relevant information to the user.\\"}),`\\n`]}),`\\n`]}),`\\n`,(0,n.jsx)(e.p,{children:(0,n.jsx)(e.img,{src:\\"/assets/images/elastic-opentelemetry-langchain-tracing/LangChainAppCLI.png\\",alt:\\"Chat Interaction\\",width:\\"1734\\",height:\\"924\\"})}),`\\n`,(0,n.jsx)(e.p,{children:(0,n.jsx)(e.img,{src:\\"/assets/images/elastic-opentelemetry-langchain-tracing/LangChainAppInAPM.png\\",alt:\\"LangChainChat App in Elastic APM\\",width:\\"1604\\",height:\\"887\\"})}),`\\n`,(0,n.jsx)(e.p,{children:\\"As you can see Elastic Observability\\\\u2019s APM recognizes the LangChain App, and also shows the full trace (done with manual instrumentation):\\"}),`\\n`,(0,n.jsx)(e.p,{children:(0,n.jsx)(e.img,{src:\\"/assets/images/elastic-opentelemetry-langchain-tracing/LangChainAutoIntrument.png\\",alt:\\"LangChainChat App in Elastic APM\\",width:\\"1507\\",height:\\"830\\"})}),`\\n`,(0,n.jsx)(e.p,{children:\\"As the above image shows:\\"}),`\\n`,(0,n.jsxs)(e.ol,{children:[`\\n`,(0,n.jsx)(e.li,{children:\\"The user makes a query\\"}),`\\n`,(0,n.jsx)(e.li,{children:\\"Azure OpenAI is called, but it uses a tool (Tavily) to obtain some results\\"}),`\\n`,(0,n.jsx)(e.li,{children:\\"Azure OpenAI reviews and returns a summary to the end user\\"}),`\\n`]}),`\\n`,(0,n.jsx)(e.p,{children:\\"The code was manually instrumented, but auto-instrument can also be 
used.\\"}),`\\n`,(0,n.jsxs)(e.h1,{id:\\"opentelemetry-configuration\\",children:[\\"OpenTelemetry Configuration\\",(0,n.jsx)(\\"a\\",{id:\\"opentelemetry-configuration\\"})]}),`\\n`,(0,n.jsxs)(e.p,{children:[\\"In using OpenTelemetry, we need to configure the SDK to generate traces and configure Elastic\\\\u2019s endpoint and authorization. Instructions can be found in \\",(0,n.jsx)(e.a,{href:\\"https://opentelemetry.io/docs/zero-code/python/#setup\\",rel:\\"nofollow\\",children:\\"OpenTelemetry Auto-Instrumentation setup documentation\\"}),\\".\\"]}),`\\n`,(0,n.jsxs)(e.h2,{id:\\"opentelemetry-environment-variables\\",children:[\\"OpenTelemetry Environment variables:\\",(0,n.jsx)(\\"a\\",{id:\\"opentelemetry-environment-variables\\"})]}),`\\n`,(0,n.jsx)(e.p,{children:\\"OpenTelemetry Environment variables for Elastic can be set as follows in linux (or in the code).\\"}),`\\n`,(0,n.jsx)(e.pre,{children:(0,n.jsx)(e.code,{className:\\"language-bash\\",children:`OTEL_EXPORTER_OTLP_ENDPOINT=12345.apm.us-west-2.aws.cloud.es.io:443\\nOTEL_EXPORTER_OTLP_HEADERS=\\"Authorization=Bearer%20ZZZZZZZ\\"\\nOTEL_RESOURCE_ATTRIBUTES=\\"service.name=langchainChat,service.version=1.0,deployment.environment=production\\"\\n`})}),`\\n`,(0,n.jsxs)(e.p,{children:[\\"As you can see \\",(0,n.jsx)(e.code,{children:\\"OTEL_EXPORTER_OTLP_ENDPOINT\\"}),\\" is set to Elastic, and the corresponding authorization header is also provided. These can be easily obtained from Elastic\\\\u2019s APM configuration screen under OpenTelemetry\\"]}),`\\n`,(0,n.jsx)(e.p,{children:(0,n.jsx)(e.img,{src:\\"/assets/images/elastic-opentelemetry-langchain-tracing/LangChainAppOTelAPMsetup.png\\",alt:\\"LangChainChat App in Elastic APM\\",width:\\"963\\",height:\\"895\\"})}),`\\n`,(0,n.jsxs)(e.p,{children:[(0,n.jsx)(e.strong,{children:\\"Note: No agent is needed, we simply send the OTLP trace messages directly to Elastic\\\\u2019s APM server.\\"}),\\"\\\\xA0\\"]}),`\\n`,(0,n.jsxs)(e.h2,{id:\\"openllmetry-library\\",children:[\\"OpenLLMetry Library:\\",(0,n.jsx)(\\"a\\",{id:\\"openllmetry-library\\"})]}),`\\n`,(0,n.jsx)(e.p,{children:\\"OpenTelemetry\'s auto-instrumentation can be extended to trace other frameworks via instrumentation packages.\\"}),`\\n`,(0,n.jsx)(e.p,{children:\\"First, you must install the following package:\\\\xA0\\"}),`\\n`,(0,n.jsx)(e.p,{children:(0,n.jsx)(e.code,{children:\\"pip install opentelemetry-instrumentation-langchain\\"})}),`\\n`,(0,n.jsx)(e.p,{children:\\"This library was developed by OpenLLMetry.\\\\xA0\\"}),`\\n`,(0,n.jsx)(e.p,{children:\\"Then you will need to add the following to the code.\\"}),`\\n`,(0,n.jsx)(e.pre,{children:(0,n.jsx)(e.code,{className:\\"language-python\\",children:`from opentelemetry.instrumentation.langchain import LangchainInstrumentor\\nLangchainInstrumentor().instrument()\\n`})}),`\\n`,(0,n.jsxs)(e.h2,{id:\\"instrumentation\\",children:[\\"Instrumentation\\",(0,n.jsx)(\\"a\\",{id:\\"instrumentation\\"})]}),`\\n`,(0,n.jsx)(e.p,{children:\\"Once the libraries are added, and the environment variables are set, you can use auto-instrumentation With auto-instrumentation, the following:\\"}),`\\n`,(0,n.jsx)(e.pre,{children:(0,n.jsx)(e.code,{className:\\"language-bash\\",children:`opentelemetry-instrument python tavilyAzureApp.py\\n`})}),`\\n`,(0,n.jsx)(e.p,{children:(0,n.jsx)(e.img,{src:\\"/assets/images/elastic-opentelemetry-langchain-tracing/LangChainAutoIntrument.png\\",alt:\\"LangChainChat App in Elastic 
APM\\",width:\\"1507\\",height:\\"830\\"})}),`\\n`,(0,n.jsx)(e.p,{children:\\"The OpenLLMetry library does pull out the flow correctly with minimal manual instrumentation except for adding the OpenLLMetry library.\\"}),`\\n`,(0,n.jsxs)(e.ol,{children:[`\\n`,(0,n.jsxs)(e.li,{children:[`\\n`,(0,n.jsx)(e.p,{children:\\"Takes customer input on the command line. (Queries)\\"}),`\\n`]}),`\\n`,(0,n.jsxs)(e.li,{children:[`\\n`,(0,n.jsx)(e.p,{children:\\"Sends these to the Azure OpenAI LLM via a Lang chain.\\"}),`\\n`]}),`\\n`,(0,n.jsxs)(e.li,{children:[`\\n`,(0,n.jsx)(e.p,{children:\\"Chain tools are set to use the search with Tavily\\\\xA0\\"}),`\\n`]}),`\\n`,(0,n.jsxs)(e.li,{children:[`\\n`,(0,n.jsx)(e.p,{children:\\"The LLM uses the output which returns the relevant information to the user.\\"}),`\\n`]}),`\\n`]}),`\\n`,(0,n.jsxs)(e.h3,{id:\\"manual-instrumentation\\",children:[\\"Manual-instrumentation\\",(0,n.jsx)(\\"a\\",{id:\\"manual-instrumentation\\"})]}),`\\n`,(0,n.jsxs)(e.p,{children:[\\"If you want to get more details out of the application, you will need to manually instrument. To get more traces follow my \\",(0,n.jsx)(e.a,{href:\\"https://www.elastic.co/observability-labs/blog/manual-instrumentation-python-apps-opentelemetry\\",rel:\\"nofollow\\",children:\\"Python instrumentation guide\\"}),\\". This guide will walk you through setting up the necessary OpenTelemetry bits, Additionally, you can also look at the documentation in \\",(0,n.jsx)(e.a,{href:\\"https://opentelemetry.io/docs/languages/python/instrumentation/\\",rel:\\"nofollow\\",children:\\"OTel for instrumenting in Python\\"}),\\".\\"]}),`\\n`,(0,n.jsxs)(e.p,{children:[\\"Note that the env variables \\",(0,n.jsx)(e.code,{children:\\"OTEL_EXPORTER_OTLP_HEADERS\\"}),\\" and \\",(0,n.jsx)(e.code,{children:\\"OTEL_EXPORTER_OTLP_ENDPOINT\\"}),\\" are set as noted in the section above. You can also set up the \\",(0,n.jsx)(e.code,{children:\\"OTEL_RESOURCE_ATTRIBUTES\\"}),\\".\\"]}),`\\n`,(0,n.jsx)(e.p,{children:\\"Once you follow the steps in either guide and initiate the tracer, you will have to essentially just add the span where you want to get more details. In the example below, only one line of code is added for span initialization.\\"}),`\\n`,(0,n.jsxs)(e.p,{children:[\\"Look at the placement of with \\",(0,n.jsx)(e.code,{children:\'tracer.start_as_current_span(\\"getting user query\\") as span:\'}),\\" below\\"]}),`\\n`,(0,n.jsx)(e.pre,{children:(0,n.jsx)(e.code,{className:\\"language-python\\",children:`# Creates a tracer from the global tracer provider\\ntracer = trace.get_tracer(\\"newsQuery\\")\\n\\nasync def chat_interface():\\n print(\\"Welcome to the AI Chat Interface!\\")\\n print(\\"Type \'quit\' to exit the chat.\\")\\n \\n with tracer.start_as_current_span(\\"getting user query\\") as span:\\n while True:\\n user_input = input(\\"\\\\\\\\nYou: \\").strip()\\n \\n if user_input.lower() == \'quit\':\\n print(\\"Thank you for chatting. 
Goodbye!\\")\\n break\\n \\n print(\\"AI: Thinking...\\")\\n try:\\n result = await chain.ainvoke({\\"query\\": user_input})\\n print(f\\"AI: {result.content}\\")\\n except Exception as e:\\n print(f\\"An error occurred: {str(e)}\\")\\n\\n\\nif __name__ == \\"__main__\\":\\n asyncio.run(chat_interface())\\n`})}),`\\n`,(0,n.jsx)(e.p,{children:\\"As you can see, with manual instrumentation, we get the following trace:\\"}),`\\n`,(0,n.jsx)(e.p,{children:(0,n.jsx)(e.img,{src:\\"/assets/images/elastic-opentelemetry-langchain-tracing/LangChainAppManualTrace.png\\",alt:\\"LangChainChat App in Elastic APM\\",width:\\"2778\\",height:\\"1682\\"})}),`\\n`,(0,n.jsxs)(e.p,{children:[\\"Which calls out when we enter our query function. \\",(0,n.jsx)(e.code,{children:\\"async def chat_interface()\\"})]}),`\\n`,(0,n.jsxs)(e.h1,{id:\\"conclusion\\",children:[\\"Conclusion\\",(0,n.jsx)(\\"a\\",{id:\\"conclusion\\"})]}),`\\n`,(0,n.jsx)(e.p,{children:\\"In this blog, we discussed the following:\\"}),`\\n`,(0,n.jsxs)(e.ul,{children:[`\\n`,(0,n.jsxs)(e.li,{children:[`\\n`,(0,n.jsx)(e.p,{children:\\"How to manually instrument LangChain with OpenTelemetry\\"}),`\\n`]}),`\\n`,(0,n.jsxs)(e.li,{children:[`\\n`,(0,n.jsx)(e.p,{children:\\"How to properly initialize OpenTelemetry and add a custom span\\"}),`\\n`]}),`\\n`,(0,n.jsxs)(e.li,{children:[`\\n`,(0,n.jsx)(e.p,{children:\\"How to easily set the OTLP ENDPOINT and OTLP HEADERS with Elastic without the need for a collector\\"}),`\\n`]}),`\\n`,(0,n.jsxs)(e.li,{children:[`\\n`,(0,n.jsx)(e.p,{children:\\"See traces in Elastic Observability APM\\"}),`\\n`]}),`\\n`]}),`\\n`,(0,n.jsx)(e.p,{children:\\"Hopefully, this provides an easy-to-understand walk-through of instrumenting LangChain with OpenTelemetry and how easy it is to send traces into Elastic.\\"}),`\\n`,(0,n.jsx)(e.p,{children:(0,n.jsx)(e.strong,{children:\\"Additional resources for OpenTelemetry with Elastic:\\"})}),`\\n`,(0,n.jsxs)(e.ul,{children:[`\\n`,(0,n.jsxs)(e.li,{children:[`\\n`,(0,n.jsx)(e.p,{children:(0,n.jsx)(e.a,{href:\\"https://www.elastic.co/blog/opentelemetry-observability\\",rel:\\"nofollow\\",children:\\"Independence with OpenTelemetry on Elastic\\"})}),`\\n`]}),`\\n`,(0,n.jsxs)(e.li,{children:[`\\n`,(0,n.jsx)(e.p,{children:(0,n.jsx)(e.a,{href:\\"https://www.elastic.co/blog/implementing-kubernetes-observability-security-opentelemetry\\",rel:\\"nofollow\\",children:\\"Modern observability and security on Kubernetes with Elastic and OpenTelemetry\\"})}),`\\n`]}),`\\n`,(0,n.jsxs)(e.li,{children:[`\\n`,(0,n.jsx)(e.p,{children:(0,n.jsx)(e.a,{href:\\"https://www.elastic.co/blog/3-models-logging-opentelemetry-elastic\\",rel:\\"nofollow\\",children:\\"3 models for logging with OpenTelemetry and Elastic\\"})}),`\\n`]}),`\\n`,(0,n.jsxs)(e.li,{children:[`\\n`,(0,n.jsx)(e.p,{children:(0,n.jsx)(e.a,{href:\\"https://www.elastic.co/blog/adding-free-and-open-elastic-apm-as-part-of-your-elastic-observability-deployment\\",rel:\\"nofollow\\",children:\\"Adding free and open Elastic APM as part of your Elastic Observability deployment\\"})}),`\\n`]}),`\\n`,(0,n.jsxs)(e.li,{children:[`\\n`,(0,n.jsx)(e.p,{children:(0,n.jsx)(e.a,{href:\\"https://www.elastic.co/blog/monitor-openai-api-gpt-models-opentelemetry-elastic\\",rel:\\"nofollow\\",children:\\"Monitor OpenAI API and GPT models with OpenTelemetry and 
Elastic\\"})}),`\\n`]}),`\\n`,(0,n.jsxs)(e.li,{children:[`\\n`,(0,n.jsxs)(e.p,{children:[\\"Futureproof\\",(0,n.jsx)(e.a,{href:\\"https://www.elastic.co/virtual-events/future-proof-your-observability-platform-with-opentelemetry-and-elastic\\",rel:\\"nofollow\\",children:\\" your observability platform with OpenTelemetry and Elastic\\"})]}),`\\n`]}),`\\n`,(0,n.jsxs)(e.li,{children:[`\\n`,(0,n.jsx)(e.p,{children:\\"Instrumentation resources:\\"}),`\\n`,(0,n.jsxs)(e.ul,{children:[`\\n`,(0,n.jsxs)(e.li,{children:[`\\n`,(0,n.jsxs)(e.p,{children:[\\"Python: \\",(0,n.jsx)(e.a,{href:\\"https://www.elastic.co/blog/auto-instrumentation-of-python-applications-opentelemetry\\",rel:\\"nofollow\\",children:\\"Auto-instrumentation\\"}),\\", \\",(0,n.jsx)(e.a,{href:\\"https://www.elastic.co/blog/manual-instrumentation-of-python-applications-opentelemetry\\",rel:\\"nofollow\\",children:\\"Manual instrumentation\\"})]}),`\\n`]}),`\\n`,(0,n.jsxs)(e.li,{children:[`\\n`,(0,n.jsxs)(e.p,{children:[\\"Java: \\",(0,n.jsx)(e.a,{href:\\"https://www.elastic.co/blog/auto-instrumentation-of-java-applications-opentelemetry\\",rel:\\"nofollow\\",children:\\"Auto-instrumentation\\"}),\\", \\",(0,n.jsx)(e.a,{href:\\"https://www.elastic.co/blog/manual-instrumentation-of-java-applications-opentelemetry\\",rel:\\"nofollow\\",children:\\"Manual instrumentation\\\\xA0\\"})]}),`\\n`]}),`\\n`,(0,n.jsxs)(e.li,{children:[`\\n`,(0,n.jsxs)(e.p,{children:[\\"Node.js: \\",(0,n.jsx)(e.a,{href:\\"https://www.elastic.co/blog/auto-instrument-nodejs-applications-opentelemetry\\",rel:\\"nofollow\\",children:\\"Auto-instrumentation\\"}),\\", \\",(0,n.jsx)(e.a,{href:\\"https://www.elastic.co/blog/manual-instrumentation-of-nodejs-applications-opentelemetry\\",rel:\\"nofollow\\",children:\\"Manual instrumentation\\"})]}),`\\n`]}),`\\n`,(0,n.jsxs)(e.li,{children:[`\\n`,(0,n.jsxs)(e.p,{children:[\\".NET: \\",(0,n.jsx)(e.a,{href:\\"https://www.elastic.co/blog/auto-instrumentation-of-net-applications-opentelemetry\\",rel:\\"nofollow\\",children:\\"Auto-instrumentation\\"}),\\", \\",(0,n.jsx)(e.a,{href:\\"https://www.elastic.co/blog/manual-instrumentation-of-net-applications-opentelemetry\\",rel:\\"nofollow\\",children:\\"Manual instrumentation\\"})]}),`\\n`]}),`\\n`]}),`\\n`]}),`\\n`]}),`\\n`,(0,n.jsxs)(e.p,{children:[\\"Also log into \\",(0,n.jsx)(e.a,{href:\\"https://cloud.elastic.co\\",rel:\\"nofollow\\",children:\\"cloud.elastic.co\\"}),\\" to try out Elastic with a free trial.\\"]})]})}function d(t={}){let{wrapper:e}=t.components||{};return e?(0,n.jsx)(e,{...t,children:(0,n.jsx)(h,{...t})}):h(t)}return L(v);})();\\n;return Component;"},"_id":"articles/elastic-opentelemetry-langchain-tracing.mdx","_raw":{"sourceFilePath":"articles/elastic-opentelemetry-langchain-tracing.mdx","sourceFileName":"elastic-opentelemetry-langchain-tracing.mdx","sourceFileDir":"articles","contentType":"mdx","flattenedPath":"articles/elastic-opentelemetry-langchain-tracing"},"type":"Article","imageUrl":"/assets/images/elastic-opentelemetry-langchain-tracing/LangChainBlogMainImage.png","readingTime":"7 min read","url":"/elastic-opentelemetry-langchain-tracing","headings":[{"level":2,"title":"OpenTelemetry Environment variables:","href":"#opentelemetry-environment-variablesa-idopentelemetry-environment-variablesa"},{"level":2,"title":"OpenLLMetry 
Library:","href":"#openllmetry-librarya-idopenllmetry-librarya"},{"level":2,"title":"Instrumentation","href":"#instrumentationa-idinstrumentationa"},{"level":3,"title":"Manual-instrumentation","href":"#manual-instrumentationa-idmanual-instrumentationa"}]},{"title":"Unlock possibilities with native OpenTelemetry: prioritize reliability, not proprietary limitations","slug":"elastic-opentelemetry-native-kubernetes-observability","date":"2024-11-12","description":"Elastic now supports Elastic Distributions of OpenTelemetry (EDOT) deployment and management on Kubernetes, using OTel Operator. SREs can now access out-of the-box configurations and dashboards designed to streamline collector deployment, application auto-instrumentation and lifecycle management with Elastic Observability.","image":"Kubecon-main-blog.jpg","author":[{"slug":"bahubali-shetti","type":"Author","_raw":{}},{"slug":"miguel-luna","type":"Author","_raw":{}}],"tags":[{"slug":"kubernetes","type":"Tag","_raw":{}},{"slug":"opentelemetry","type":"Tag","_raw":{}},{"slug":"APM","type":"Tag","_raw":{}}],"body":{"raw":"\\nOpenTelemetry (OTel) is emerging as the standard for data ingestion since it delivers a vendor-agnostic way to ingest data across all telemetry signals. Elastic Observability is leading the OTel evolution with the following announcements:\\n\\n- **Native OTel Integrity:** Elastic is now 100% OTel-native, retaining OTel data natively without requiring data translation This eliminates the need for SREs to handle tedious schema conversions and develop customized views. All Elastic Observability capabilities—such as entity discovery, entity-centric insights, APM, infrastructure monitoring, and AI-driven issue analysis— now seamlessly work with native OTel data.\\n\\n- **Powerful end to end OTel based Kubernetes observability with** [**Elastic Distributions of OpenTelemetry (EDOT)**](https://www.elastic.co/observability-labs/blog/elastic-distributions-opentelemetry)**:** Elastic now supports EDOT deployment and management on Kubernetes via the OTel Operator, enabling streamlined EDOT collector deployment, application auto-instrumentation, and lifecycle management. With out-of-the-box OTel-based Kubernetes integration and dashboards, SREs gain instant, real-time visibility into cluster and application metrics, logs, and traces—with no manual configuration needed.\\n\\nFor organizations, it signals our commitment to open standards, streamlined data collection, and delivering insights from native OpenTelemetry data. Bring the power of Elastic Observability to your Kubernetes and OpenTelemetry deployments for maximum visibility and performance.\xa0\\n\\n\\n# Fully native OTel architecture with in-depth data analysis\\n\\nElastic’s OpenTelemetry-first architecture is 100% OTel-native, fully retaining the OTel data model, including OTel Semantic Conventions and Resource attributes, so your observability data remains in OpenTelemetry standards. OTel data in Elastic is also backward compatible with the Elastic Common Schema (ECS).\\n\\nSREs now gain a holistic view of resources, as Elastic accurately identifies entities through OTel resource attributes. 
\\n\\nOnce OTel data is in Elastic’s scalable vector datastore, Elastic’s capabilities such as the AI Assistant, zero-config machine learning-based anomaly detection, pattern analysis, and latency correlation empower SREs to quickly analyze and pinpoint potential issues in production environments.\\n\\n# Kubernetes insights with Elastic Distributions of OpenTelemetry (EDOT)\\n\\nEDOT reduces manual effort through automated onboarding and pre-configured dashboards. With EDOT and OpenTelemetry, Elastic makes Kubernetes monitoring straightforward and accessible for organizations of any size.\\n\\nEDOT, paired with Elasticsearch, enables storage for all signal types—logs, metrics, traces, and soon profiling—while maintaining essential resource attributes and semantic conventions.\\n\\nElastic’s OpenTelemetry-native solution enables customers to quickly extract insights from their data rather than manage complex infrastructure to ingest data. Elastic automates the deployment and configuration of observability components to deliver a user experience focused on ease and scalability, making it well-suited for large-scale environments and diverse industry needs.\\n\\nLet’s take a look at how Elastic’s EDOT enables visibility into Kubernetes environments.\\n\\n\\n## 1. Simple 3-step OTel ingest with lifecycle management and auto-instrumentation\xa0\\n\\nElastic leverages the upstream OpenTelemetry Operator to automate its EDOT lifecycle management—including deployment, scaling, and updates—allowing customers to focus on visibility into their Kubernetes infrastructure and applications instead of their observability infrastructure for data collection.\\n\\nThe Operator integrates with the EDOT Collector and language SDKs to provide a consistent, vendor-agnostic experience. For instance, when customers deploy a new application, they don’t need to manually configure instrumentation for various languages; the OpenTelemetry Operator manages this through auto-instrumentation, as supported by the upstream OpenTelemetry project.\\n\\nThis integration simplifies observability by ensuring consistent application instrumentation across the Kubernetes environment. Elastic’s collaboration with the upstream OpenTelemetry project strengthens this automation, enabling users to benefit from the latest updates and improvements in the OpenTelemetry ecosystem. By relying on open source tools like the OpenTelemetry Operator, Elastic ensures that its solutions stay aligned with the latest advancements in the OpenTelemetry project, reinforcing its commitment to open standards and community-driven development.\\n\\n![Unified OTel-based Kubernetes Experience](/assets/images/elastic-opentelemetry-native-kubernetes-observability/unified-otel-based-k8s-experience.png)\\n\\nThe diagram above shows how the operator can deploy multiple OTel collectors, helping SREs deploy individual EDOT Collectors for specific applications and infrastructure. This configuration improves availability for OTel ingest, and the telemetry is sent directly to Elasticsearch servers via OTLP.\\n\\n[Check out our recent blog on how to set this up](https://www.elastic.co/observability-labs/blog/elastic-opentelemetry-otel-operator).\\n\\n\\n## 2. 
Out-of-the-box OTel-based Kubernetes integration with dashboards\\n\\nElastic delivers an OTel-based Kubernetes configuration for the OTel collector by packaging all necessary receivers, processors, and configurations for Kubernetes observability. This enables users to automatically collect, process, and analyze Kubernetes metrics, logs, and traces without the need to configure each component individually.\\n\\nThe OpenTelemetry Kubernetes Collector components provide essential building blocks, including receivers like the Kubernetes Cluster Receiver for cluster metrics and the Kubeletstats Receiver for detailed node and container metrics, along with processors for data transformation and enrichment. By packaging these components, Elastic offers a turnkey solution that simplifies Kubernetes observability and eliminates the need for users to set up and configure individual collectors or processors.\\n\\nThis pre-packaged approach, which includes [OTel-native Kibana assets](https://github.com/elastic/integrations/tree/main/packages/kubernetes_otel) such as dashboards, allows users to focus on analyzing their observability data rather than managing configuration details. Elastic’s Unified OpenTelemetry Experience ensures that users can harness OpenTelemetry’s full potential without needing deep expertise. Whether you’re monitoring resource usage, container health, or API server metrics, users gain comprehensive observability through EDOT.\\n\\nFor more details on OpenTelemetry Kubernetes Collector components, visit [OpenTelemetry Collector Components](https://opentelemetry.io/docs/kubernetes/collector/components/).\\n\\n![OTel-based Kubernetes Dashboard](/assets/images/elastic-opentelemetry-native-kubernetes-observability/otel-based-k8s-dashboard.png)\\n\\n## 3. Streamlined ingest architecture with OTel data and Elasticsearch\\n\\nElastic’s ingest architecture minimizes infrastructure overhead by enabling users to forward trace data directly into Elasticsearch with the EDOT Collector, removing the need for the Elastic APM server. This approach:\\n\\n- Reduces the costs and complexity associated with maintaining additional infrastructure, allowing users to deploy, scale, and manage their observability solutions with fewer resources.\\n\\n- Allows all OTel data (metrics, logs, and traces) to be ingested and stored in Elastic’s singular vector database, enabling further analysis with Elastic’s AI-driven capabilities.\\n\\nSREs can now reduce operational burdens while also gaining the high-performance analytics and observability insights provided by Elastic.\\n\\n\\n# Elastic’s ongoing commitment to open source and OpenTelemetry\\n\\nWith [Elasticsearch fully open source once again](https://www.elastic.co/blog/elasticsearch-is-open-source-again) under the AGPL license, Elastic reinforces its deep commitment to open standards and the open source community. This aligns with Elastic’s OpenTelemetry-first approach to observability, where Elastic Distributions of OpenTelemetry (EDOT) streamline OTel ingestion and schema auto-detection, providing real-time insights for Kubernetes and application telemetry.\\n\\nAs users increasingly adopt OTel as their schema and data collection architecture for observability, Elastic’s Distribution of OpenTelemetry (EDOT), currently in tech preview, enhances standard OpenTelemetry capabilities and improves troubleshooting while also serving as a commercially supported OTel distribution. 
EDOT, together with Elastic’s recent contributions of the Elastic Profiling Agent and Elastic Common Schema (ECS) to OpenTelemetry, reinforces Elastic’s commitment to establishing OpenTelemetry as the industry standard.\\n\\nCustomers can now embrace open standards and enjoy the advantages of an open, extensible platform that integrates seamlessly with their environment. End result?\xa0 Reduced costs, greater visibility, and vendor independence.\\n\\n\\n# Getting hands-on with Elastic Observability and EDOT\\n\\nReady to try out the OTel Operator with the EDOT Collector and SDKs to see how Elastic utilizes ingested OTel data in APM, Discover, Analysis, and out-of-the-box dashboards?\xa0\\n\\n- [Get an account on Elastic Cloud](https://cloud.elastic.co/)\\n\\n- [Learn about Elastic Distributions of OpenTelemetry](https://www.elastic.co/observability-labs/blog/elastic-distributions-opentelemetry)\\n\\n- [Utilize the OpenTelemetry Demo with EDOT](https://www.elastic.co/observability-labs/blog/opentelemetry-demo-with-the-elastic-distributions-of-opentelemetry)\\n\\n- [Understand how you can monitor Kubernetes with EDOT](https://www.elastic.co/observability-labs/blog/infrastructure-monitoring-with-opentelemetry-in-elastic-observability)\\n\\n- [Utilize the EDOT Operator](https://github.com/elastic/opentelemetry) and the [EDOT OTel collector](https://www.elastic.co/observability-labs/blog/elastic-distribution-opentelemetry-collector)\\n\\nIf you have your own application and want to configure it with EDOT auto-instrumentation, read the following blogs on Go, Java, PHP, and Python:\\n\\n- [Auto-Instrumenting Go Applications with OpenTelemetry](https://www.elastic.co/observability-labs/blog/auto-instrumentation-go-applications-opentelemetry)\\n\\n- [Elastic Distribution OpenTelemetry Java Agent](https://www.elastic.co/observability-labs/blog/elastic-distribution-opentelemetry-java-agent)\\n\\n- [Elastic OpenTelemetry Distribution for PHP](https://www.elastic.co/observability-labs/blog/elastic-opentelemetry-distribution-php)\\n\\n- [Elastic OpenTelemetry Distribution for Python](https://www.elastic.co/observability-labs/blog/elastic-opentelemetry-distribution-python)\\n\\n","code":"var Component=(()=>{var p=Object.create;var s=Object.defineProperty;var u=Object.getOwnPropertyDescriptor;var m=Object.getOwnPropertyNames;var b=Object.getPrototypeOf,g=Object.prototype.hasOwnProperty;var y=(i,e)=>()=>(e||i((e={exports:{}}).exports,e),e.exports),f=(i,e)=>{for(var n in e)s(i,n,{get:e[n],enumerable:!0})},o=(i,e,n,r)=>{if(e&&typeof e==\\"object\\"||typeof e==\\"function\\")for(let a of m(e))!g.call(i,a)&&a!==n&&s(i,a,{get:()=>e[a],enumerable:!(r=u(e,a))||r.enumerable});return i};var w=(i,e,n)=>(n=i!=null?p(b(i)):{},o(e||!i||!i.__esModule?s(n,\\"default\\",{value:i,enumerable:!0}):n,i)),v=i=>o(s({},\\"__esModule\\",{value:!0}),i);var c=y((D,l)=>{l.exports=_jsx_runtime});var T={};f(T,{default:()=>h,frontmatter:()=>O});var t=w(c()),O={title:\\"Unlock possibilities with native OpenTelemetry: prioritize reliability, not proprietary limitations\\",slug:\\"elastic-opentelemetry-native-kubernetes-observability\\",date:\\"2024-11-12\\",description:\\"Elastic now supports Elastic Distributions of OpenTelemetry (EDOT) deployment and management on Kubernetes, using OTel Operator. 
SREs can now access out-of the-box configurations and dashboards designed to streamline collector deployment, application auto-instrumentation and lifecycle management with Elastic Observability.\\",author:[{slug:\\"bahubali-shetti\\"},{slug:\\"miguel-luna\\"}],image:\\"Kubecon-main-blog.jpg\\",tags:[{slug:\\"kubernetes\\"},{slug:\\"opentelemetry\\"},{slug:\\"APM\\"}]};function d(i){let e={a:\\"a\\",h1:\\"h1\\",h2:\\"h2\\",img:\\"img\\",li:\\"li\\",p:\\"p\\",strong:\\"strong\\",ul:\\"ul\\",...i.components};return(0,t.jsxs)(t.Fragment,{children:[(0,t.jsx)(e.p,{children:\\"OpenTelemetry (OTel) is emerging as the standard for data ingestion since it delivers a vendor-agnostic way to ingest data across all telemetry signals. Elastic Observability is leading the OTel evolution with the following announcements:\\"}),`\\n`,(0,t.jsxs)(e.ul,{children:[`\\n`,(0,t.jsxs)(e.li,{children:[`\\n`,(0,t.jsxs)(e.p,{children:[(0,t.jsx)(e.strong,{children:\\"Native OTel Integrity:\\"}),\\" Elastic is now 100% OTel-native, retaining OTel data natively without requiring data translation This eliminates the need for SREs to handle tedious schema conversions and develop customized views. All Elastic Observability capabilities\\\\u2014such as entity discovery, entity-centric insights, APM, infrastructure monitoring, and AI-driven issue analysis\\\\u2014 now seamlessly work with native OTel data.\\"]}),`\\n`]}),`\\n`,(0,t.jsxs)(e.li,{children:[`\\n`,(0,t.jsxs)(e.p,{children:[(0,t.jsx)(e.strong,{children:\\"Powerful end to end OTel based Kubernetes observability with\\"}),\\" \\",(0,t.jsx)(e.a,{href:\\"https://www.elastic.co/observability-labs/blog/elastic-distributions-opentelemetry\\",rel:\\"nofollow\\",children:(0,t.jsx)(e.strong,{children:\\"Elastic Distributions of OpenTelemetry (EDOT)\\"})}),(0,t.jsx)(e.strong,{children:\\":\\"}),\\" Elastic now supports EDOT deployment and management on Kubernetes via the OTel Operator, enabling streamlined EDOT collector deployment, application auto-instrumentation, and lifecycle management. With out-of-the-box OTel-based Kubernetes integration and dashboards, SREs gain instant, real-time visibility into cluster and application metrics, logs, and traces\\\\u2014with no manual configuration needed.\\"]}),`\\n`]}),`\\n`]}),`\\n`,(0,t.jsx)(e.p,{children:\\"For organizations, it signals our commitment to open standards, streamlined data collection, and delivering insights from native OpenTelemetry data. Bring the power of Elastic Observability to your Kubernetes and OpenTelemetry deployments for maximum visibility and performance.\\\\xA0\\"}),`\\n`,(0,t.jsx)(e.h1,{id:\\"fully-native-otel-architecture-with-in-depth-data-analysis\\",children:\\"Fully native OTel architecture with in-depth data analysis\\"}),`\\n`,(0,t.jsx)(e.p,{children:\\"Elastic\\\\u2019s OpenTelemetry-first architecture is 100% OTel-native, fully retaining the OTel data model, including OTel Semantic Conventions and Resource attributes, so your observability data remains in OpenTelemetry standards. OTel data in Elastic is also backward compatible with the Elastic Common Schema (ECS).\\"}),`\\n`,(0,t.jsx)(e.p,{children:\\"SREs now gain a holistic view of resources, as Elastic accurately identifies entities through OTel resource attributes. 
For example, in a Kubernetes environment, Elastic identifies containers, hosts, and services and connects these entities to logs, metrics, and traces.\\"}),`\\n`,(0,t.jsx)(e.p,{children:\\"Once OTel data is in Elastic\\\\u2019s scalable vector datastore, Elastic\\\\u2019s capabilities such as the AI Assistant, zero-config machine learning-based anomaly detection, pattern analysis, and latency correlation empower SREs to quickly analyze and pinpoint potential issues in production environments.\\"}),`\\n`,(0,t.jsx)(e.h1,{id:\\"kubernetes-insights-with-elastic-distributions-of-opentelemetry-edot\\",children:\\"Kubernetes insights with Elastic Distributions of OpenTelemetry (EDOT)\\"}),`\\n`,(0,t.jsx)(e.p,{children:\\"EDOT reduces manual effort through automated onboarding and pre-configured dashboards. With EDOT and OpenTelemetry, Elastic makes Kubernetes monitoring straightforward and accessible for organizations of any size.\\"}),`\\n`,(0,t.jsx)(e.p,{children:\\"EDOT paired with Elasticsearch,\\\\xA0 enables storage for all signal types\\\\u2014logs, metrics, traces, and soon profiling\\\\u2014while maintaining essential resource attributes and semantic conventions.\\"}),`\\n`,(0,t.jsx)(e.p,{children:\\"Elastic\\\\u2019s OpenTelemetry-native solution enables customers to quickly extract insights from their data rather than manage complex infrastructure to ingest data. Elastic automates the deployment and configuration of observability components to deliver a user experience focused on ease and scalability, making it well-suited for large-scale environments and diverse industry needs.\\"}),`\\n`,(0,t.jsx)(e.p,{children:\\"Let\\\\u2019s take a look at how Elastic\\\\u2019s EDOT enables visibility into Kubernetes environments.\\"}),`\\n`,(0,t.jsx)(e.h2,{id:\\"1-simple-3-step-otel-ingest-with-lifecycle-management-and-auto-instrumentation\\",children:\\"1. Simple 3-step OTel ingest with lifecycle management and auto-instrumentation\\\\xA0\\"}),`\\n`,(0,t.jsx)(e.p,{children:\\"Elastic leverages the upstream OpenTelemetry Operator to automate its EDOT lifecycle management\\\\u2014including deployment, scaling, and updates\\\\u2014allowing customers to focus on visibility into their Kubernetes infrastructure and applications instead of their observability infrastructure for data collection.\\"}),`\\n`,(0,t.jsx)(e.p,{children:\\"The Operator integrates with the EDOT Collector and language SDKs to provide a consistent, vendor-agnostic experience. For instance, when customers deploy a new application, they don\\\\u2019t need to manually configure instrumentation for various languages; the OpenTelemetry Operator manages this through auto-instrumentation, as supported by the upstream OpenTelemetry project.\\"}),`\\n`,(0,t.jsx)(e.p,{children:\\"This integration simplifies observability by ensuring consistent application instrumentation across the Kubernetes environment. Elastic\\\\u2019s collaboration with the upstream OpenTelemetry project strengthens this automation, enabling users to benefit from the latest updates and improvements in the OpenTelemetry ecosystem. 
By relying on open source tools like the OpenTelemetry Operator, Elastic ensures that its solutions stay aligned with the latest advancements in the OpenTelemetry project, reinforcing its commitment to open standards and community-driven development.\\"}),`\\n`,(0,t.jsx)(e.p,{children:(0,t.jsx)(e.img,{src:\\"/assets/images/elastic-opentelemetry-native-kubernetes-observability/unified-otel-based-k8s-experience.png\\",alt:\\"Unified OTel-based Kubernetes Experience\\",width:\\"960\\",height:\\"540\\"})}),`\\n`,(0,t.jsx)(e.p,{children:\\"The diagram above shows how the operator can deploy multiple OTel collectors, helping SREs deploy individual EDOT Collectors for specific applications and infrastructure. This configuration improves availability for OTel ingest and the telemetry is sent directly to Elasticsearch servers via OTLP.\\"}),`\\n`,(0,t.jsxs)(e.p,{children:[(0,t.jsx)(e.a,{href:\\"https://www.elastic.co/observability-labs/blog/elastic-opentelemetry-otel-operator\\",rel:\\"nofollow\\",children:\\"Check out our recent blog on how to set this up\\"}),\\".\\"]}),`\\n`,(0,t.jsx)(e.h2,{id:\\"2-out-of-the-box-otel-based-kubernetes-integration-with-dashboards\\",children:\\"2. Out-of-the-box OTel-based Kubernetes integration with dashboards\\"}),`\\n`,(0,t.jsx)(e.p,{children:\\"Elastic delivers an OTel-based Kubernetes configuration for the OTel collector by packaging all necessary receivers, processors, and configurations for Kubernetes observability. This enables users to automatically collect, process, and analyze Kubernetes metrics, logs, and traces without the need to configure each component individually.\\"}),`\\n`,(0,t.jsx)(e.p,{children:\\"The OpenTelemetry Kubernetes Collector components provide essential building blocks, including receivers like the Kubernetes Receiver for cluster metrics, Kubeletstats Receiver for detailed node and container metrics, along with processors for data transformation and enrichment. By packaging these components, Elastic offers a turnkey solution that simplifies Kubernetes observability and eliminates the need for users to set up and configure individual collectors or processors.\\"}),`\\n`,(0,t.jsxs)(e.p,{children:[\\"This pre-packaged approach, which includes \\",(0,t.jsx)(e.a,{href:\\"https://github.com/elastic/integrations/tree/main/packages/kubernetes_otel\\",rel:\\"nofollow\\",children:\\"OTel-native Kibana assets\\"}),\\" such as dashboards, allows users to focus on analyzing their observability data rather than managing configuration details. Elastic\\\\u2019s Unified OpenTelemetry Experience ensures that users can harness OpenTelemetry\\\\u2019s full potential without needing deep expertise. Whether you\\\\u2019re monitoring resource usage, container health, or API server metrics, users gain comprehensive observability through EDOT.\\"]}),`\\n`,(0,t.jsxs)(e.p,{children:[\\"For more details on OpenTelemetry Kubernetes Collector components, visit\\",(0,t.jsx)(e.a,{href:\\"https://opentelemetry.io/docs/kubernetes/collector/components/\\",rel:\\"nofollow\\",children:\\" OpenTelemetry Collector Components\\"}),\\".\\"]}),`\\n`,(0,t.jsx)(e.p,{children:(0,t.jsx)(e.img,{src:\\"/assets/images/elastic-opentelemetry-native-kubernetes-observability/otel-based-k8s-dashboard.png\\",alt:\\"OTel-based Kubernetes Dashboard\\",width:\\"1810\\",height:\\"1085\\"})}),`\\n`,(0,t.jsx)(e.h2,{id:\\"3-streamlined-ingest-architecture-with-otel-data-and-elasticsearch\\",children:\\"3. 
Streamlined ingest architecture with OTel data and Elasticsearch\\"}),`\\n`,(0,t.jsx)(e.p,{children:\\"Elastic\\\\u2019s ingest architecture minimizes infrastructure overhead by enabling users to forward trace data directly into Elasticsearch with the EDOT Collector, removing the need for the Elastic APM server. This approach:\\"}),`\\n`,(0,t.jsxs)(e.ul,{children:[`\\n`,(0,t.jsxs)(e.li,{children:[`\\n`,(0,t.jsx)(e.p,{children:\\"Reduces the costs and complexity associated with maintaining additional infrastructure, allowing users to deploy, scale, and manage their observability solutions with fewer resources.\\"}),`\\n`]}),`\\n`,(0,t.jsxs)(e.li,{children:[`\\n`,(0,t.jsx)(e.p,{children:\\"Allows all OTel data, metrics, logs, and traces to be ingested and stored in Elastic\\\\u2019s singular vector database store enabling further analysis with Elastic\\\\u2019s AI-driven capabilities.\\"}),`\\n`]}),`\\n`]}),`\\n`,(0,t.jsx)(e.p,{children:\\"SREs can now reduce operational burdens while also gaining high performance analytics and observability insights provided by Elastic.\\"}),`\\n`,(0,t.jsx)(e.h1,{id:\\"elastics-ongoing-commitment-to-open-source-and-opentelemetry\\",children:\\"Elastic\\\\u2019s ongoing commitment to open source and OpenTelemetry\\"}),`\\n`,(0,t.jsxs)(e.p,{children:[\\"With \\",(0,t.jsx)(e.a,{href:\\"https://www.elastic.co/blog/elasticsearch-is-open-source-again\\",rel:\\"nofollow\\",children:\\"Elasticsearch fully open source once again\\"}),\\" under the AGPL license,\\\\xA0 this change reinforces our deep commitment to open standards and the open source community. This aligns with Elastic\\\\u2019s OpenTelemetry-first approach to observability, where Elastic Distributions of OpenTelemetry (EDOT) streamline OTel ingestion and schema auto-detection, providing real-time insights for Kubernetes and application telemetry.\\"]}),`\\n`,(0,t.jsx)(e.p,{children:\\"As users increasingly adopt OTel as their schema and data collection architecture for observability, Elastic\\\\u2019s Distribution of OpenTelemetry (EDOT), currently in tech preview, enhances standard OpenTelemetry capabilities and improves troubleshooting while also serving as a commercially supported OTel distribution. EDOT, together with Elastic\\\\u2019s recent contributions of the Elastic Profiling Agent and Elastic Common Schema (ECS) to OpenTelemetry, reinforces Elastic\\\\u2019s commitment to establishing OpenTelemetry as the industry standard.\\"}),`\\n`,(0,t.jsx)(e.p,{children:\\"Customers can now embrace open standards and enjoy the advantages of an open, extensible platform that integrates seamlessly with their environment. 
End result?\\\\xA0 Reduced costs, greater visibility, and vendor independence.\\"}),`\\n`,(0,t.jsx)(e.h1,{id:\\"getting-hands-on-with-elastic-observability-and-edot\\",children:\\"Getting hands-on with Elastic Observability and EDOT\\"}),`\\n`,(0,t.jsx)(e.p,{children:\\"Ready to try out the OTel Operator with EDOT collector and SDKs to see how Elastic utilizes ingested OTel data in APM, Discover, Analysis, and out-of-the-box dashboards?\\\\xA0\\"}),`\\n`,(0,t.jsxs)(e.ul,{children:[`\\n`,(0,t.jsxs)(e.li,{children:[`\\n`,(0,t.jsx)(e.p,{children:(0,t.jsx)(e.a,{href:\\"https://cloud.elastic.co/\\",rel:\\"nofollow\\",children:\\"Get an account on Elastic Cloud\\"})}),`\\n`]}),`\\n`,(0,t.jsxs)(e.li,{children:[`\\n`,(0,t.jsx)(e.p,{children:(0,t.jsx)(e.a,{href:\\"https://www.elastic.co/observability-labs/blog/elastic-distributions-opentelemetry\\",rel:\\"nofollow\\",children:\\"Learn about Elastic Distributions of OpenTelemetry Overview\\"})}),`\\n`]}),`\\n`,(0,t.jsxs)(e.li,{children:[`\\n`,(0,t.jsx)(e.p,{children:(0,t.jsx)(e.a,{href:\\"https://www.elastic.co/observability-labs/blog/opentelemetry-demo-with-the-elastic-distributions-of-opentelemetry\\",rel:\\"nofollow\\",children:\\"Utilize the OpenTelemetry Demo with EDOT\\"})}),`\\n`]}),`\\n`,(0,t.jsxs)(e.li,{children:[`\\n`,(0,t.jsx)(e.p,{children:(0,t.jsx)(e.a,{href:\\"https://www.elastic.co/observability-labs/blog/infrastructure-monitoring-with-opentelemetry-in-elastic-observability\\",rel:\\"nofollow\\",children:\\"Understand how you can monitor Kubernetes with EDOT\\"})}),`\\n`]}),`\\n`,(0,t.jsxs)(e.li,{children:[`\\n`,(0,t.jsxs)(e.p,{children:[(0,t.jsx)(e.a,{href:\\"https://github.com/elastic/opentelemetry\\",rel:\\"nofollow\\",children:\\"Utilize the EDOT Operator \\"}),\\"and the \\",(0,t.jsx)(e.a,{href:\\"https://www.elastic.co/observability-labs/blog/elastic-distribution-opentelemetry-collector\\",rel:\\"nofollow\\",children:\\"EDOT OTel collector\\"})]}),`\\n`]}),`\\n`]}),`\\n`,(0,t.jsx)(e.p,{children:\\"If you have your own application and want to configure EDOT the application with auto-instrumentation, read the following blogs on Go, Java, PHP, Python\\"}),`\\n`,(0,t.jsxs)(e.ul,{children:[`\\n`,(0,t.jsxs)(e.li,{children:[`\\n`,(0,t.jsx)(e.p,{children:(0,t.jsx)(e.a,{href:\\"https://www.elastic.co/observability-labs/blog/auto-instrumentation-go-applications-opentelemetry\\",rel:\\"nofollow\\",children:\\"Auto-Instrumenting Go Applications with OpenTelemetry\\"})}),`\\n`]}),`\\n`,(0,t.jsxs)(e.li,{children:[`\\n`,(0,t.jsx)(e.p,{children:(0,t.jsx)(e.a,{href:\\"https://www.elastic.co/observability-labs/blog/elastic-distribution-opentelemetry-java-agent\\",rel:\\"nofollow\\",children:\\"Elastic Distribution OpenTelemetry Java Agent\\"})}),`\\n`]}),`\\n`,(0,t.jsxs)(e.li,{children:[`\\n`,(0,t.jsx)(e.p,{children:(0,t.jsx)(e.a,{href:\\"https://www.elastic.co/observability-labs/blog/elastic-opentelemetry-distribution-php\\",rel:\\"nofollow\\",children:\\"Elastic OpenTelemetry Distribution for PHP\\"})}),`\\n`]}),`\\n`,(0,t.jsxs)(e.li,{children:[`\\n`,(0,t.jsx)(e.p,{children:(0,t.jsx)(e.a,{href:\\"https://www.elastic.co/observability-labs/blog/elastic-opentelemetry-distribution-python\\",rel:\\"nofollow\\",children:\\"Elastic OpenTelemetry Distribution for Python\\"})}),`\\n`]}),`\\n`]})]})}function h(i={}){let{wrapper:e}=i.components||{};return e?(0,t.jsx)(e,{...i,children:(0,t.jsx)(d,{...i})}):d(i)}return v(T);})();\\n;return 
Component;"},"_id":"articles/elastic-opentelemetry-native-kubernetes-observability.mdx","_raw":{"sourceFilePath":"articles/elastic-opentelemetry-native-kubernetes-observability.mdx","sourceFileName":"elastic-opentelemetry-native-kubernetes-observability.mdx","sourceFileDir":"articles","contentType":"mdx","flattenedPath":"articles/elastic-opentelemetry-native-kubernetes-observability"},"type":"Article","imageUrl":"/assets/images/elastic-opentelemetry-native-kubernetes-observability/Kubecon-main-blog.jpg","readingTime":"7 min read","url":"/elastic-opentelemetry-native-kubernetes-observability","headings":[{"level":2,"title":"1. Simple 3-step OTel ingest with lifecycle management and auto-instrumentation\xa0","href":"#1-simple-3-step-otel-ingest-with-lifecycle-management-and-auto-instrumentation"},{"level":2,"title":"2. Out-of-the-box OTel-based Kubernetes integration with dashboards","href":"#2-out-of-the-box-otel-based-kubernetes-integration-with-dashboards"},{"level":2,"title":"3. Streamlined ingest architecture with OTel data and Elasticsearch","href":"#3-streamlined-ingest-architecture-with-otel-data-and-elasticsearch"}]},{"title":"Native OTel-based K8s & App Observability in 3 Steps with Elastic","slug":"elastic-opentelemetry-otel-operator","date":"2024-11-13","description":"Elastic\'s Distributions of OpenTelemetry are now supported with the OTel Operator, providing auto instrumentation of applications with EDOT SDKs, and deployment and lifecycle management of the EDOT OTel Collector for Kubernetes Observability. Learn how to configure this in 3 easy steps","image":"OTel-operator.jpg","author":[{"slug":"bahubali-shetti","type":"Author","_raw":{}}],"tags":[{"slug":"kubernetes","type":"Tag","_raw":{}},{"slug":"opentelemetry","type":"Tag","_raw":{}},{"slug":"APM","type":"Tag","_raw":{}}],"body":{"raw":"\\nElastic recently released its Elastic Distributions of OpenTelemetry (EDOT) which have been developed to enhance the capabilities of standard OpenTelemetry distributions and improve existing OpenTelemetry support from Elastic. EDOT helps Elastic deliver its new Unified OpenTelemetry Experience. SRE’s are no longer burdened with a set of tedious steps instrumenting and ingesting OTel data into Observability. SREs get a simple and frictionless way to instrument the OTel collector, and applications, and ingest all the OTel data into Elastic. The components of this experience include: (detailed in the overview blog)\\n\\n- [Elastic Distributions for OpenTelemetry (EDOT)](https://www.elastic.co/observability-labs/blog/elastic-distributions-opentelemetry)\\n\\n- Elastic’s configuration for the OpenTelemetry Operator providing:\\n\\n - OTel Lifecycle management for the OTel collector and SDKs\\n\\n - Auto instrumentation of apps, which most developers will not instrument\\n\\n- Pre-packaged receivers, processors, exporters, and configuration for the OTel Kubernetes Collector\\n\\n- Out-of-the-box OTel-based K8S dashboards for metrics and logs\\n\\n- Discovered inventory views for services, hosts, and containers\\n\\n- Direct OTel ingest into Elasticsearch (by passing APM) - all your data (logs, metrics, and traces) is now stored in Elastic’s Search AI Lake\\n\\nIn this blog we will cover how to ingest OTel for K8S and your application in 3 easy steps:\\n\\n1. Copy the install commands from the UI\\n\\n2. Add the OpenTelemetry helm charts, Install the OpenTelemetry Operator with Elastic’s helm configuration & set your Elastic endpoint and authentication\\n\\n3. 
3. Annotate the app services you want to be auto-instrumented\xa0\\n\\nThen you can easily see K8S metrics and logs, as well as application logs, metrics, and traces, in Elastic Observability.\\n\\n![OpenTelemetry Unified Observability Experience](/assets/images/elastic-opentelemetry-otel-operator/unified-otel-based-k8s-experience.png)\\n\\n\\nTo follow this blog, you will need to have:\\n\\n1. An account on cloud.elastic.co, with access to get the Elasticsearch endpoint and authentication (API key)\\n\\n2. A non-instrumented application with services based on Go, .NET, Python, or Java, for auto-instrumentation through the OTel operator. In this example, we will be using the [Elastiflix](https://github.com/elastic/observability-examples/tree/main/Elastiflix) application.\xa0\\n\\n3. A Kubernetes cluster; we used EKS in our setup\\n\\n4. Helm and kubectl installed\\n\\nYou can find the authentication details in the integrations section of Elastic. More information is also available in the [documentation](https://www.elastic.co/guide/en/kibana/current/api-keys.html).\\n\\n![OpenTelemetry API Keys](/assets/images/elastic-opentelemetry-otel-operator/otel-api-keys.png)\\n\\n## K8S and Application Observability in Elastic:\\n\\n\\nBefore we walk you through the steps, let\'s show you what is visible in Elastic.\\n\\nOnce the Operator starts the OTel Collector, you can see the following in Elastic:\\n\\n### Kubernetes metrics:\\n\\n\\nUsing an out-of-the-box dashboard, you can see node metrics, overall cluster metrics, and status across pods, deployments, etc.\\n\\n\\n![OTel-based Kubernetes dashboard](/assets/images/elastic-opentelemetry-otel-operator/otel-k8s-dashboard.png)\\n\\n\\n### Discovered Inventory for Hosts, services, and containers:\\n\\n\\nThis can be found at Observability->Inventory on the UI\\n\\n![OTel-based Kubernetes inventory](/assets/images/elastic-opentelemetry-otel-operator/otel-k8s-inventory.png)\\n\\n\\n### Detailed metrics, logs, and processor info on hosts:\\n\\n\\nThis can be found at Observability->Infrastructure->Hosts\\n\\n\\n![OTel-based Kubernetes host metrics](/assets/images/elastic-opentelemetry-otel-operator/otel-k8s-hosts.png)\\n\\n\\n### K8S and application logs in Elastic’s New Discover (called Explorer)\\n\\n\\nThis can be found on Observability->Discover\\n\\n\\n![OTel-based Kubernetes logs](/assets/images/elastic-opentelemetry-otel-operator/otel-ingest-logs.png)\\n\\n### Application Service views (logs, metrics, and traces):\\n\\n\\nThis can be found on Observability->Application\\n\\n\\nThen select the service and drill down into different aspects. \\n\\n\\n![OTel-based Application Java traces](/assets/images/elastic-opentelemetry-otel-operator/otel-java-traces.png)\\n\\n\\nAbove, we show how traces are displayed using native OTel data.\\n\\n\\n## Steps to install\\n\\n\\n### Step 0. Follow the commands listed in the UI \\n\\n\\nUnder Add data->Kubernetes->Kubernetes Monitoring with EDOT\\n\\n\\nYou will find the following instructions, which we will follow here.\\n\\n\\n![EDOT Operator Install](/assets/images/elastic-opentelemetry-otel-operator/otel-edot-operator-install.png)\\n\\n\\n### Step 1. Install the EDOT config for the OpenTelemetry Operator\\n\\n\\nRun the following commands. Please make sure that you have already authenticated against your K8s cluster, as this is where you will run the helm commands provided below.
\\n\\n\\n```bash\\n# Add the required helm repo\\nhelm repo add open-telemetry https://open-telemetry.github.io/opentelemetry-helm-charts --force-update\\n# Create the namespace and secret. Provide the Elasticsearch endpoint URL and API key you noted in previous steps\\nkubectl create ns opentelemetry-operator-system\\nkubectl create -n opentelemetry-operator-system secret generic elastic-secret-otel \\\\\\n --from-literal=elastic_endpoint=\'YOUR_ELASTICSEARCH_ENDPOINT\' \\\\\\n --from-literal=elastic_api_key=\'YOUR_ELASTICSEARCH_API_KEY\'\\n# Install the EDOT Operator\\nhelm install opentelemetry-kube-stack open-telemetry/opentelemetry-kube-stack --namespace opentelemetry-operator-system --create-namespace --values https://raw.githubusercontent.com/elastic/opentelemetry/refs/heads/main/resources/kubernetes/operator/helm/values.yaml --version 0.3.0\\n```\\n\\n\\nThe values.yaml file configuration can be found [here](https://github.com/elastic/opentelemetry/blob/main/resources/kubernetes/operator/helm/values.yaml).\\n\\n\\n\\n\\n### Step 1b: Ensure OTel data is arriving in Elastic\\n\\n\\nThe simplest way to check is to go to Menu > Dashboards > **\\\\[OTEL]\\\\[Metrics Kubernetes] Cluster Overview,** and ensure you see the following dashboard being populated.\\n\\n\\n![OTel-based Kubernetes dashboard](/assets/images/elastic-opentelemetry-otel-operator/otel-k8s-dashboard.png)\\n\\n\\n### Step 2: Annotate the application with auto-instrumentation\\n\\n\\nFor this example, we’re only going to annotate one service, the favorite-java service in the [Elastiflix](https://github.com/elastic/observability-examples/tree/main/Elastiflix) application.\\n\\n\\nUse the following commands to initiate auto-instrumentation:\\n\\n\\n```bash\\n# Annotate the Java namespace\\nkubectl annotate namespace java instrumentation.opentelemetry.io/inject-java=\\"opentelemetry-operator-system/elastic-instrumentation\\"\\n# Restart the java-app to pick up the new annotation\\nkubectl rollout restart deployment java-app -n java\\n```\\n\\nYou can also modify the YAML for your pod to add the annotation:\\n\\n```yaml\\nmetadata:\\n  name: my-app\\n  annotations:\\n    instrumentation.opentelemetry.io/inject-python: \\"true\\"\\n```\\n\\nThese instructions are provided in the UI:\\n\\n\\n![Annotate Application with EDOT SDK](/assets/images/elastic-opentelemetry-otel-operator/otel-edot-sdk-annotate.png)\\n\\n## Check out the service data in Elastic APM\\n\\n\\nOnce the OTel data is in Elastic, you can see:\\n\\n\\n- Out-of-the-box dashboards for OTel-based Kubernetes metrics\\n\\n\\n- Discovered resources such as services, hosts, and containers that are part of the Kubernetes clusters\\n\\n\\n- Kubernetes metrics, host metrics, logs, processor info, anomaly detection, and universal profiling.\\n\\n\\n- Log analytics in Elastic Discover\\n\\n\\n- APM features that show app overview, transactions, dependencies, errors, and more:\\n\\n\\n![Java service in Elastic APM](/assets/images/elastic-opentelemetry-otel-operator/otel-java-service.png)\\n\\n\\n![OTel-based Application Java traces](/assets/images/elastic-opentelemetry-otel-operator/otel-java-traces.png)\\n\\n\\n## Try it out\\n\\n\\nElastic’s Distribution of OpenTelemetry (EDOT) transforms the observability experience by streamlining Kubernetes and application instrumentation. 
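As a quick sanity check before relying on the data, you can confirm that the operator components from the steps above are healthy and that the annotation actually triggered injection. This is a minimal sketch; it assumes the namespaces and names used in this walkthrough (the `opentelemetry-operator-system` namespace and the `java-app` deployment in the `java` namespace from the Elastiflix example):

```bash
# Confirm the operator and collector pods are running
kubectl get pods -n opentelemetry-operator-system

# Confirm the Instrumentation resource referenced by the annotation exists
kubectl get instrumentation -n opentelemetry-operator-system

# After the rollout restart, check that the new pod carries injected
# OTel settings (the app=java-app label selector is an assumption about
# the Elastiflix manifests; adjust it to your own labels)
kubectl describe pod -l app=java-app -n java | grep -i otel
```

If the collector pods are not running or the Instrumentation resource is missing, re-check the helm values and the secret created in Step 1 before annotating further services.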
With EDOT, SREs and developers can bypass complex setups, instantly gain deep visibility into Kubernetes clusters, and capture critical metrics, logs, and traces—all within Elastic Observability. By following just a few simple steps, you’re empowered with a unified, efficient monitoring solution that brings your OpenTelemetry data directly into Elastic. With robust, out-of-the-box dashboards, automatic application instrumentation, and seamless integration, EDOT not only saves time but also enhances the accuracy and accessibility of observability across your infrastructure. Start leveraging EDOT today to unlock a frictionless observability experience and keep your systems running smoothly and insightfully.\\n\\n\\nAdditional resources:\\n\\n\\n- [Elastic Distributions of OpenTelemetry Overview](https://www.elastic.co/observability-labs/blog/elastic-distributions-opentelemetry)\\n\\n\\n- [OpenTelemetry Demo with Elastic Distributions](https://www.elastic.co/observability-labs/blog/opentelemetry-demo-with-the-elastic-distributions-of-opentelemetry)\\n\\n\\n- [Infrastructure Monitoring with OpenTelemetry in Elastic Observability](https://www.elastic.co/observability-labs/blog/infrastructure-monitoring-with-opentelemetry-in-elastic-observability)\\n\\n\\n- [Auto-Instrumenting Go Applications with OpenTelemetry](https://www.elastic.co/observability-labs/blog/auto-instrumentation-go-applications-opentelemetry)\\n\\n\\n- [Elastic Distribution OpenTelemetry Java Agent](https://www.elastic.co/observability-labs/blog/elastic-distribution-opentelemetry-java-agent)\\n\\n\\n- [Elastic OpenTelemetry Distribution for PHP](https://www.elastic.co/observability-labs/blog/elastic-opentelemetry-distribution-php)\\n","code":"var Component=(()=>{var p=Object.create;var s=Object.defineProperty;var m=Object.getOwnPropertyDescriptor;var u=Object.getOwnPropertyNames;var b=Object.getPrototypeOf,g=Object.prototype.hasOwnProperty;var y=(n,e)=>()=>(e||n((e={exports:{}}).exports,e),e.exports),w=(n,e)=>{for(var i in e)s(n,i,{get:e[i],enumerable:!0})},a=(n,e,i,r)=>{if(e&&typeof e==\\"object\\"||typeof e==\\"function\\")for(let l of u(e))!g.call(n,l)&&l!==i&&s(n,l,{get:()=>e[l],enumerable:!(r=m(e,l))||r.enumerable});return n};var f=(n,e,i)=>(i=n!=null?p(b(n)):{},a(e||!n||!n.__esModule?s(i,\\"default\\",{value:n,enumerable:!0}):i,n)),v=n=>a(s({},\\"__esModule\\",{value:!0}),n);var c=y((D,o)=>{o.exports=_jsx_runtime});var T={};w(T,{default:()=>d,frontmatter:()=>O});var t=f(c()),O={title:\\"Native OTel-based K8s & App Observability in 3 Steps with Elastic\\",slug:\\"elastic-opentelemetry-otel-operator\\",date:\\"2024-11-13\\",description:\\"Elastic\'s Distributions of OpenTelemetry are now supported with the OTel Operator, providing auto instrumentation of applications with EDOT SDKs, and deployment and lifecycle management of the EDOT OTel Collector for Kubernetes Observability. 
Learn how to configure this in 3 easy steps.\\",author:[{slug:\\"bahubali-shetti\\"}],image:\\"OTel-operator.jpg\\",tags:[{slug:\\"kubernetes\\"},{slug:\\"opentelemetry\\"},{slug:\\"APM\\"}]};function h(n){let e={a:\\"a\\",code:\\"code\\",h2:\\"h2\\",h3:\\"h3\\",img:\\"img\\",li:\\"li\\",ol:\\"ol\\",p:\\"p\\",pre:\\"pre\\",strong:\\"strong\\",ul:\\"ul\\",...n.components};return(0,t.jsxs)(t.Fragment,{children:[(0,t.jsx)(e.p,{children:\\"Elastic recently released its Elastic Distributions of OpenTelemetry (EDOT), which have been developed to enhance the capabilities of standard OpenTelemetry distributions and improve existing OpenTelemetry support from Elastic. EDOT helps Elastic deliver its new Unified OpenTelemetry Experience. SREs are no longer burdened with a set of tedious steps for instrumenting and ingesting OTel data into Observability. SREs get a simple, frictionless way to deploy the OTel collector, instrument applications, and ingest all the OTel data into Elastic. The components of this experience include: (detailed in the overview blog)\\"}),`\\n`,(0,t.jsxs)(e.ul,{children:[`\\n`,(0,t.jsxs)(e.li,{children:[`\\n`,(0,t.jsx)(e.p,{children:(0,t.jsx)(e.a,{href:\\"https://www.elastic.co/observability-labs/blog/elastic-distributions-opentelemetry\\",rel:\\"nofollow\\",children:\\"Elastic Distributions for OpenTelemetry (EDOT)\\"})}),`\\n`]}),`\\n`,(0,t.jsxs)(e.li,{children:[`\\n`,(0,t.jsx)(e.p,{children:\\"Elastic\\\\u2019s configuration for the OpenTelemetry Operator providing:\\"}),`\\n`,(0,t.jsxs)(e.ul,{children:[`\\n`,(0,t.jsxs)(e.li,{children:[`\\n`,(0,t.jsx)(e.p,{children:\\"OTel lifecycle management for the OTel collector and SDKs\\"}),`\\n`]}),`\\n`,(0,t.jsxs)(e.li,{children:[`\\n`,(0,t.jsx)(e.p,{children:\\"Auto-instrumentation of apps that most developers will not instrument themselves\\"}),`\\n`]}),`\\n`]}),`\\n`]}),`\\n`,(0,t.jsxs)(e.li,{children:[`\\n`,(0,t.jsx)(e.p,{children:\\"Pre-packaged receivers, processors, exporters, and configuration for the OTel Kubernetes Collector\\"}),`\\n`]}),`\\n`,(0,t.jsxs)(e.li,{children:[`\\n`,(0,t.jsx)(e.p,{children:\\"Out-of-the-box OTel-based K8S dashboards for metrics and logs\\"}),`\\n`]}),`\\n`,(0,t.jsxs)(e.li,{children:[`\\n`,(0,t.jsx)(e.p,{children:\\"Discovered inventory views for services, hosts, and containers\\"}),`\\n`]}),`\\n`,(0,t.jsxs)(e.li,{children:[`\\n`,(0,t.jsx)(e.p,{children:\\"Direct OTel ingest into Elasticsearch (bypassing APM) - all your data (logs, metrics, and traces) is now stored in Elastic\\\\u2019s Search AI Lake\\"}),`\\n`]}),`\\n`]}),`\\n`,(0,t.jsx)(e.p,{children:\\"In this blog, we will cover how to ingest OTel for K8S and your application in 3 easy steps:\\"}),`\\n`,(0,t.jsxs)(e.ol,{children:[`\\n`,(0,t.jsxs)(e.li,{children:[`\\n`,(0,t.jsx)(e.p,{children:\\"Copy the install commands from the UI\\"}),`\\n`]}),`\\n`,(0,t.jsxs)(e.li,{children:[`\\n`,(0,t.jsx)(e.p,{children:\\"Add the OpenTelemetry Helm charts, install the OpenTelemetry Operator with Elastic\\\\u2019s Helm configuration, and set your Elastic endpoint and authentication\\"}),`\\n`]}),`\\n`,(0,t.jsxs)(e.li,{children:[`\\n`,(0,t.jsx)(e.p,{children:\\"Annotate the app services you want to be auto-instrumented\\\\xA0\\"}),`\\n`]}),`\\n`]}),`\\n`,(0,t.jsx)(e.p,{children:\\"Then you can easily see K8S metrics and logs, as well as application logs, metrics, and traces, in Elastic Observability.\\"}),`\\n`,(0,t.jsx)(e.p,{children:(0,t.jsx)(e.img,{src:\\"/assets/images/elastic-opentelemetry-otel-operator/unified-otel-based-k8s-experience.png\\",alt:\\"OpenTelemetry Unified Observability Experience
\\",width:\\"960\\",height:\\"540\\"})}),`\\n`,(0,t.jsx)(e.p,{children:\\"To follow this blog, you will need to have:\\"}),`\\n`,(0,t.jsxs)(e.ol,{children:[`\\n`,(0,t.jsxs)(e.li,{children:[`\\n`,(0,t.jsx)(e.p,{children:\\"An account on cloud.elastic.co, with access to get the Elasticsearch endpoint and authentication (API key)\\"}),`\\n`]}),`\\n`,(0,t.jsxs)(e.li,{children:[`\\n`,(0,t.jsxs)(e.p,{children:[\\"A non-instrumented application with services based on Go, .NET, Python, or Java, for auto-instrumentation through the OTel operator. In this example, we will be using the \\",(0,t.jsx)(e.a,{href:\\"https://github.com/elastic/observability-examples/tree/main/Elastiflix\\",rel:\\"nofollow\\",children:\\"Elastiflix\\"}),\\" application.\\\\xA0\\"]}),`\\n`]}),`\\n`,(0,t.jsxs)(e.li,{children:[`\\n`,(0,t.jsx)(e.p,{children:\\"A Kubernetes cluster; we used EKS in our setup\\"}),`\\n`]}),`\\n`,(0,t.jsxs)(e.li,{children:[`\\n`,(0,t.jsx)(e.p,{children:\\"Helm and kubectl installed\\"}),`\\n`]}),`\\n`]}),`\\n`,(0,t.jsxs)(e.p,{children:[\\"You can find the authentication details in the integrations section of Elastic. More information is also available in the \\",(0,t.jsx)(e.a,{href:\\"https://www.elastic.co/guide/en/kibana/current/api-keys.html\\",rel:\\"nofollow\\",children:\\"documentation\\"}),\\".\\"]}),`\\n`,(0,t.jsx)(e.p,{children:(0,t.jsx)(e.img,{src:\\"/assets/images/elastic-opentelemetry-otel-operator/otel-api-keys.png\\",alt:\\"OpenTelemetry API Keys\\",width:\\"1215\\",height:\\"814\\"})}),`\\n`,(0,t.jsx)(e.h2,{id:\\"k8s-and-application-observability-in-elastic\\",children:\\"K8S and Application Observability in Elastic:\\"}),`\\n`,(0,t.jsx)(e.p,{children:\\"Before we walk you through the steps, let\'s show you what is visible in Elastic.\\"}),`\\n`,(0,t.jsx)(e.p,{children:\\"Once the Operator starts the OTel Collector, you can see the following in Elastic:\\"}),`\\n`,(0,t.jsx)(e.h3,{id:\\"kubernetes-metrics\\",children:\\"Kubernetes metrics:\\"}),`\\n`,(0,t.jsx)(e.p,{children:\\"Using an out-of-the-box dashboard, you can see node metrics, overall cluster metrics, and status across pods, deployments, etc.\\"}),`\\n`,(0,t.jsx)(e.p,{children:(0,t.jsx)(e.img,{src:\\"/assets/images/elastic-opentelemetry-otel-operator/otel-k8s-dashboard.png\\",alt:\\"OTel-based Kubernetes dashboard\\",width:\\"1810\\",height:\\"1085\\"})}),`\\n`,(0,t.jsx)(e.h3,{id:\\"discovered-inventory-for-hosts-services-and-containers\\",children:\\"Discovered Inventory for Hosts, services, and containers:\\"}),`\\n`,(0,t.jsx)(e.p,{children:\\"This can be found at Observability->Inventory on the UI\\"}),`\\n`,(0,t.jsx)(e.p,{children:(0,t.jsx)(e.img,{src:\\"/assets/images/elastic-opentelemetry-otel-operator/otel-k8s-inventory.png\\",alt:\\"OTel-based Kubernetes inventory\\",width:\\"1811\\",height:\\"1087\\"})}),`\\n`,(0,t.jsx)(e.h3,{id:\\"detailed-metrics-logs-and-processor-info-on-hosts\\",children:\\"Detailed metrics, logs, and processor info on hosts:\\"}),`\\n`,(0,t.jsx)(e.p,{children:\\"This can be found at Observability->Infrastructure->Hosts\\"}),`\\n`,(0,t.jsx)(e.p,{children:(0,t.jsx)(e.img,{src:\\"/assets/images/elastic-opentelemetry-otel-operator/otel-k8s-hosts.png\\",alt:\\"OTel-based Kubernetes host metrics\\",width:\\"1809\\",height:\\"1082\\"})}),`\\n`,(0,t.jsx)(e.h3,{id:\\"k8s-and-application-logs-in-elastics-new-discover-called-explorer\\",children:\\"K8S and application logs in Elastic\\\\u2019s New Discover (called Explorer)\\"}),`\\n`,(0,t.jsx)(e.p,{children:\\"This can be found on Observability->Discover\\"
}),`\\n`,(0,t.jsx)(e.p,{children:(0,t.jsx)(e.img,{src:\\"/assets/images/elastic-opentelemetry-otel-operator/otel-ingest-logs.png\\",alt:\\"OTel-based Kubernetes logs\\",width:\\"1809\\",height:\\"1085\\"})}),`\\n`,(0,t.jsx)(e.h3,{id:\\"application-service-views-logs-metrics-and-traces\\",children:\\"Application Service views (logs, metrics, and traces):\\"}),`\\n`,(0,t.jsx)(e.p,{children:\\"This can be found on Observability->Application\\"}),`\\n`,(0,t.jsx)(e.p,{children:\\"Then select the service and drill down into different aspects.\\"}),`\\n`,(0,t.jsx)(e.p,{children:(0,t.jsx)(e.img,{src:\\"/assets/images/elastic-opentelemetry-otel-operator/otel-java-traces.png\\",alt:\\"OTel-based Application Java traces\\",width:\\"1569\\",height:\\"705\\"})}),`\\n`,(0,t.jsx)(e.p,{children:\\"Above, we show how traces are displayed using native OTel data.\\"}),`\\n`,(0,t.jsx)(e.h2,{id:\\"steps-to-install\\",children:\\"Steps to install\\"}),`\\n`,(0,t.jsx)(e.h3,{id:\\"step-0-follow-the-commands-listed-in-the-ui\\",children:\\"Step 0. Follow the commands listed in the UI\\"}),`\\n`,(0,t.jsx)(e.p,{children:\\"Under Add data->Kubernetes->Kubernetes Monitoring with EDOT\\"}),`\\n`,(0,t.jsx)(e.p,{children:\\"You will find the following instructions, which we will follow here.\\"}),`\\n`,(0,t.jsx)(e.p,{children:(0,t.jsx)(e.img,{src:\\"/assets/images/elastic-opentelemetry-otel-operator/otel-edot-operator-install.png\\",alt:\\"EDOT Operator Install\\",width:\\"1351\\",height:\\"1113\\"})}),`\\n`,(0,t.jsx)(e.h3,{id:\\"step-1-install-the-edot-config-for-the-opentelemetry-operator\\",children:\\"Step 1. Install the EDOT config for the OpenTelemetry Operator\\"}),`\\n`,(0,t.jsx)(e.p,{children:\\"Run the following commands. Please make sure that you have already authenticated against your K8s cluster, as this is where you will run the helm commands provided below.\\"}),`\\n`,(0,t.jsx)(e.pre,{children:(0,t.jsx)(e.code,{className:\\"language-bash\\",children:`# Add the required helm repo\\nhelm repo add open-telemetry https://open-telemetry.github.io/opentelemetry-helm-charts --force-update\\n# Create the namespace and secret. Provide the Elasticsearch endpoint URL and API key you noted in previous steps
\\nkubectl create ns opentelemetry-operator-system\\nkubectl create -n opentelemetry-operator-system secret generic elastic-secret-otel \\\\\\\\\\n --from-literal=elastic_endpoint=\'YOUR_ELASTICSEARCH_ENDPOINT\' \\\\\\\\\\n --from-literal=elastic_api_key=\'YOUR_ELASTICSEARCH_API_KEY\'\\n# Install the EDOT Operator\\nhelm install opentelemetry-kube-stack open-telemetry/opentelemetry-kube-stack --namespace opentelemetry-operator-system --create-namespace --values https://raw.githubusercontent.com/elastic/opentelemetry/refs/heads/main/resources/kubernetes/operator/helm/values.yaml --version 0.3.0\\n`})}),`\\n`,(0,t.jsxs)(e.p,{children:[\\"The values.yaml file configuration can be found \\",(0,t.jsx)(e.a,{href:\\"https://github.com/elastic/opentelemetry/blob/main/resources/kubernetes/operator/helm/values.yaml\\",rel:\\"nofollow\\",children:\\"here\\"}),\\".\\"]}),`\\n`,(0,t.jsx)(e.h3,{id:\\"step-1b-ensure-otel-data-is-arriving-in-elastic\\",children:\\"Step 1b: Ensure OTel data is arriving in Elastic\\"}),`\\n`,(0,t.jsxs)(e.p,{children:[\\"The simplest way to check is to go to Menu > Dashboards > \\",(0,t.jsx)(e.strong,{children:\\"[OTEL][Metrics Kubernetes] Cluster Overview,\\"}),\\" and ensure you see the following dashboard being populated.\\"]}),`\\n`,(0,t.jsx)(e.p,{children:(0,t.jsx)(e.img,{src:\\"/assets/images/elastic-opentelemetry-otel-operator/otel-k8s-dashboard.png\\",alt:\\"OTel-based Kubernetes dashboard\\",width:\\"1810\\",height:\\"1085\\"})}),`\\n`,(0,t.jsx)(e.h3,{id:\\"step-2-annotate-the-application-with-auto-instrumentation\\",children:\\"Step 2: Annotate the application with auto-instrumentation\\"}),`\\n`,(0,t.jsxs)(e.p,{children:[\\"For this example, we\\\\u2019re only going to annotate one service, the favorite-java service in the \\",(0,t.jsx)(e.a,{href:\\"https://github.com/elastic/observability-examples/tree/main/Elastiflix\\",rel:\\"nofollow\\",children:\\"Elastiflix\\"}),\\" application.\\"]}),`\\n`,(0,t.jsx)(e.p,{children:\\"Use the following commands to initiate auto-instrumentation:\\"}),`\\n`,(0,t.jsx)(e.pre,{children:(0,t.jsx)(e.code,{className:\\"language-bash\\",children:`# Annotate the Java namespace\\nkubectl annotate namespace java instrumentation.opentelemetry.io/inject-java=\\"opentelemetry-operator-system/elastic-instrumentation\\"\\n# Restart the java-app to pick up the new annotation\\nkubectl rollout restart deployment java-app -n java\\n`})}),`\\n`,(0,t.jsx)(e.p,{children:\\"You can also modify the YAML for your pod to add the annotation:\\"}),`\\n`,(0,t.jsx)(e.pre,{children:(0,t.jsx)(e.code,{className:\\"language-yaml\\",children:`metadata:\\n  name: my-app\\n  annotations:\\n    instrumentation.opentelemetry.io/inject-python: \\"true\\"\\n`})}),`\\n`,(0,t.jsx)(e.p,{children:\\"These instructions are provided in the UI:\\"}),`\\n`,(0,t.jsx)(e.p,{children:(0,t.jsx)(e.img,{src:\\"/assets/images/elastic-opentelemetry-otel-operator/otel-edot-sdk-annotate.png\\",alt:\\"Annotate Application with EDOT SDK\\",width:\\"1218\\",height:\\"479\\"})}),`\\n`,(0,t.jsx)(e.h2,{id:\\"check-out-the-service-data-in-elastic-apm\\",children:\\"Check out the service data in Elastic APM\\"}),`\\n`,(0,t.jsx)(e.p,{children:\\"Once the OTel data is in Elastic, you can see:\\"}),`\\n`,(0,t.jsxs)(e.ul,{children:[`\\n`,(0,t.jsxs)(e.li,{children:[`\\n`,(0,t.jsx)(e.p,{children:\\"Out-of-the-box dashboards for OTel-based Kubernetes 
metrics\\"}),`\\n`]}),`\\n`,(0,t.jsxs)(e.li,{children:[`\\n`,(0,t.jsx)(e.p,{children:\\"Discovered resources such as services, hosts, and containers that are part of the Kubernetes clusters\\"}),`\\n`]}),`\\n`,(0,t.jsxs)(e.li,{children:[`\\n`,(0,t.jsx)(e.p,{children:\\"Kubernetes metrics, host metrics, logs, processor info, anomaly detection, and universal profiling.\\"}),`\\n`]}),`\\n`,(0,t.jsxs)(e.li,{children:[`\\n`,(0,t.jsx)(e.p,{children:\\"Log analytics in Elastic Discover\\"}),`\\n`]}),`\\n`,(0,t.jsxs)(e.li,{children:[`\\n`,(0,t.jsx)(e.p,{children:\\"APM features that show app overview, transactions, dependencies, errors, and more:\\"}),`\\n`]}),`\\n`]}),`\\n`,(0,t.jsx)(e.p,{children:(0,t.jsx)(e.img,{src:\\"/assets/images/elastic-opentelemetry-otel-operator/otel-java-service.png\\",alt:\\"Java service in Elastic APM\\",width:\\"1580\\",height:\\"913\\"})}),`\\n`,(0,t.jsx)(e.p,{children:(0,t.jsx)(e.img,{src:\\"/assets/images/elastic-opentelemetry-otel-operator/otel-java-traces.png\\",alt:\\"OTel-based Application Java traces\\",width:\\"1569\\",height:\\"705\\"})}),`\\n`,(0,t.jsx)(e.h2,{id:\\"try-it-out\\",children:\\"Try it out\\"}),`\\n`,(0,t.jsx)(e.p,{children:\\"Elastic\\\\u2019s Distribution of OpenTelemetry (EDOT) transforms the observability experience by streamlining Kubernetes and application instrumentation. With EDOT, SREs and developers can bypass complex setups, instantly gain deep visibility into Kubernetes clusters, and capture critical metrics, logs, and traces\\\\u2014all within Elastic Observability. By following just a few simple steps, you\\\\u2019re empowered with a unified, efficient monitoring solution that brings your OpenTelemetry data directly into Elastic. With robust, out-of-the-box dashboards, automatic application instrumentation, and seamless integration, EDOT not only saves time but also enhances the accuracy and accessibility of observability across your infrastructure. 
Start leveraging EDOT today to unlock a frictionless observability experience and keep your systems running smoothly and insightfully.\\"}),`\\n`,(0,t.jsx)(e.p,{children:\\"Additional resources:\\"}),`\\n`,(0,t.jsxs)(e.ul,{children:[`\\n`,(0,t.jsxs)(e.li,{children:[`\\n`,(0,t.jsx)(e.p,{children:(0,t.jsx)(e.a,{href:\\"https://www.elastic.co/observability-labs/blog/elastic-distributions-opentelemetry\\",rel:\\"nofollow\\",children:\\"Elastic Distributions of OpenTelemetry Overview\\"})}),`\\n`]}),`\\n`,(0,t.jsxs)(e.li,{children:[`\\n`,(0,t.jsx)(e.p,{children:(0,t.jsx)(e.a,{href:\\"https://www.elastic.co/observability-labs/blog/opentelemetry-demo-with-the-elastic-distributions-of-opentelemetry\\",rel:\\"nofollow\\",children:\\"OpenTelemetry Demo with Elastic Distributions\\"})}),`\\n`]}),`\\n`,(0,t.jsxs)(e.li,{children:[`\\n`,(0,t.jsx)(e.p,{children:(0,t.jsx)(e.a,{href:\\"https://www.elastic.co/observability-labs/blog/infrastructure-monitoring-with-opentelemetry-in-elastic-observability\\",rel:\\"nofollow\\",children:\\"Infrastructure Monitoring with OpenTelemetry in Elastic Observability\\"})}),`\\n`]}),`\\n`,(0,t.jsxs)(e.li,{children:[`\\n`,(0,t.jsx)(e.p,{children:(0,t.jsx)(e.a,{href:\\"https://www.elastic.co/observability-labs/blog/auto-instrumentation-go-applications-opentelemetry\\",rel:\\"nofollow\\",children:\\"Auto-Instrumenting Go Applications with OpenTelemetry\\"})}),`\\n`]}),`\\n`,(0,t.jsxs)(e.li,{children:[`\\n`,(0,t.jsx)(e.p,{children:(0,t.jsx)(e.a,{href:\\"https://www.elastic.co/observability-labs/blog/elastic-distribution-opentelemetry-java-agent\\",rel:\\"nofollow\\",children:\\"Elastic Distribution OpenTelemetry Java Agent\\"})}),`\\n`]}),`\\n`,(0,t.jsxs)(e.li,{children:[`\\n`,(0,t.jsx)(e.p,{children:(0,t.jsx)(e.a,{href:\\"https://www.elastic.co/observability-labs/blog/elastic-opentelemetry-distribution-php\\",rel:\\"nofollow\\",children:\\"Elastic OpenTelemetry Distribution for PHP\\"})}),`\\n`]}),`\\n`]})]})}function d(n={}){let{wrapper:e}=n.components||{};return e?(0,t.jsx)(e,{...n,children:(0,t.jsx)(h,{...n})}):h(n)}return v(T);})();\\n;return Component;"},"_id":"articles/elastic-opentelemetry-otel-operator.mdx","_raw":{"sourceFilePath":"articles/elastic-opentelemetry-otel-operator.mdx","sourceFileName":"elastic-opentelemetry-otel-operator.mdx","sourceFileDir":"articles","contentType":"mdx","flattenedPath":"articles/elastic-opentelemetry-otel-operator"},"type":"Article","imageUrl":"/assets/images/elastic-opentelemetry-otel-operator/OTel-operator.jpg","readingTime":"6 min read","url":"/elastic-opentelemetry-otel-operator","headings":[{"level":2,"title":"K8S and Application Observability in Elastic:","href":"#k8s-and-application-observability-in-elastic"},{"level":3,"title":"Kubernetes metrics:","href":"#kubernetes-metrics"},{"level":3,"title":"Discovered Inventory for Hosts, services, and containers:","href":"#discovered-inventory-for-hosts-services-and-containers"},{"level":3,"title":"Detailed metrics, logs, and processor info on hosts:","href":"#detailed-metrics-logs-and-processor-info-on-hosts"},{"level":3,"title":"K8S and application logs in Elastic’s New Discover (called Explorer)","href":"#k8s-and-application-logs-in-elastics-new-discover-called-explorer"},{"level":3,"title":"Application Service views (logs, metrics, and traces):","href":"#application-service-views-logs-metrics-and-traces"},{"level":2,"title":"Steps to install","href":"#steps-to-install"},{"level":3,"title":"Step 0. 
Follow the commands listed in the UI ","href":"#step-0-follow-the-commands-listed-in-the-ui-"},{"level":3,"title":"Step 1. Install the EDOT config for the OpenTelemetry Operator","href":"#step-1-install-the-edot-config-for-the-opentelemetry-operator"},{"level":3,"title":"Step 1b: Ensure OTel data is arriving in Elastic","href":"#step-1b-ensure-otel-data-is-arriving-in-elastic"},{"level":3,"title":"Step 2: Annotate the application with auto-instrumentation","href":"#step-2-annotate-the-application-with-auto-instrumentation"},{"level":2,"title":"Check out the service data in Elastic APM","href":"#check-out-the-service-data-in-elastic-apm"},{"level":2,"title":"Try it out","href":"#try-it-out"}]},{"title":"Elastic now providing distributions for OpenTelemetry SDKs","slug":"elastic-opentelemetry-sdk-distributions","date":"2024-04-03","description":"Adopting OpenTelemetry native standards for instrumenting and observing applications","image":"OTel-2.jpg","author":[{"slug":"steve-gordon","type":"Author","_raw":{}}],"tags":[{"slug":"opentelemetry","type":"Tag","_raw":{}},{"slug":"apm","type":"Tag","_raw":{}},{"slug":"instrumentation","type":"Tag","_raw":{}}],"body":{"raw":"\\nIf you develop applications, you may have heard about [OpenTelemetry](https://opentelemetry.io/). At Elastic\xae, we are enthusiastic about OpenTelemetry as the future of standardized application instrumentation and observability.\\n\\nIn this post, we share our plans to expand our adoption of and commitment to OpenTelemetry with the introduction of Elastic distributions of the OpenTelemetry language SDKs, which will complement our existing Elastic APM agents.\\n\\n## What is OpenTelemetry?\\n\\nOpenTelemetry is a vendor-neutral observability framework and toolkit that supports telemetry signals such as traces, metrics, and logs in applications and distributed microservice-based architectures.\\n\\nDriven by a set of standards, OpenTelemetry is designed to provide a consistent approach to instrumenting and observing application behavior. OpenTelemetry is an incubating project developed under the Cloud Native Computing Foundation ([CNCF](https://www.cncf.io/)) umbrella and is currently the second most active project, topped only by Kubernetes.\\n\\nYou can read more on the [OpenTelemetry website](https://opentelemetry.io/docs/what-is-opentelemetry/) about the concepts, terminology, and techniques for adopting OpenTelemetry.\\n\\n## A richer instrumentation landscape\\n\\nBy adopting OpenTelemetry, software code can be instrumented in a vendor-agnostic fashion, with telemetry signals exported in a standardized format to one or more vendor backends, such as [Elastic APM](https://www.elastic.co/observability/application-performance-monitoring). Its design provides flexibility for application owners to switch out vendor backends with no code changes and use [OpenTelemetry collectors](https://opentelemetry.io/docs/collector/) to send telemetry data to multiple backends.\\n\\nBecause OpenTelemetry is not a vendor-specific solution, it is much easier for language ecosystems to adopt it and provide robust instrumentations. Vendors don’t have to implement specific instrumentations themselves anymore. OpenTelemetry is a standard, and it is in the interest of library developers to introduce and maintain instrumentations from which all consumers can benefit.\\n\\nAs a result, more instrumentation libraries are available and better kept up to date. 
If your company has open-source libraries, you can also contribute and create your own instrumentations to make it easier for your customers to adopt OpenTelemetry and benefit from richer traces, metrics, and logging in their applications.\\n\\n## Elastic and OpenTelemetry\\n\\nElastic is deeply involved in OpenTelemetry. In 2023, we donated the [Elastic Common Schema](https://www.elastic.co/blog/ecs-elastic-common-schema-otel-opentelemetry-announcement), which is being merged with the [Semantic Conventions](https://opentelemetry.io/docs/specs/semconv/). In 2024, we are in the process of donating our [profiling agent based on eBPF](https://www.elastic.co/blog/elastic-donation-proposal-to-contribute-profiling-agent-to-opentelemetry). We also have multiple contributors to various areas of OpenTelemetry across the organization.\\n\\nWe are therefore committed to helping OpenTelemetry succeed, which means, in some cases, beginning to shift away from Elastic-specific components and recommending OpenTelemetry components instead.\\n\\nElastic is committed to supporting and contributing to OpenTelemetry. Our APM solution already accepts native OTLP (OpenTelemetry Protocol) data, and many of our APM agents have already bridged data collection and transmission from applications instrumented using the OpenTelemetry APIs.\\n\\nThe next step on our journey is introducing Elastic distributions for the language SDKs and donating features upstream to the OpenTelemetry community by contributing to the OpenTelemetry SDK repositories.\\n\\n## What is an OpenTelemetry distribution?\\n\\nAn [OpenTelemetry distribution](https://opentelemetry.io/docs/concepts/distributions/) is simply a customized version of one or more OpenTelemetry components. Each distribution extends the core functionality offered by the component while adhering to its API and existing features, utilizing built-in extension points.\\n\\n## The Elastic OpenTelemetry SDK distributions\\n\\nWith the release of Elastic distributions of the OpenTelemetry SDKs, we are extending our backing of OpenTelemetry as the preferred and recommended choice for instrumenting applications.\\n\\nOpenTelemetry maintains and ships many language APIs and SDKs for observing applications using OpenTelemetry. The APIs provide a language-specific interface for instrumenting application code, while the SDK implements that API, enabling signals from observed applications to be collected and exported.\\n\\nOur current work extends the OpenTelemetry language SDKs to introduce additional features and ensure that the exported data provides the most robust compatibility with our current backend while it evolves to become more OpenTelemetry native.\\n\\nAdditional features include reimplementing concepts currently available in the Elastic APM Agent but not part of the OpenTelemetry SDK. The distributions allow us to ship with opinionated defaults for all signals that are known to provide the best integration with [Elastic’s Observability](https://www.elastic.co/observability) offering.\\n\\nIt’s undoubtedly possible to use the OpenTelemetry APIs to instrument code and then reference the OpenTelemetry SDK to enable the collection of the trace, metric, and log data that applications produce. Elastic APM accepts native OTLP data, so you can configure the OpenTelemetry SDK to export telemetry data directly to an Elastic backend. We refer to this setup as using the “vanilla” (a.k.a. 
“native”) OpenTelemetry SDK.\\n\\nWork is ongoing to improve support for storing and presenting OpenTelemetry data natively in our backend so that we can drive our observability UIs directly from the data from the various telemetry signals. Our work focuses on ensuring that the Elastic-curated UIs can seamlessly handle the ECS and OpenTelemetry formats. Alongside this effort, we are working on distributions of the language SDKs to support customers looking to adopt OpenTelemetry-native instrumentation in their applications.\\n\\nThe [current Elastic APM Agents](https://www.elastic.co/guide/en/apm/agent/index.html) support features such as central configuration and span compression that are not part of the OpenTelemetry specification as of today. We are investing our engineering expertise to bring those features to a broader audience by contributing them to OpenTelemetry. Because standardization takes time, we can more rapidly bring these features to the OpenTelemetry community and our customers by providing distributions.\\n\\nWe believe the responsible choice is to concentrate on enabling and encouraging customers to favor vendor-neutral instrumentation in their code and reap the benefits of OpenTelemetry.\\n\\nDistributions best serve our decision to fully adopt and recommend OpenTelemetry as the preferred solution for observing applications. By providing features that are currently unavailable in the “vanilla” OpenTelemetry SDK, we can support customers who want to adopt OpenTelemetry native, vendor-agnostic instrumentation in their applications while still providing the same set of features and backend capabilities they enjoy today with the existing APM Agents. By maintaining Elastic distributions, we can also better support our customers with enhancements and fixes outside of the release cycle of the “vanilla” OpenTelemetry SDKs, which we believe to be a crucial differentiating factor in choosing them.\\n\\nOur vision is that Elastic will work with the OpenTelemetry community to donate features through the standardization processes and contribute the code to implement those in the native OpenTelemetry SDKs. In time, we hope to see many Elastic APM Agent-exclusive features transition into OpenTelemetry to the point where an Elastic distribution may no longer be necessary. In the meantime, we can deliver those capabilities via our OpenTelemetry distributions.\\n\\nApplication developers then have several options for instrumenting and collecting telemetry data from their applications:\\n\\n1. **Elastic APM Agent:** The most fully featured, however, vendor-specific\\n2. **Elastic APM Agent with OpenTelemetry Bridge:** Vendor-neutral instrumentation API, but with known limitations:\\n\\n 1. Only supports bridging of traces (no metrics support)\\n 2. Does not support OpenTelemetry span events\\n\\n3. **OpenTelemetry “vanilla” SDK:** Fully supported today; however, it lacks some features of Elastic APM Agent, such as span compression\\n4. **Elastic OpenTelemetry Distribution:**\\n\\n 1. Supports vendor-neutral instrumentation and no Elastic-specific configuration in code by default\\n 2. Recommended defaults when using Elastic Observability as a backend\\n 3. Use OpenTelemetry APIs to further customize our defaults; no new APIs to learn\\n\\nWhile we continue to support all options to instrument your code for the foreseeable future, we think we are setting our customers up for success by introducing a fourth OpenTelemetry-native offering. 
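To make the "vanilla" SDK option above concrete, here is a minimal sketch of pointing an OpenTelemetry SDK directly at an Elastic backend using only the standard, vendor-neutral SDK environment variables. The endpoint, API key, and service name below are placeholders, and this assumes your Elastic OTLP endpoint accepts an API key in the Authorization header; check your own deployment's integration instructions for the exact values:

```bash
# Standard OpenTelemetry SDK environment variables; no Elastic-specific
# configuration is required in application code.
export OTEL_SERVICE_NAME="my-service"                                   # placeholder
export OTEL_EXPORTER_OTLP_ENDPOINT="https://YOUR_ELASTIC_OTLP_ENDPOINT" # placeholder
export OTEL_EXPORTER_OTLP_HEADERS="Authorization=ApiKey YOUR_API_KEY"   # placeholder
export OTEL_RESOURCE_ATTRIBUTES="deployment.environment=production"

# Start the application as usual; the SDK reads these variables at startup.
./my-app
```

Because these variables are defined by the OpenTelemetry specification rather than by any vendor, the same configuration style carries over unchanged if you later switch to an Elastic distribution of the SDK or to a different backend.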
We expect this will become the preferred default for Elastic customers in due time.\\n\\nWe currently have distributions in alpha release status for [.NET](https://github.com/elastic/elastic-otel-dotnet) and [Java](https://github.com/elastic/elastic-otel-java), with additional language distributions coming very soon. We encourage you to check out those repositories, try out the distributions, and provide feedback to us via issues. Your valued input allows us to refine our designs and steer our direction to ensure that our distributions delight consumers.\\n\\n[_**Learn about the alpha release of our new Elastic distribution of the OpenTelemetry SDK for .NET.**_](https://www.elastic.co/blog/elastic-opentelemetry-distribution-dotnet-applications)\\n\\n_The release and timing of any features or functionality described in this post remain at Elastic\'s sole discretion. Any features or functionality not currently available may not be delivered on time or at all._\\n","code":"var Component=(()=>{var h=Object.create;var r=Object.defineProperty;var u=Object.getOwnPropertyDescriptor;var m=Object.getOwnPropertyNames;var f=Object.getPrototypeOf,y=Object.prototype.hasOwnProperty;var g=(n,e)=>()=>(e||n((e={exports:{}}).exports,e),e.exports),b=(n,e)=>{for(var i in e)r(n,i,{get:e[i],enumerable:!0})},s=(n,e,i,a)=>{if(e&&typeof e==\\"object\\"||typeof e==\\"function\\")for(let o of m(e))!y.call(n,o)&&o!==i&&r(n,o,{get:()=>e[o],enumerable:!(a=u(e,o))||a.enumerable});return n};var w=(n,e,i)=>(i=n!=null?h(f(n)):{},s(e||!n||!n.__esModule?r(i,\\"default\\",{value:n,enumerable:!0}):i,n)),v=n=>s(r({},\\"__esModule\\",{value:!0}),n);var c=g((E,l)=>{l.exports=_jsx_runtime});var O={};b(O,{default:()=>p,frontmatter:()=>T});var t=w(c()),T={title:\\"Elastic now providing distributions for OpenTelemetry SDKs\\",slug:\\"elastic-opentelemetry-sdk-distributions\\",description:\\"Adopting OpenTelemetry native standards for instrumenting and observing applications\\",author:[{slug:\\"steve-gordon\\"}],tags:[{slug:\\"opentelemetry\\"},{slug:\\"apm\\"},{slug:\\"instrumentation\\"}],date:\\"2024-04-03\\",image:\\"OTel-2.jpg\\"};function d(n){let e={a:\\"a\\",em:\\"em\\",h2:\\"h2\\",li:\\"li\\",ol:\\"ol\\",p:\\"p\\",strong:\\"strong\\",...n.components};return(0,t.jsxs)(t.Fragment,{children:[(0,t.jsxs)(e.p,{children:[\\"If you develop applications, you may have heard about \\",(0,t.jsx)(e.a,{href:\\"https://opentelemetry.io/\\",rel:\\"nofollow\\",children:\\"OpenTelemetry\\"}),\\". At Elastic\\\\xAE, we are enthusiastic about OpenTelemetry as the future of standardized application instrumentation and observability.\\"]}),`\\n`,(0,t.jsx)(e.p,{children:\\"In this post, we share our plans to expand our adoption of and commitment to OpenTelemetry with the introduction of Elastic distributions of the OpenTelemetry language SDKs, which will complement our existing Elastic APM agents.\\"}),`\\n`,(0,t.jsx)(e.h2,{id:\\"what-is-opentelemetry\\",children:\\"What is OpenTelemetry?\\"}),`\\n`,(0,t.jsx)(e.p,{children:\\"OpenTelemetry is a vendor-neutral observability framework and toolkit that supports telemetry signals such as traces, metrics, and logs in applications and distributed microservice-based architectures.\\"}),`\\n`,(0,t.jsxs)(e.p,{children:[\\"Driven by a set of standards, OpenTelemetry is designed to provide a consistent approach to instrumenting and observing application behavior. 
OpenTelemetry is an incubating project developed under the Cloud Native Computing Foundation (\\",(0,t.jsx)(e.a,{href:\\"https://www.cncf.io/\\",rel:\\"nofollow\\",children:\\"CNCF\\"}),\\") umbrella and is currently the second most active project, topped only by Kubernetes.\\"]}),`\\n`,(0,t.jsxs)(e.p,{children:[\\"You can read more on the \\",(0,t.jsx)(e.a,{href:\\"https://opentelemetry.io/docs/what-is-opentelemetry/\\",rel:\\"nofollow\\",children:\\"OpenTelemetry website\\"}),\\" about the concepts, terminology, and techniques for adopting OpenTelemetry.\\"]}),`\\n`,(0,t.jsx)(e.h2,{id:\\"a-richer-instrumentation-landscape\\",children:\\"A richer instrumentation landscape\\"}),`\\n`,(0,t.jsxs)(e.p,{children:[\\"By adopting OpenTelemetry, software code can be instrumented in a vendor-agnostic fashion, with telemetry signals exported in a standardized format to one or more vendor backends, such as \\",(0,t.jsx)(e.a,{href:\\"https://www.elastic.co/observability/application-performance-monitoring\\",rel:\\"nofollow\\",children:\\"Elastic APM\\"}),\\". Its design provides flexibility for application owners to switch out vendor backends with no code changes and use \\",(0,t.jsx)(e.a,{href:\\"https://opentelemetry.io/docs/collector/\\",rel:\\"nofollow\\",children:\\"OpenTelemetry collectors\\"}),\\" to send telemetry data to multiple backends.\\"]}),`\\n`,(0,t.jsx)(e.p,{children:\\"Because OpenTelemetry is not a vendor-specific solution, it is much easier for language ecosystems to adopt it and provide robust instrumentations. Vendors don\\\\u2019t have to implement specific instrumentations themselves anymore. OpenTelemetry is a standard, and it is in the interest of library developers to introduce and maintain instrumentations from which all consumers can benefit.\\"}),`\\n`,(0,t.jsx)(e.p,{children:\\"As a result, more instrumentation libraries are available and better kept up to date. If your company has open-source libraries, you can also contribute and create your own instrumentations to make it easier for your customers to adopt OpenTelemetry and benefit from richer traces, metrics, and logging in their applications.\\"}),`\\n`,(0,t.jsx)(e.h2,{id:\\"elastic-and-opentelemetry\\",children:\\"Elastic and OpenTelemetry\\"}),`\\n`,(0,t.jsxs)(e.p,{children:[\\"Elastic is deeply involved in OpenTelemetry. In 2023, we donated the \\",(0,t.jsx)(e.a,{href:\\"https://www.elastic.co/blog/ecs-elastic-common-schema-otel-opentelemetry-announcement\\",rel:\\"nofollow\\",children:\\"Elastic Common Schema\\"}),\\", which is being merged with the \\",(0,t.jsx)(e.a,{href:\\"https://opentelemetry.io/docs/specs/semconv/\\",rel:\\"nofollow\\",children:\\"Semantic Conventions\\"}),\\". In 2024, we are in the process of donating our \\",(0,t.jsx)(e.a,{href:\\"https://www.elastic.co/blog/elastic-donation-proposal-to-contribute-profiling-agent-to-opentelemetry\\",rel:\\"nofollow\\",children:\\"profiling agent based on eBPF\\"}),\\". We also have multiple contributors to various areas of OpenTelemetry across the organization.\\"]}),`\\n`,(0,t.jsx)(e.p,{children:\\"We are therefore committed to helping OpenTelemetry succeed, which means, in some cases, beginning to shift away from Elastic-specific components and recommending OpenTelemetry components instead.\\"}),`\\n`,(0,t.jsx)(e.p,{children:\\"Elastic is committed to supporting and contributing to OpenTelemetry. 
Our APM solution already accepts native OTLP (OpenTelemetry Protocol) data, and many of our APM agents have already bridged data collection and transmission from applications instrumented using the OpenTelemetry APIs.\\"}),`\\n`,(0,t.jsx)(e.p,{children:\\"The next step on our journey is introducing Elastic distributions for the language SDKs and donating features upstream to the OpenTelemetry community by contributing to the OpenTelemetry SDK repositories.\\"}),`\\n`,(0,t.jsx)(e.h2,{id:\\"what-is-an-opentelemetry-distribution\\",children:\\"What is an OpenTelemetry distribution?\\"}),`\\n`,(0,t.jsxs)(e.p,{children:[\\"An \\",(0,t.jsx)(e.a,{href:\\"https://opentelemetry.io/docs/concepts/distributions/\\",rel:\\"nofollow\\",children:\\"OpenTelemetry distribution\\"}),\\" is simply a customized version of one or more OpenTelemetry components. Each distribution extends the core functionality offered by the component while adhering to its API and existing features, utilizing built-in extension points.\\"]}),`\\n`,(0,t.jsx)(e.h2,{id:\\"the-elastic-opentelemetry-sdk-distributions\\",children:\\"The Elastic OpenTelemetry SDK distributions\\"}),`\\n`,(0,t.jsx)(e.p,{children:\\"With the release of Elastic distributions of the OpenTelemetry SDKs, we are extending our backing of OpenTelemetry as the preferred and recommended choice for instrumenting applications.\\"}),`\\n`,(0,t.jsx)(e.p,{children:\\"OpenTelemetry maintains and ships many language APIs and SDKs for observing applications using OpenTelemetry. The APIs provide a language-specific interface for instrumenting application code, while the SDK implements that API, enabling signals from observed applications to be collected and exported.\\"}),`\\n`,(0,t.jsx)(e.p,{children:\\"Our current work extends the OpenTelemetry language SDKs to introduce additional features and ensure that the exported data provides the most robust compatibility with our current backend while it evolves to become more OpenTelemetry native.\\"}),`\\n`,(0,t.jsxs)(e.p,{children:[\\"Additional features include reimplementing concepts currently available in the Elastic APM Agent but not part of the OpenTelemetry SDK. The distributions allow us to ship with opinionated defaults for all signals that are known to provide the best integration with \\",(0,t.jsx)(e.a,{href:\\"https://www.elastic.co/observability\\",rel:\\"nofollow\\",children:\\"Elastic\\\\u2019s Observability\\"}),\\" offering.\\"]}),`\\n`,(0,t.jsx)(e.p,{children:\\"It\\\\u2019s undoubtedly possible to use the OpenTelemetry APIs to instrument code and then reference the OpenTelemetry SDK to enable the collection of the trace, metric, and log data that applications produce. Elastic APM accepts native OTLP data, so you can configure the OpenTelemetry SDK to export telemetry data directly to an Elastic backend. We refer to this setup as using the \\\\u201Cvanilla\\\\u201D (a.k.a. \\\\u201Cnative\\\\u201D) OpenTelemetry SDK.\\"}),`\\n`,(0,t.jsx)(e.p,{children:\\"Work is ongoing to improve support for storing and presenting OpenTelemetry data natively in our backend so that we can drive our observability UIs directly from the data from the various telemetry signals. Our work focuses on ensuring that the Elastic-curated UIs can seamlessly handle the ECS and OpenTelemetry formats. 
Alongside this effort, we are working on distributions of the language SDKs to support customers looking to adopt OpenTelemetry-native instrumentation in their applications.\\"}),`\\n`,(0,t.jsxs)(e.p,{children:[\\"The \\",(0,t.jsx)(e.a,{href:\\"https://www.elastic.co/guide/en/apm/agent/index.html\\",rel:\\"nofollow\\",children:\\"current Elastic APM Agents\\"}),\\" support features such as central configuration and span compression that are not part of the OpenTelemetry specification as of today. We are investing our engineering expertise to bring those features to a broader audience by contributing them to OpenTelemetry. Because standardization takes time, we can more rapidly bring these features to the OpenTelemetry community and our customers by providing distributions.\\"]}),`\\n`,(0,t.jsx)(e.p,{children:\\"We believe the responsible choice is to concentrate on enabling and encouraging customers to favor vendor-neutral instrumentation in their code and reap the benefits of OpenTelemetry.\\"}),`\\n`,(0,t.jsx)(e.p,{children:\\"Distributions best serve our decision to fully adopt and recommend OpenTelemetry as the preferred solution for observing applications. By providing features that are currently unavailable in the \\\\u201Cvanilla\\\\u201D OpenTelemetry SDK, we can support customers who want to adopt OpenTelemetry native, vendor-agnostic instrumentation in their applications while still providing the same set of features and backend capabilities they enjoy today with the existing APM Agents. By maintaining Elastic distributions, we can also better support our customers with enhancements and fixes outside of the release cycle of the \\\\u201Cvanilla\\\\u201D OpenTelemetry SDKs, which we believe to be a crucial differentiating factor in choosing them.\\"}),`\\n`,(0,t.jsx)(e.p,{children:\\"Our vision is that Elastic will work with the OpenTelemetry community to donate features through the standardization processes and contribute the code to implement those in the native OpenTelemetry SDKs. In time, we hope to see many Elastic APM Agent-exclusive features transition into OpenTelemetry to the point where an Elastic distribution may no longer be necessary. 
In the meantime, we can deliver those capabilities via our OpenTelemetry distributions.\\"}),`\\n`,(0,t.jsx)(e.p,{children:\\"Application developers then have several options for instrumenting and collecting telemetry data from their applications:\\"}),`\\n`,(0,t.jsxs)(e.ol,{children:[`\\n`,(0,t.jsxs)(e.li,{children:[`\\n`,(0,t.jsxs)(e.p,{children:[(0,t.jsx)(e.strong,{children:\\"Elastic APM Agent:\\"}),\\" The most fully featured, however, vendor-specific\\"]}),`\\n`]}),`\\n`,(0,t.jsxs)(e.li,{children:[`\\n`,(0,t.jsxs)(e.p,{children:[(0,t.jsx)(e.strong,{children:\\"Elastic APM Agent with OpenTelemetry Bridge:\\"}),\\" Vendor-neutral instrumentation API, but with known limitations:\\"]}),`\\n`,(0,t.jsxs)(e.ol,{children:[`\\n`,(0,t.jsx)(e.li,{children:\\"Only supports bridging of traces (no metrics support)\\"}),`\\n`,(0,t.jsx)(e.li,{children:\\"Does not support OpenTelemetry span events\\"}),`\\n`]}),`\\n`]}),`\\n`,(0,t.jsxs)(e.li,{children:[`\\n`,(0,t.jsxs)(e.p,{children:[(0,t.jsx)(e.strong,{children:\\"OpenTelemetry \\\\u201Cvanilla\\\\u201D SDK:\\"}),\\" Fully supported today; however, it lacks some features of Elastic APM Agent, such as span compression\\"]}),`\\n`]}),`\\n`,(0,t.jsxs)(e.li,{children:[`\\n`,(0,t.jsx)(e.p,{children:(0,t.jsx)(e.strong,{children:\\"Elastic OpenTelemetry Distribution:\\"})}),`\\n`,(0,t.jsxs)(e.ol,{children:[`\\n`,(0,t.jsx)(e.li,{children:\\"Supports vendor-neutral instrumentation and no Elastic-specific configuration in code by default\\"}),`\\n`,(0,t.jsx)(e.li,{children:\\"Recommended defaults when using Elastic Observability as a backend\\"}),`\\n`,(0,t.jsx)(e.li,{children:\\"Use OpenTelemetry APIs to further customize our defaults; no new APIs to learn\\"}),`\\n`]}),`\\n`]}),`\\n`]}),`\\n`,(0,t.jsx)(e.p,{children:\\"While we continue to support all options to instrument your code for the foreseeable future, we think we are setting our customers up for success by introducing a fourth OpenTelemetry-native offering. We expect this will become the preferred default for Elastic customers in due time.\\"}),`\\n`,(0,t.jsxs)(e.p,{children:[\\"We currently have distributions in alpha release status for \\",(0,t.jsx)(e.a,{href:\\"https://github.com/elastic/elastic-otel-dotnet\\",rel:\\"nofollow\\",children:\\".NET\\"}),\\" and \\",(0,t.jsx)(e.a,{href:\\"https://github.com/elastic/elastic-otel-java\\",rel:\\"nofollow\\",children:\\"Java\\"}),\\", with additional language distributions coming very soon. We encourage you to check out those repositories, try out the distributions, and provide feedback to us via issues. Your valued input allows us to refine our designs and steer our direction to ensure that our distributions delight consumers.\\"]}),`\\n`,(0,t.jsx)(e.p,{children:(0,t.jsx)(e.a,{href:\\"https://www.elastic.co/blog/elastic-opentelemetry-distribution-dotnet-applications\\",rel:\\"nofollow\\",children:(0,t.jsx)(e.em,{children:(0,t.jsx)(e.strong,{children:\\"Learn about the alpha release of our new Elastic distribution of the OpenTelemetry SDK for .NET.\\"})})})}),`\\n`,(0,t.jsx)(e.p,{children:(0,t.jsx)(e.em,{children:\\"The release and timing of any features or functionality described in this post remain at Elastic\'s sole discretion. 
Any features or functionality not currently available may not be delivered on time or at all.\\"})})]})}function p(n={}){let{wrapper:e}=n.components||{};return e?(0,t.jsx)(e,{...n,children:(0,t.jsx)(d,{...n})}):d(n)}return v(O);})();\\n;return Component;"},"_id":"articles/elastic-opentelemetry-sdk-distributions.mdx","_raw":{"sourceFilePath":"articles/elastic-opentelemetry-sdk-distributions.mdx","sourceFileName":"elastic-opentelemetry-sdk-distributions.mdx","sourceFileDir":"articles","contentType":"mdx","flattenedPath":"articles/elastic-opentelemetry-sdk-distributions"},"type":"Article","imageUrl":"/assets/images/elastic-opentelemetry-sdk-distributions/OTel-2.jpg","readingTime":"7 min read","url":"/elastic-opentelemetry-sdk-distributions","headings":[{"level":2,"title":"What is OpenTelemetry?","href":"#what-is-opentelemetry"},{"level":2,"title":"A richer instrumentation landscape","href":"#a-richer-instrumentation-landscape"},{"level":2,"title":"Elastic and OpenTelemetry","href":"#elastic-and-opentelemetry"},{"level":2,"title":"What is an OpenTelemetry distribution?","href":"#what-is-an-opentelemetry-distribution"},{"level":2,"title":"The Elastic OpenTelemetry SDK distributions","href":"#the-elastic-opentelemetry-sdk-distributions"}]},{"title":"FAQ - Elastic contributes its Universal Profiling agent to OpenTelemetry","slug":"elastic-profiling-agent-acceptance-opentelemetry-faq","date":"2024-06-06","description":"Elastic is advancing the adoption of OpenTelemetry with the contribution of its universal profiling agent. Elastic is committed to ensuring a vendor-agnostic ingestion and collection of observability and security telemetry through OpenTelemetry.","image":"profiling-acceptance-faq.png","author":[{"slug":"elastic-observability-team","type":"Author","_raw":{}}],"tags":[{"slug":"universal-profiling","type":"Tag","_raw":{}},{"slug":"opentelemetry","type":"Tag","_raw":{}}],"body":{"raw":"\\n## What is being announced?\\n\\nElastic’s [donation proposal](https://github.com/open-telemetry/community/issues/1918) for contributing its Universal Profiling™ agent has now been accepted by the OpenTelemetry community. Elastic’s Universal Profiling agent, the industry’s most comprehensive fleet-wide Universal Profiling solution, empowers users to quickly identify performance bottlenecks, reduce cloud spend, and minimize their carbon footprint. With the contribution of the Elastic Universal Profiling Agent to OpenTelemetry, all customers will benefit from its features and capabilities.\\n\\n\\n## What do Elastic users need to know?\\n\\nElastic’s contribution of the continuous profiling agent will not change the existing set of Elastic’s continuous profiling features or how we ingest and store profiling data.\xa0\\n\\nElastic will participate and closely collaborate with the OTel community, not only managing the addition of the continuous profiling agent to OTel but also working with and driving the OTel community’s Profiling Special Interest Group (SIG) in shaping OTel’s continuous profiling evolution.\xa0\\n\\nElastic has facilitated the definition of the OTel [Profiling Data Model](https://github.com/open-telemetry/oteps/blob/main/text/profiles/0239-profiles-data-model.md), a crucial step toward standardizing profiling data. 
Moreover, the recent merge of the [OpenTelemetry Enhancement Proposal (OTEP) introducing profiling support to the OpenTelemetry Protocol (OTLP)](https://github.com/open-telemetry/oteps/pull/239) marked an additional milestone.\xa0\\n\\n\\n## Why is Elastic contributing its Profiling Agent to OTel?\\n\\nThis contribution not only accelerates the standardization of continuous profiling but also makes continuous profiling the 4th key signal in observability. This empowers everyone in the observability community to continuously profile with a standardized agent. The addition of Elastic’s continuous profiling agent will:\\n\\n- Align efforts around a single standard poised for broad adoption by users.\\n\\n- Drive better visibility and improvement of resource usage and cost management for operations.\\n\\n- Enable vendors and the community to focus on richer features versus dealing with data transformation tasks.\\n\\n- Enable continuous profiling to become the 4th key signal in Observability.\\n\\n- Increase continuous profiling adoption and the continued evolution and convergence of observability and security domains.\\n\\n\\n## Why is continuous profiling needed by organizations?\\n\\nThe contribution of Elastic’s continuous profiling agent now helps customers realize the following benefits of continuous profiling:\\n\\n- Maximize gross margins: By reducing the computational resources needed to run applications, businesses can optimize their cloud spend and improve profitability. Whole-system continuous profiling is one way of identifying the most expensive applications (down to the lines of code) across diverse environments that may span multiple cloud providers. This principle aligns with the familiar adage, \\"A penny saved is a penny earned.\\" In the cloud context, every CPU cycle saved translates to money saved.\xa0\\n\\n* Minimize environmental impact: Energy consumption associated with computing is a growing concern (source: [MIT Energy Initiative](https://energy.mit.edu/news/energy-efficient-computing/)). More efficient code translates to lower energy consumption, contributing to a reduction in carbon (CO2) footprint.\xa0\\n\\n- Accelerate engineering workflows: Continuous profiling provides detailed insights to help debug complex issues faster, guide development, and improve overall code quality.\\n\\nWith these benefits, customers can now not only manage the overall application’s efficiency on the cloud, but also ensure the application is optimally developed.\\n\\n\\n## What is continuous profiling?\\n\\nElastic’s continuous profiling agent is a whole-system, always-on, continuous profiling solution that eliminates the need for run-time/bytecode instrumentation, recompilation, on-host debug symbols or service restarts.\xa0\xa0\xa0\\n\\nProfiling helps organizations run efficient services by minimizing computational wastage, thereby reducing operational costs. Leveraging [eBPF](https://ebpf.io/), the Elastic profiling agent provides unprecedented visibility into the runtime behavior of all applications: it builds stack traces that go from the kernel, through userspace native code, all the way into code running in higher level runtimes, enabling you to identify performance regressions, reduce wasteful computations, and debug complex issues faster.\xa0\\n\\nTo this end, it measures code efficiency in three dimensions: CPU utilization, CO2, and cloud cost. 
This approach resonates with the sustainability objectives of our customers –– ensuring that Elastic continuous profiling aligns seamlessly with their strategic [ESG](https://en.wikipedia.org/wiki/Environmental,_social,_and_corporate_governance) goals.\n\n\n## Does Elastic support OpenTelemetry today?\n\n[Elastic supports OTel natively](https://www.elastic.co/observability/opentelemetry). Elastic users can send OTel data directly from applications or through the OTel collector into Elastic APM, which processes both OTel SemConv and ECS. With this native OTel support, all [Elastic APM capabilities](https://www.elastic.co/observability/application-performance-monitoring) are available with OTel. [See Elastic documentation to learn more about OTel integration](https://www.elastic.co/guide/en/apm/guide/current/open-telemetry.html).\n\n![Native OpenTelemetry Support in Elastic](/assets/images/elastic-profiling-agent-acceptance-opentelemetry-faq/blog-elastic-otel-2.png)\n\n## Where can I learn more about Elastic’s Universal Profiling?\n\nElastic’s resources help you understand continuous profiling and how to use it in different scenarios:\n\n- [Elastic Universal Profiling home page](https://www.elastic.co/observability/universal-profiling)\n\n- [Elastic Universal Profiling agent going open source under Apache 2](https://www.elastic.co/blog/elastic-universal-profiling-agent-open-source)\n\n- [Pinpointing performance issues with profiling](https://www.elastic.co/blog/continuous-profiling-distributed-tracing-correlation)\n\n- [Elastic releases Universal Profiling](https://www.elastic.co/blog/continuous-profiling-is-generally-available)\n\n- [Whole system profiling with Universal Profiling](https://www.elastic.co/blog/whole-system-visibility-elastic-universal-profiling)\n\n- [Cost-effective applications with Universal Profiling](https://www.elastic.co/blog/continuous-profiling-efficient-cost-effective-applications)\n\n- [Elastic documentation on Universal Profiling](https://www.elastic.co/guide/en/observability/current/universal-profiling.html)\n","code":"var Component=(()=>{var p=Object.create;var l=Object.defineProperty;var u=Object.getOwnPropertyDescriptor;var g=Object.getOwnPropertyNames;var f=Object.getPrototypeOf,m=Object.prototype.hasOwnProperty;var w=(n,e)=>()=>(e||n((e={exports:{}}).exports,e),e.exports),y=(n,e)=>{for(var t in e)l(n,t,{get:e[t],enumerable:!0})},s=(n,e,t,r)=>{if(e&&typeof e==\\"object\\"||typeof e==\\"function\\")for(let o of g(e))!m.call(n,o)&&o!==t&&l(n,o,{get:()=>e[o],enumerable:!(r=u(e,o))||r.enumerable});return n};var b=(n,e,t)=>(t=n!=null?p(f(n)):{},s(e||!n||!n.__esModule?l(t,\\"default\\",{value:n,enumerable:!0}):t,n)),v=n=>s(l({},\\"__esModule\\",{value:!0}),n);var c=w((P,a)=>{a.exports=_jsx_runtime});var T={};y(T,{default:()=>d,frontmatter:()=>E});var i=b(c()),E={title:\\"FAQ - Elastic contributes its Universal Profiling agent to OpenTelemetry\\",slug:\\"elastic-profiling-agent-acceptance-opentelemetry-faq\\",date:\\"2024-06-06\\",description:\\"Elastic is advancing the adoption of OpenTelemetry with the contribution of its universal profiling agent. 
Elastic is committed to ensuring a vendor-agnostic ingestion and collection of observability and security telemetry through OpenTelemetry.\\",author:[{slug:\\"elastic-observability-team\\"}],image:\\"profiling-acceptance-faq.png\\",tags:[{slug:\\"universal-profiling\\"},{slug:\\"opentelemetry\\"}]};function h(n){let e={a:\\"a\\",h2:\\"h2\\",hr:\\"hr\\",img:\\"img\\",li:\\"li\\",p:\\"p\\",ul:\\"ul\\",...n.components};return(0,i.jsxs)(i.Fragment,{children:[(0,i.jsx)(e.h2,{id:\\"what-is-being-announced\\",children:\\"What is being announced?\\"}),`\\n`,(0,i.jsxs)(e.p,{children:[\\"Elastic\\\\u2019s \\",(0,i.jsx)(e.a,{href:\\"https://github.com/open-telemetry/community/issues/1918\\",rel:\\"nofollow\\",children:\\"donation proposal\\"}),\\" for contributing its Universal Profiling\\\\u2122 agent has now been accepted by the OpenTelemetry community. Elastic\\\\u2019s Universal Profiling agent, the industry\\\\u2019s most comprehensive fleet-wide Universal Profiling solution, empowers users to quickly identify performance bottlenecks, reduce cloud spend, and minimize their carbon footprint. With the contribution of the Elastic Universal Profiling Agent to OpenTelemetry, all customers will benefit from its features and capabilities.\\"]}),`\\n`,(0,i.jsx)(e.h2,{id:\\"what-do-elastic-users-need-to-know\\",children:\\"What do Elastic users need to know?\\"}),`\\n`,(0,i.jsx)(e.p,{children:\\"Elastic\\\\u2019s contribution of the continuous profiling agent will not change the existing set of Elastic\\\\u2019s continuous profiling features or how we ingest and store profiling data.\\\\xA0\\"}),`\\n`,(0,i.jsx)(e.p,{children:\\"Elastic will participate and closely collaborate with the OTel community to manage not only the addition of the continuous profiling agent to OTel but also work with and drive the OTel community\\\\u2019s Profiling Special Interest Group (SIG) in shaping OTel\\\\u2019s continuous profiling evolution.\\\\xA0\\"}),`\\n`,(0,i.jsxs)(e.p,{children:[\\"Elastic has facilitated the definition of the OTel \\",(0,i.jsx)(e.a,{href:\\"https://github.com/open-telemetry/oteps/blob/main/text/profiles/0239-profiles-data-model.md\\",rel:\\"nofollow\\",children:\\"Profiling Data Model\\"}),\\", a crucial step toward standardizing profiling data. Moreover, the recent merge of the \\",(0,i.jsx)(e.a,{href:\\"https://github.com/open-telemetry/oteps/pull/239\\",rel:\\"nofollow\\",children:\\"OpenTelemetry Enhancement Proposal (OTEP) introducing profiling support to the OpenTelemetry Protocol (OTLP)\\"}),\\" marked an additional milestone.\\\\xA0\\"]}),`\\n`,(0,i.jsx)(e.h2,{id:\\"why-is-elastic-contributing-its-profiling-agent-to-otel\\",children:\\"Why is Elastic contributing its Profiling Agent to OTel?\\"}),`\\n`,(0,i.jsx)(e.p,{children:\\"This contribution not only accelerates the standardization of continuous profiling but also makes continuous profiling the 4th key signal in observability. This empowers everyone in the observability community to continuously profile with a standardized agent. 
The addition of Elastic\\\\u2019s continuous profiling agent will:\\"}),`\\n`,(0,i.jsxs)(e.ul,{children:[`\\n`,(0,i.jsxs)(e.li,{children:[`\\n`,(0,i.jsx)(e.p,{children:\\"Align efforts around a single standard poised for broad adoption by users.\\"}),`\\n`]}),`\\n`,(0,i.jsxs)(e.li,{children:[`\\n`,(0,i.jsx)(e.p,{children:\\"Drive better visibility and improvement of resource usage and cost management for operations.\\"}),`\\n`]}),`\\n`,(0,i.jsxs)(e.li,{children:[`\\n`,(0,i.jsx)(e.p,{children:\\"Enable vendors and the community to focus on richer features versus dealing with data transformation tasks.\\"}),`\\n`]}),`\\n`,(0,i.jsxs)(e.li,{children:[`\\n`,(0,i.jsx)(e.p,{children:\\"Enable continuous profiling to become the 4th key signal in Observability.\\"}),`\\n`]}),`\\n`,(0,i.jsxs)(e.li,{children:[`\\n`,(0,i.jsx)(e.p,{children:\\"Increase continuous profiling adoption and the continued evolution and convergence of observability and security domains.\\"}),`\\n`]}),`\\n`]}),`\\n`,(0,i.jsx)(e.h2,{id:\\"why-is-continuous-profiling-needed-by-organizations\\",children:\\"Why is continuous profiling needed by organizations?\\"}),`\\n`,(0,i.jsx)(e.p,{children:\\"The contribution of Elastic\\\\u2019s continuous profiling agent now helps customers realize the following benefits of continuous profiling:\\"}),`\\n`,(0,i.jsxs)(e.ul,{children:[`\\n`,(0,i.jsx)(e.li,{children:\'Maximize gross margins: By reducing the computational resources needed to run applications, businesses can optimize their cloud spend and improve profitability. Whole-system continuous profiling is one way of identifying the most expensive applications (down to the lines of code) across diverse environments that may span multiple cloud providers. This principle aligns with the familiar adage, \\"A penny saved is a penny earned.\\" In the cloud context, every CPU cycle saved translates to money saved.\\\\xA0\'}),`\\n`]}),`\\n`,(0,i.jsxs)(e.ul,{children:[`\\n`,(0,i.jsxs)(e.li,{children:[\\"Minimize environmental impact: Energy consumption associated with computing is a growing concern (source: \\",(0,i.jsx)(e.a,{href:\\"https://energy.mit.edu/news/energy-efficient-computing/\\",rel:\\"nofollow\\",children:\\"MIT Energy Initiative\\"}),\\"). More efficient code translates to lower energy consumption, contributing to a reduction in carbon (CO2) footprint.\\\\xA0\\"]}),`\\n`]}),`\\n`,(0,i.jsxs)(e.ul,{children:[`\\n`,(0,i.jsx)(e.li,{children:\\"Accelerate engineering workflows: Continuous profiling provides detailed insights to help debug complex issues faster, guide development, and improve overall code quality.\\"}),`\\n`]}),`\\n`,(0,i.jsx)(e.p,{children:\\"With these benefits, customers can now not only manage the overall application\\\\u2019s efficiency on the cloud, but also ensure the application is optimally developed.\\"}),`\\n`,(0,i.jsx)(e.h2,{id:\\"what-is-continuous-profiling\\",children:\\"What is continuous profiling?\\"}),`\\n`,(0,i.jsx)(e.p,{children:\\"Elastic\\\\u2019s continuous profiling agent is a whole-system, always-on, continuous profiling solution that eliminates the need for run-time/bytecode instrumentation, recompilation, on-host debug symbols or service restarts.\\\\xA0\\\\xA0\\\\xA0\\"}),`\\n`,(0,i.jsxs)(e.p,{children:[\\"Profiling helps organizations run efficient services by minimizing computational wastage, thereby reducing operational costs. 
Leveraging \\",(0,i.jsx)(e.a,{href:\\"https://ebpf.io/\\",rel:\\"nofollow\\",children:\\"eBPF\\"}),\\", the Elastic profiling agent provides unprecedented visibility into the runtime behavior of all applications: it builds stack traces that go from the kernel, through userspace native code, all the way into code running in higher level runtimes, enabling you to identify performance regressions, reduce wasteful computations, and debug complex issues faster.\\\\xA0\\"]}),`\\n`,(0,i.jsxs)(e.p,{children:[\\"To this end, it measures code efficiency in three dimensions: CPU utilization, CO2, and cloud cost. This approach resonates with the sustainability objectives of our customers \\\\u2013\\\\u2013 ensuring that Elastic continuous profiling aligns seamlessly with their strategic \\",(0,i.jsx)(e.a,{href:\\"https://en.wikipedia.org/wiki/Environmental,_social,_and_corporate_governance\\",rel:\\"nofollow\\",children:\\"ESG\\"}),\\" goals\\"]}),`\\n`,(0,i.jsx)(e.h2,{id:\\"does-elastic-support-opentelemetry-today\\",children:\\"Does Elastic support OpenTelemetry today?\\"}),`\\n`,(0,i.jsxs)(e.p,{children:[(0,i.jsx)(e.a,{href:\\"https://www.elastic.co/observability/opentelemetry\\",rel:\\"nofollow\\",children:\\"Elastic supports OTel natively\\"}),\\". Elastic users can send OTel data directly from applications or through the OTel collector into Elastic APM, which processes both OTel SemConv and ECS. With this native OTel support, all \\",(0,i.jsx)(e.a,{href:\\"https://www.elastic.co/observability/application-performance-monitoring\\",rel:\\"nofollow\\",children:\\"Elastic APM capabilities\\"}),\\" are available with OTel. \\",(0,i.jsx)(e.a,{href:\\"https://www.elastic.co/guide/en/apm/guide/current/open-telemetry.html\\",rel:\\"nofollow\\",children:\\"See Elastic documentation to learn more about OTel integration\\"}),\\".\\"]}),`\\n`,(0,i.jsx)(e.p,{children:(0,i.jsx)(e.img,{src:\\"/assets/images/elastic-profiling-agent-acceptance-opentelemetry-faq/blog-elastic-otel-2.png\\",alt:\\"Native OpenTelemetry Support in Elastic\\",width:\\"1244\\",height:\\"479\\"})}),`\\n`,(0,i.jsx)(e.h2,{id:\\"where-can-i-learn-more-about-elastics-universal-profiling\\",children:\\"Where can I learn more about Elastic\\\\u2019s Universal Profiling?\\"}),`\\n`,(0,i.jsx)(e.p,{children:\\"Elastic\\\\u2019s resources help you understand continuous profiling and how to use it in different scenarios:\\"}),`\\n`,(0,i.jsx)(e.hr,{}),`\\n`,(0,i.jsxs)(e.ul,{children:[`\\n`,(0,i.jsxs)(e.li,{children:[`\\n`,(0,i.jsx)(e.p,{children:(0,i.jsx)(e.a,{href:\\"https://www.elastic.co/observability/universal-profiling\\",rel:\\"nofollow\\",children:\\"Elastic Universal Profiling home page\\"})}),`\\n`]}),`\\n`,(0,i.jsxs)(e.li,{children:[`\\n`,(0,i.jsx)(e.p,{children:(0,i.jsx)(e.a,{href:\\"https://www.elastic.co/blog/elastic-universal-profiling-agent-open-source\\",rel:\\"nofollow\\",children:\\"Elastic Universal Profiling agent going open source under Apache 2\\"})}),`\\n`]}),`\\n`,(0,i.jsxs)(e.li,{children:[`\\n`,(0,i.jsx)(e.p,{children:(0,i.jsx)(e.a,{href:\\"https://www.elastic.co/blog/continuous-profiling-distributed-tracing-correlation\\",rel:\\"nofollow\\",children:\\"Pinpointing performance issues with profiling\\"})}),`\\n`]}),`\\n`,(0,i.jsxs)(e.li,{children:[`\\n`,(0,i.jsx)(e.p,{children:(0,i.jsx)(e.a,{href:\\"https://www.elastic.co/blog/continuous-profiling-is-generally-available\\",rel:\\"nofollow\\",children:\\"Elastic releases Universal 
Profiling\\"})}),`\\n`]}),`\\n`,(0,i.jsxs)(e.li,{children:[`\\n`,(0,i.jsx)(e.p,{children:(0,i.jsx)(e.a,{href:\\"https://www.elastic.co/blog/whole-system-visibility-elastic-universal-profiling\\",rel:\\"nofollow\\",children:\\"Whole system profiling with Universal Profiling\\"})}),`\\n`]}),`\\n`,(0,i.jsxs)(e.li,{children:[`\\n`,(0,i.jsx)(e.p,{children:(0,i.jsx)(e.a,{href:\\"https://www.elastic.co/blog/continuous-profiling-efficient-cost-effective-applications\\",rel:\\"nofollow\\",children:\\"Cost-effective applications with Universal Profiling\\"})}),`\\n`]}),`\\n`,(0,i.jsxs)(e.li,{children:[`\\n`,(0,i.jsx)(e.p,{children:(0,i.jsx)(e.a,{href:\\"https://www.elastic.co/guide/en/observability/current/universal-profiling.html\\",rel:\\"nofollow\\",children:\\"Elastic documentation on Universal Profiling\\"})}),`\\n`]}),`\\n`]})]})}function d(n={}){let{wrapper:e}=n.components||{};return e?(0,i.jsx)(e,{...n,children:(0,i.jsx)(h,{...n})}):h(n)}return v(T);})();\\n;return Component;"},"_id":"articles/elastic-profiling-agent-acceptance-opentelemetry-faq.mdx","_raw":{"sourceFilePath":"articles/elastic-profiling-agent-acceptance-opentelemetry-faq.mdx","sourceFileName":"elastic-profiling-agent-acceptance-opentelemetry-faq.mdx","sourceFileDir":"articles","contentType":"mdx","flattenedPath":"articles/elastic-profiling-agent-acceptance-opentelemetry-faq"},"type":"Article","imageUrl":"/assets/images/elastic-profiling-agent-acceptance-opentelemetry-faq/profiling-acceptance-faq.png","readingTime":"5 min read","url":"/elastic-profiling-agent-acceptance-opentelemetry-faq","headings":[{"level":2,"title":"What is being announced?","href":"#what-is-being-announced"},{"level":2,"title":"What do Elastic users need to know?","href":"#what-do-elastic-users-need-to-know"},{"level":2,"title":"Why is Elastic contributing its Profiling Agent to OTel?","href":"#why-is-elastic-contributing-its-profiling-agent-to-otel"},{"level":2,"title":"Why is continuous profiling needed by organizations?","href":"#why-is-continuous-profiling-needed-by-organizations"},{"level":2,"title":"What is continuous profiling?","href":"#what-is-continuous-profiling"},{"level":2,"title":"Does Elastic support OpenTelemetry today?","href":"#does-elastic-support-opentelemetry-today"},{"level":2,"title":"Where can I learn more about Elastic’s Universal Profiling?","href":"#where-can-i-learn-more-about-elastics-universal-profiling"}]},{"title":"Elastic contributes its Universal Profiling agent to OpenTelemetry","slug":"elastic-profiling-agent-acceptance-opentelemetry","date":"2024-06-06","description":"Elastic is advancing the adoption of OpenTelemetry with the contribution of its universal profiling agent. Elastic is committed to ensuring a vendor-agnostic ingestion and collection of observability and security telemetry through OpenTelemetry.","image":"profiling-acceptance.png","author":[{"slug":"christos-kalkanis","type":"Author","_raw":{}},{"slug":"alexander-wert","type":"Author","_raw":{}},{"slug":"abhishek-singh","type":"Author","_raw":{}}],"tags":[{"slug":"universal-profiling","type":"Tag","_raw":{}},{"slug":"opentelemetry","type":"Tag","_raw":{}}],"body":{"raw":"Following great collaboration between Elastic and OpenTelemetry\'s profiling community, which included a thorough review process, the OpenTelemetry community has accepted Elastic\'s donation of our continuous profiling agent. This marks a significant milestone in helping establish profiling as the fourth telemetry signal in OpenTelemetry. 
Elastic’s eBPF-based continuous profiling agent observes code across different programming languages and runtimes, third-party libraries, kernel operations, and system resources with low CPU and memory overhead in production. SREs can now benefit from these capabilities: quickly identifying performance bottlenecks, maximizing resource utilization, reducing carbon footprint, and optimizing cloud spend.\\nOver the past year, we have been instrumental in [enhancing OpenTelemetry\'s Semantic Conventions](https://opentelemetry.io/blog/2023/ecs-otel-semconv-convergence/) with the donation of Elastic Common Schema (ECS), contributing to the OpenTelemetry Collector and language SDKs, and have been working with OpenTelemetry’s Profiling Special Interest Group (SIG) to lay the foundation necessary to make profiling stable.\\n\\nWith today’s acceptance, we are officially contributing our continuous profiler technology to OpenTelemetry. We will also dedicate a team of profiling domain experts to co-maintain and advance the profiling capabilities within OTel.\\n\\nWe want to thank the OpenTelemetry community for the great and constructive cooperation on the donation proposal. We look forward to jointly establishing continuous profiling as an integral part of OpenTelemetry. \\n\\n## What is continuous profiling?\\n\\nProfiling is a technique used to understand the behavior of a software application by collecting information about its execution. This includes tracking the duration of function calls, memory usage, CPU usage, and other system resources. \\n \\nHowever, traditional profiling solutions have significant drawbacks limiting adoption in production environments:\\n\\n* Significant cost and performance overhead due to code instrumentation\\n* Disruptive service restarts\\n* Inability to get visibility into third-party libraries\\n\\nUnlike traditional profiling, which is often done only in a specific development phase or under controlled test conditions, continuous profiling runs in the background with minimal overhead. This provides real-time, actionable insights without replicating issues in separate environments. SREs, DevOps, and developers can see how code affects performance and cost, making code and infrastructure improvements easier.\\n\\n## Contribution of production-grade features\\nElastic Universal Profiling is a whole-system, always-on, continuous profiling solution that eliminates the need for code instrumentation, recompilation, on-host debug symbols or service restarts. Leveraging eBPF, Elastic Universal Profiling profiles every line of code running on a machine, including application code, kernel, and third-party libraries. The solution measures code efficiency in three dimensions, CPU utilization, CO2, and cloud cost, to help organizations manage efficient services by minimizing computational waste.\\n\\nThe Elastic profiling agent facilitates identifying non-optimal code paths, uncovering \\"unknown unknowns\\", and provides comprehensive visibility into the runtime behavior of all applications. 
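To make that concrete: the raw material of any sampling profiler is a stream of stack traces, and aggregating those samples is what surfaces the hot code paths a flamegraph visualizes. Here is a toy sketch of that aggregation step (our illustration with made-up frame names, not agent code):

```python
# A conceptual toy, not the Elastic agent: sampling profilers periodically
# capture stack traces; counting the samples reveals the hot code paths
# that a flamegraph visualizes. Frame names below are made up.
from collections import Counter

# Each sample is one captured stack, outermost frame first
# (kernel -> runtime -> application code).
samples = [
    ("tcp_recvmsg", "handle_request", "serialize_json"),
    ("tcp_recvmsg", "handle_request", "serialize_json"),
    ("tcp_recvmsg", "handle_request", "query_db"),
    ("gc_cycle",),
]

# Count samples by leaf frame: where the CPU was actually executing.
leaf_counts = Counter(stack[-1] for stack in samples)

for frame, count in leaf_counts.most_common():
    print(f"{frame}: {count}/{len(samples)} samples "
          f"({100 * count / len(samples):.0f}% of CPU)")
```
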
Elastic’s continuous profiling agent supports various runtimes and languages, such as C/C++, Rust, Zig, Go, Java, Python, Ruby, PHP, Node.js, V8, Perl, and .NET.\\n\\nAdditionally, organizations can meet sustainability objectives by minimizing computational wastage, ensuring seamless alignment with their strategic [ESG](https://en.wikipedia.org/wiki/Environmental,_social,_and_corporate_governance) goals.\\n\\n## Benefits to OpenTelemetry\\n\\nThis contribution not only boosts the standardization of continuous profiling for observability but also accelerates the practical adoption of profiling as the fourth key signal in OTel. Customers get a vendor-agnostic way of collecting profiling data and enabling correlation with existing signals, like tracing, metrics, and logs, opening [new potential for observability insights and a more efficient troubleshooting experience](https://www.elastic.co/blog/continuous-profiling-distributed-tracing-correlation).\xa0\\n\\nOTel-based continuous profiling unlocks the following possibilities for users:\\n\\n- Improved customer experience: delivering consistent service quality and performance through continuous profiling ensures customers have an application that performs optimally, remains responsive, and is reliable.\\n\\n* Maximize gross margins: Businesses can optimize their cloud spend and improve profitability by reducing the computational resources needed to run applications. Whole system continuous profiling identifies the most expensive functions (down to the lines of code) across diverse environments that may span multiple cloud providers. In the cloud context, every CPU cycle saved translates to money saved.\xa0\\n\\n- Minimize environmental impact: energy consumption associated with computing is a growing concern (source: [MIT Energy Initiative](https://energy.mit.edu/news/energy-efficient-computing/) ). More efficient code translates to lower energy consumption, reducing carbon (CO2) footprint.\xa0\\n\\n* Accelerate engineering workflows: continuous profiling provides detailed insights to help troubleshoot complex issues faster, guide development, and improve overall code quality.\\n\\n- Improved vendor neutrality and increased efficiency: an OTel eBPF-based profiling agent removes the need to use proprietary APM agents and offers a more efficient way to collect profiling telemetry.\\n\\nWith these benefits, customers can now manage the overall application’s efficiency on the cloud while ensuring their engineering teams optimize it.\\n\\n## What comes next?\\n\\nWhile the acceptance of Elastic’s donation of the profiling agent marks a significant milestone in the evolution of OTel’s eBPF-based continuous profiling capabilities, it represents the beginning of a broader journey. Moving forward, we will continue collaborating closely with the OTel Profiling and Collector SIGs to ensure seamless integration of the profiling agent within the broader OTel ecosystem. During this phase, users can test early preview versions of the OTel profiling integration by following the directions in the [otel-profiling-agent](https://github.com/elastic/otel-profiling-agent/) repository.\\n\\nElastic remains deeply committed to OTel’s vision of enabling cross-signal correlation. 
We plan to further contribute to the community by sharing our innovative research and implementations, specifically those facilitating the correlation between profiling data and distributed traces, across several OTel language SDKs and the profiling agent.\\n\\nWe are excited about our [growing relationship with OTel](https://opentelemetry.io/blog/2023/ecs-otel-semconv-convergence/) and the opportunity to donate our profiling agent in a way that benefits both the Elastic community and the broader OTel community. Learn more about [Elastic’s OpenTelemetry support](https://www.elastic.co/observability/opentelemetry) and learn how to contribute to the ongoing profiling work in the community.\\n\\n## Additional Resources\\nAdditional details on Elastic’s Universal Profiling can be found in the [FAQ](https://www.elastic.co/observability-labs/blog/elastic-profiling-agent-acceptance-opentelemetry-faq). \\n\\nFor insights into observability, visit Observability labs where OTel specific articles are also available.\\n\\n\\n","code":"var Component=(()=>{var p=Object.create;var r=Object.defineProperty;var g=Object.getOwnPropertyDescriptor;var u=Object.getOwnPropertyNames;var f=Object.getPrototypeOf,m=Object.prototype.hasOwnProperty;var y=(n,e)=>()=>(e||n((e={exports:{}}).exports,e),e.exports),b=(n,e)=>{for(var t in e)r(n,t,{get:e[t],enumerable:!0})},s=(n,e,t,a)=>{if(e&&typeof e==\\"object\\"||typeof e==\\"function\\")for(let o of u(e))!m.call(n,o)&&o!==t&&r(n,o,{get:()=>e[o],enumerable:!(a=g(e,o))||a.enumerable});return n};var v=(n,e,t)=>(t=n!=null?p(f(n)):{},s(e||!n||!n.__esModule?r(t,\\"default\\",{value:n,enumerable:!0}):t,n)),w=n=>s(r({},\\"__esModule\\",{value:!0}),n);var c=y((x,l)=>{l.exports=_jsx_runtime});var O={};b(O,{default:()=>h,frontmatter:()=>T});var i=v(c()),T={title:\\"Elastic contributes its Universal Profiling agent to OpenTelemetry\\",slug:\\"elastic-profiling-agent-acceptance-opentelemetry\\",date:\\"2024-06-06\\",description:\\"Elastic is advancing the adoption of OpenTelemetry with the contribution of its universal profiling agent. Elastic is committed to ensuring a vendor-agnostic ingestion and collection of observability and security telemetry through OpenTelemetry.\\",author:[{slug:\\"christos-kalkanis\\"},{slug:\\"alexander-wert\\"},{slug:\\"abhishek-singh\\"}],image:\\"profiling-acceptance.png\\",tags:[{slug:\\"universal-profiling\\"},{slug:\\"opentelemetry\\"}]};function d(n){let e={a:\\"a\\",h2:\\"h2\\",li:\\"li\\",p:\\"p\\",ul:\\"ul\\",...n.components};return(0,i.jsxs)(i.Fragment,{children:[(0,i.jsxs)(e.p,{children:[`Following great collaboration between Elastic and OpenTelemetry\'s profiling community, which included a thorough review process, the OpenTelemetry community has accepted Elastic\'s donation of our continuous profiling agent. This marks a significant milestone in helping establish profiling as the fourth telemetry signal in OpenTelemetry. Elastic\\\\u2019s eBPF-based continuous profiling agent observes code across different programming languages and runtimes, third-party libraries, kernel operations, and system resources with low CPU and memory overhead in production. 
SREs can now benefit from these capabilities: quickly identifying performance bottlenecks, maximizing resource utilization, reducing carbon footprint, and optimizing cloud spend.\\nOver the past year, we have been instrumental in `,(0,i.jsx)(e.a,{href:\\"https://opentelemetry.io/blog/2023/ecs-otel-semconv-convergence/\\",rel:\\"nofollow\\",children:\\"enhancing OpenTelemetry\'s Semantic Conventions\\"}),\\" with the donation of Elastic Common Schema (ECS), contributing to the OpenTelemetry Collector and language SDKs, and have been working with OpenTelemetry\\\\u2019s Profiling Special Interest Group (SIG) to lay the foundation necessary to make profiling stable.\\"]}),`\\n`,(0,i.jsx)(e.p,{children:\\"With today\\\\u2019s acceptance, we are officially contributing our continuous profiler technology to OpenTelemetry. We will also dedicate a team of profiling domain experts to co-maintain and advance the profiling capabilities within OTel.\\"}),`\\n`,(0,i.jsx)(e.p,{children:\\"We want to thank the OpenTelemetry community for the great and constructive cooperation on the donation proposal. We look forward to jointly establishing continuous profiling as an integral part of OpenTelemetry.\\"}),`\\n`,(0,i.jsx)(e.h2,{id:\\"what-is-continuous-profiling\\",children:\\"What is continuous profiling?\\"}),`\\n`,(0,i.jsx)(e.p,{children:\\"Profiling is a technique used to understand the behavior of a software application by collecting information about its execution. This includes tracking the duration of function calls, memory usage, CPU usage, and other system resources.\\"}),`\\n`,(0,i.jsx)(e.p,{children:\\"However, traditional profiling solutions have significant drawbacks limiting adoption in production environments:\\"}),`\\n`,(0,i.jsxs)(e.ul,{children:[`\\n`,(0,i.jsx)(e.li,{children:\\"Significant cost and performance overhead due to code instrumentation\\"}),`\\n`,(0,i.jsx)(e.li,{children:\\"Disruptive service restarts\\"}),`\\n`,(0,i.jsx)(e.li,{children:\\"Inability to get visibility into third-party libraries\\"}),`\\n`]}),`\\n`,(0,i.jsx)(e.p,{children:\\"Unlike traditional profiling, which is often done only in a specific development phase or under controlled test conditions, continuous profiling runs in the background with minimal overhead. This provides real-time, actionable insights without replicating issues in separate environments. SREs, DevOps, and developers can see how code affects performance and cost, making code and infrastructure improvements easier.\\"}),`\\n`,(0,i.jsx)(e.h2,{id:\\"contribution-of-production-grade-features\\",children:\\"Contribution of production-grade features\\"}),`\\n`,(0,i.jsx)(e.p,{children:\\"Elastic Universal Profiling is a whole-system, always-on, continuous profiling solution that eliminates the need for code instrumentation, recompilation, on-host debug symbols or service restarts. Leveraging eBPF, Elastic Universal Profiling profiles every line of code running on a machine, including application code, kernel, and third-party libraries. The solution measures code efficiency in three dimensions, CPU utilization, CO2, and cloud cost, to help organizations manage efficient services by minimizing computational waste.\\"}),`\\n`,(0,i.jsx)(e.p,{children:\'The Elastic profiling agent facilitates identifying non-optimal code paths, uncovering \\"unknown unknowns\\", and provides comprehensive visibility into the runtime behavior of all applications. 
Elastic\\\\u2019s continuous profiling agent supports various runtimes and languages, such as C/C++, Rust, Zig, Go, Java, Python, Ruby, PHP, Node.js, V8, Perl, and .NET.\'}),`\\n`,(0,i.jsxs)(e.p,{children:[\\"Additionally, organizations can meet sustainability objectives by minimizing computational wastage, ensuring seamless alignment with their strategic \\",(0,i.jsx)(e.a,{href:\\"https://en.wikipedia.org/wiki/Environmental,_social,_and_corporate_governance\\",rel:\\"nofollow\\",children:\\"ESG\\"}),\\" goals.\\"]}),`\\n`,(0,i.jsx)(e.h2,{id:\\"benefits-to-opentelemetry\\",children:\\"Benefits to OpenTelemetry\\"}),`\\n`,(0,i.jsxs)(e.p,{children:[\\"This contribution not only boosts the standardization of continuous profiling for observability but also accelerates the practical adoption of profiling as the fourth key signal in OTel. Customers get a vendor-agnostic way of collecting profiling data and enabling correlation with existing signals, like tracing, metrics, and logs, opening \\",(0,i.jsx)(e.a,{href:\\"https://www.elastic.co/blog/continuous-profiling-distributed-tracing-correlation\\",rel:\\"nofollow\\",children:\\"new potential for observability insights and a more efficient troubleshooting experience\\"}),\\".\\\\xA0\\"]}),`\\n`,(0,i.jsx)(e.p,{children:\\"OTel-based continuous profiling unlocks the following possibilities for users:\\"}),`\\n`,(0,i.jsxs)(e.ul,{children:[`\\n`,(0,i.jsx)(e.li,{children:\\"Improved customer experience: delivering consistent service quality and performance through continuous profiling ensures customers have an application that performs optimally, remains responsive, and is reliable.\\"}),`\\n`]}),`\\n`,(0,i.jsxs)(e.ul,{children:[`\\n`,(0,i.jsx)(e.li,{children:\\"Maximize gross margins: Businesses can optimize their cloud spend and improve profitability by reducing the computational resources needed to run applications. Whole system continuous profiling identifies the most expensive functions (down to the lines of code) across diverse environments that may span multiple cloud providers. In the cloud context, every CPU cycle saved translates to money saved.\\\\xA0\\"}),`\\n`]}),`\\n`,(0,i.jsxs)(e.ul,{children:[`\\n`,(0,i.jsxs)(e.li,{children:[\\"Minimize environmental impact: energy consumption associated with computing is a growing concern (source: \\",(0,i.jsx)(e.a,{href:\\"https://energy.mit.edu/news/energy-efficient-computing/\\",rel:\\"nofollow\\",children:\\"MIT Energy Initiative\\"}),\\" ). 
More efficient code translates to lower energy consumption, reducing carbon (CO2) footprint.\\\\xA0\\"]}),`\\n`]}),`\\n`,(0,i.jsxs)(e.ul,{children:[`\\n`,(0,i.jsx)(e.li,{children:\\"Accelerate engineering workflows: continuous profiling provides detailed insights to help troubleshoot complex issues faster, guide development, and improve overall code quality.\\"}),`\\n`]}),`\\n`,(0,i.jsxs)(e.ul,{children:[`\\n`,(0,i.jsx)(e.li,{children:\\"Improved vendor neutrality and increased efficiency: an OTel eBPF-based profiling agent removes the need to use proprietary APM agents and offers a more efficient way to collect profiling telemetry.\\"}),`\\n`]}),`\\n`,(0,i.jsx)(e.p,{children:\\"With these benefits, customers can now manage the overall application\\\\u2019s efficiency on the cloud while ensuring their engineering teams optimize it.\\"}),`\\n`,(0,i.jsx)(e.h2,{id:\\"what-comes-next\\",children:\\"What comes next?\\"}),`\\n`,(0,i.jsxs)(e.p,{children:[\\"While the acceptance of Elastic\\\\u2019s donation of the profiling agent marks a significant milestone in the evolution of OTel\\\\u2019s eBPF-based continuous profiling capabilities, it represents the beginning of a broader journey. Moving forward, we will continue collaborating closely with the OTel Profiling and Collector SIGs to ensure seamless integration of the profiling agent within the broader OTel ecosystem. During this phase, users can test early preview versions of the OTel profiling integration by following the directions in the \\",(0,i.jsx)(e.a,{href:\\"https://github.com/elastic/otel-profiling-agent/\\",rel:\\"nofollow\\",children:\\"otel-profiling-agent\\"}),\\" repository.\\"]}),`\\n`,(0,i.jsx)(e.p,{children:\\"Elastic remains deeply committed to OTel\\\\u2019s vision of enabling cross-signal correlation. We plan to further contribute to the community by sharing our innovative research and implementations, specifically those facilitating the correlation between profiling data and distributed traces, across several OTel language SDKs and the profiling agent.\\"}),`\\n`,(0,i.jsxs)(e.p,{children:[\\"We are excited about our \\",(0,i.jsx)(e.a,{href:\\"https://opentelemetry.io/blog/2023/ecs-otel-semconv-convergence/\\",rel:\\"nofollow\\",children:\\"growing relationship with OTel\\"}),\\" and the opportunity to donate our profiling agent in a way that benefits both the Elastic community and the broader OTel community. 
Learn more about \\",(0,i.jsx)(e.a,{href:\\"https://www.elastic.co/observability/opentelemetry\\",rel:\\"nofollow\\",children:\\"Elastic\\\\u2019s OpenTelemetry support\\"}),\\" and learn how to contribute to the ongoing profiling work in the community.\\"]}),`\\n`,(0,i.jsx)(e.h2,{id:\\"additional-resources\\",children:\\"Additional Resources\\"}),`\\n`,(0,i.jsxs)(e.p,{children:[\\"Additional details on Elastic\\\\u2019s Universal Profiling can be found in the \\",(0,i.jsx)(e.a,{href:\\"https://www.elastic.co/observability-labs/blog/elastic-profiling-agent-acceptance-opentelemetry-faq\\",rel:\\"nofollow\\",children:\\"FAQ\\"}),\\".\\"]}),`\\n`,(0,i.jsx)(e.p,{children:\\"For insights into observability, visit Observability labs where OTel specific articles are also available.\\"})]})}function h(n={}){let{wrapper:e}=n.components||{};return e?(0,i.jsx)(e,{...n,children:(0,i.jsx)(d,{...n})}):d(n)}return w(O);})();\\n;return Component;"},"_id":"articles/elastic-profiling-agent-acceptance-opentelemetry.mdx","_raw":{"sourceFilePath":"articles/elastic-profiling-agent-acceptance-opentelemetry.mdx","sourceFileName":"elastic-profiling-agent-acceptance-opentelemetry.mdx","sourceFileDir":"articles","contentType":"mdx","flattenedPath":"articles/elastic-profiling-agent-acceptance-opentelemetry"},"type":"Article","imageUrl":"/assets/images/elastic-profiling-agent-acceptance-opentelemetry/profiling-acceptance.png","readingTime":"5 min read","url":"/elastic-profiling-agent-acceptance-opentelemetry","headings":[{"level":2,"title":"What is continuous profiling?","href":"#what-is-continuous-profiling"},{"level":2,"title":"Contribution of production-grade features","href":"#contribution-of-production-grade-features"},{"level":2,"title":"Benefits to OpenTelemetry","href":"#benefits-to-opentelemetry"},{"level":2,"title":"What comes next?","href":"#what-comes-next"},{"level":2,"title":"Additional Resources","href":"#additional-resources"}]},{"title":"Elastic\'s RAG-based AI Assistant: Analyze application issues with LLMs and private GitHub issues","slug":"elastic-rag-ai-assistant-application-issues-llm-github","date":"2024-05-08","description":"In this blog, we review how GitHub issues and other GitHub documents from internal and external GitHub repositories can be used in root cause analysis with Elastic’s RAG-based AI Assistant.","image":"AI_fingertip_touching_human_fingertip.jpg","author":[{"slug":"bahubali-shetti","type":"Author","_raw":{}}],"tags":[{"slug":"genai","type":"Tag","_raw":{}}],"body":{"raw":"\\nAs an SRE, analyzing applications is more complex than ever. Not only do you have to ensure the application is running optimally to ensure great customer experiences, but you must also understand the inner workings in some cases to help troubleshoot. Analyzing issues in a production-based service is a team sport. It takes the SRE, DevOps, development, and support to get to the root cause and potentially remediate. If it\'s impacting, then it\'s even worse because there is a race against time. Regardless of the situation, there is a ton of information that needs to be consumed and processed. This includes not only what the customer is experiencing, but also internal data to help provide the most appropriate resolution.\\n\\nElastic’s AI Assistant helps improve analysis for SREs, DevOps, Devs, and others. 
In a single window using natural language questions, you can analyze using not only general information but combine it with things like:\\n\\n- Issues from internal GitHub repos, Jira, etc.\\n\\n- Documents from internal wiki sites from Confluence, etc.\\n\\n- Customer issues from your support service\\n\\n- And more\\n\\nIn this blog, we will walk you through how to:\\n\\n1. Ingest an external GitHub repository ([OpenTelemetry demo repo](https://github.com/open-telemetry/opentelemetry-demo)) with code and issues into Elastic. Apply Elastic Learned Sparse EncodeR (ELSER) and store it in a specific index for the AI Assistant.\\n\\n2. Ingest internal GitHub repository with runbook information into Elastic. Apply ELSER and store the processed data in a specific index for the AI Assistant.\\n\\n3. Use these two indices when analyzing issues for the OpenTelemetry demo in Elastic using the AI Assistant.\\n\\n## 3 simple questions using GitHub data with AI Assistant\\n\\nBefore we walk through the steps for setting up data from GitHub, let’s review what an SRE can do with the AI Assistant and GitHub repos.\\n\\nWe initially connect to GitHub using an Elastic GitHub connector and ingest and process two repos: the OpenTelemetry demo repo (public) and an internal runbook repo (Elastic internal).\\n\\n![1 - elasticsearch connectors](/assets/images/elastic-rag-ai-assistant-application-issues-llm-github/1.png)\\n\\nWith these two loaded and parsed by ELSER, we ask the AI Assistant some simple questions generally asked during analysis.\\n\\n### How many issues are open for the OpenTelemetry demo?\\n\\nSince we ingested the entire repo (as of April 26, 2024) with a doc count of 1,529, we ask it a simple question regarding the total number of issues that are open. We specifically tell the AI Assistant to search our internal index to ensure the LLM knows to ask Elastic to search its internal index for the total number of issues.\\n\\n\\n\\n### Are there any issues for the Rust based shippingservice?\\n\\nElastic’s AI Assistant uses ELSER to traverse the loaded GitHub repo and finds the open issue against the shippingservice (which is the following [issue](https://github.com/open-telemetry/opentelemetry-demo/issues/346) at the time of writing this post).\\n\\n\\n\\n### Is there a runbook for the Cartservice?\\n\\nSince we loaded an internal GitHub repo with a few sample runbooks, the Elastic AI Assistant properly finds the runbook.\\n\\n\\n\\nAs we go through this blog, we will talk about how the AI Assistant finds these issues using ELSER and how you can configure it to use your own GitHub repos.\\n\\n## Retrieval augmented generation (RAG) with Elastic AI Assistant\\n\\nElastic has the most advanced RAG-based AI Assistant for both Observability and Security. It can help you analyze your data using:\\n\\n- Your favorite LLM (OpenAI, Azure OpenAI, AWS Bedrock, etc.)\\n\\n- Any internal information (GitHub, Confluence, customer issues, etc.) 
you can either connect to or bring into Elastic’s indices\\n\\n![Elastic AI Assistant — connecting internal and external information](/assets/images/elastic-rag-ai-assistant-application-issues-llm-github/2.png)\\n\\nThe reason Elastic’s AI Assistant can do this is because it supports RAG, which helps retrieve internal information along with LLM-based knowledge.\\n\\nAdding relevant internal information for an SRE into Elastic:\\n\\n- As data comes in, such as in your GitHub repository, ELSER is applied to the data, and embeddings (weights and tokens into a sparse vector field) are added to capture semantic meaning and context of the data.\\n\\n- This data (GitHub, Confluence, etc.) is processed with embeddings and is stored in an index that can be searched by the AI Assistant.\\n\\nWhen you query the AI Assistant for information:\\n\\n- The query goes through the same inference process as the ingested data using ELSER. The input query generates a “sparse vector,” which is used to find the most relevant highly ranked information in the ingested data (GitHub, Confluence, etc.).\\n\\n- The retrieved data is then combined with the query and also sent over to the LLM, which will then add its own knowledge base information (if there is anything to add), or it might ask Elastic (via function calls) to analyze, chart, or even search further. If a function call is made to Elastic and a response is provided, it will be added by the LLM to its response.\\n\\n- The results will be the most contextual based answer combining both LLM and anything relevant from your internal data.\\n\\n![3 - elastic\'s RAG flowchart](/assets/images/elastic-rag-ai-assistant-application-issues-llm-github/3.png)\\n\\n## Application, prerequisites, and config\\n\\nIf you want to try the steps in this blog, here are some prerequisites:\\n\\n- An Elastic Cloud account — [sign up now](https://cloud.elastic.co/)\\n\\n- [OpenTelemetry demo](https://github.com/open-telemetry/opentelemetry-demo) running and connected to Elastic ([APM documentation](https://www.elastic.co/guide/en/observability/current/apm-open-telemetry-direct.html#apm-instrument-apps-otel))\\n\\n- Whatever internal GitHub repo you want to use with some information that is useful for analysis (In our walk through, we will be using a GitHub repo that houses runbooks for different scenarios when Elastic does demos).\\n\\n- Account with your favorite or approved LLM (OpenAI, Azure OpenAI, AWS Bedrock)\\n\\n## Adding the GitHub repos to Elastic\\n\\nThe first step is to set up the GitHub connector and connect to your GitHub repo. Elastic has several connectors from GitHub, Confluence, Google Drive, Jira, AWS S3, Microsoft Teams, Slack, and more. 
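Whichever source you connect, the retrieval side works the same way underneath: the question is expanded by ELSER into a sparse vector and matched against the enriched index. As a minimal illustration of that retrieval step (not the AI Assistant's internal code; the index and field names below are assumptions for this sketch):

```python
# Illustrative only: the kind of sparse-vector (ELSER) query the retrieval
# step performs. Index name and field name are hypothetical; the model ID
# is the stock ELSER v2 model on an 8.x cluster.
from elasticsearch import Elasticsearch

es = Elasticsearch("https://localhost:9200", api_key="<api-key>")

response = es.search(
    index="github-otel-demo",  # hypothetical connector-backed index
    query={
        "text_expansion": {
            "body_expanded": {  # hypothetical ELSER sparse-vector field
                "model_id": ".elser_model_2",
                "model_text": "Are there any issues for the Rust based shippingservice?",
            }
        }
    },
    size=3,
)

# The top-ranked documents are what gets handed to the LLM as context.
for hit in response["hits"]["hits"]:
    print(hit["_score"], hit["_source"]["title"])
```
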
So while we will go over the GitHub connector in this blog, don’t forget about other connectors.\n\n![4 - select a connector](/assets/images/elastic-rag-ai-assistant-application-issues-llm-github/4.png)\n\nOnce you select the GitHub connector and give it a name, you need to add two items:\n\n- GitHub token\n\n- The URL open-telemetry/opentelemetry-demo\n\n![5 - configuration](/assets/images/elastic-rag-ai-assistant-application-issues-llm-github/5.png)\n\nNext, add it to an index in the wizard.\n\n![6 - attach an index](/assets/images/elastic-rag-ai-assistant-application-issues-llm-github/6.png)\n\n## Create a pipeline and process the data with ELSER\n\nIn order to add the embeddings we discussed in the section above, we need to add the following to the connector:\n\n- Create a pipeline in the configuration wizard.\n\n- Create a custom pipeline.\n\n- Add the ML inference pipeline.\n\n- Select ELSERv2 ML Model to add the embeddings.\n\n- Select the fields that need to be evaluated as part of the inference pipeline.\n\n- Test and save the inference pipeline and the overall pipeline.\n\nA minimal, API-level sketch of the pipeline these steps produce is included at the end of this post.\n\n![7](/assets/images/elastic-rag-ai-assistant-application-issues-llm-github/7.png)\n\n![8](/assets/images/elastic-rag-ai-assistant-application-issues-llm-github/8.png)\n\n![9](/assets/images/elastic-rag-ai-assistant-application-issues-llm-github/9.png)\n\n![10](/assets/images/elastic-rag-ai-assistant-application-issues-llm-github/10.png)\n\n## Sync the data\n\nNow that the pipeline is created, you need to start a sync of the GitHub repo. As the documents from the GitHub repo come in, they will go through the pipeline, and embeddings will be added.\n\n![11](/assets/images/elastic-rag-ai-assistant-application-issues-llm-github/11.png)\n\n## Embeddings\n\nOnce the pipeline is set up, sync the data in the connector. As the GitHub repository comes in, the inference pipeline will process the data as follows:\n\n- As data comes in from your GitHub repository, ELSER is applied to the data, and embeddings (weights and tokens into a sparse vector field) are added to capture semantic meaning and context of the data.\n\n- This data is processed with embeddings and is stored in an index that can be searched by the AI Assistant.\n\nWhen you look at the OpenTelemetry GitHub documents that were ingested, you will see how the weights and tokens are added to the predicted_value field in the index.\n\n![12](/assets/images/elastic-rag-ai-assistant-application-issues-llm-github/12.png)\n\n![13](/assets/images/elastic-rag-ai-assistant-application-issues-llm-github/13.png)\n\nThese embeddings will now be used to find the most contextually relevant documents when the user asks the AI Assistant a query.\n\n## Check if AI Assistant can use the index\n\nElastic’s AI Assistant uses ELSER to traverse the loaded GitHub repo and finds the open issue against the shippingservice (which is the following [issue](https://github.com/open-telemetry/opentelemetry-demo/issues/346) at the time of writing this post).\n\n\n\nBased on the response, we can see that the AI Assistant can now use the index to find the issue and use it for further analysis.\n\n## Conclusion\n\nYou’ve now seen how easy Elastic’s RAG-based AI Assistant is to set up. You can bring in documents from multiple locations (GitHub, Confluence, Slack, etc.). We’ve shown the setup for GitHub and OpenTelemetry. 
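As a recap, here is the minimal, API-level sketch promised above of the two moving parts the connector wizard configured: an ingest pipeline that runs ELSER over incoming documents, and a document indexed through it. This illustrates the mechanism rather than the exact pipeline the wizard generates; the pipeline, index, and field names are assumptions.

```python
# A minimal sketch of the mechanism the connector wizard sets up for you.
# All names here (index, pipeline, fields) are illustrative assumptions,
# not the values the wizard actually generates.
from elasticsearch import Elasticsearch

es = Elasticsearch("https://localhost:9200", api_key="<api-key>")

# 1) The target index maps the ELSER output as a sparse_vector field
#    (assumes a recent 8.x cluster with the ELSER v2 model deployed).
es.indices.create(
    index="github-otel-demo",
    mappings={
        "properties": {
            "title": {"type": "text"},
            "body": {"type": "text"},
            "body_expanded": {"type": "sparse_vector"},
        }
    },
)

# 2) An ingest pipeline runs ELSER inference over each incoming document,
#    writing the token/weight expansion into the sparse_vector field.
es.ingest.put_pipeline(
    id="github-otel-demo-elser",  # hypothetical pipeline name
    processors=[
        {
            "inference": {
                "model_id": ".elser_model_2",
                "input_output": [
                    {"input_field": "body", "output_field": "body_expanded"}
                ],
            }
        }
    ],
)

# 3) Documents synced through the pipeline get embeddings added on ingest.
es.index(
    index="github-otel-demo",
    pipeline="github-otel-demo-elser",
    document={
        "title": "shippingservice fails to start",
        "body": "The Rust-based shippingservice crashes on startup when ...",
    },
)
```

With the pipeline in place and the index synced, retrieval works exactly as in the query sketch shown earlier.
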
This internal information can be useful in managing issues, accelerating resolution, and improving customer experiences. Check out our other blogs on how the AI Assistant can help SREs do better analysis, lower MTTR, and improve operations overall:\\n\\n- [Analyzing OpenTelemetry apps with Elastic AI Assistant and APM](https://www.elastic.co/blog/analyzing-opentelemetry-apps-elastic-ai-assistant-apm)\\n\\n- [The Elastic AI Assistant for Observability escapes Kibana!](https://www.elastic.co/blog/elastic-ai-assistant-observability-escapes-kibana)\\n\\n- [Getting started with the Elastic AI Assistant for Observability and Microsoft Azure OpenAI](https://www.elastic.co/blog/elastic-ai-assistant-observability-microsoft-azure-openai)\\n\\n- [Elastic 8.13: GA of Amazon Bedrock in the Elastic AI Assistant for Observability](https://www.elastic.co/blog/whats-new-elastic-8-13-0)\\n\\n- [Enhancing SRE troubleshooting with the AI Assistant for Observability and your organization\'s runbooks](https://www.elastic.co/blog/sre-troubleshooting-ai-assistant-observability-runbooks)\\n\\n- [Context-aware insights using the Elastic AI Assistant for Observability](https://www.elastic.co/blog/context-aware-insights-elastic-ai-assistant-observability)\\n\\n- [Getting started with the Elastic AI Assistant for Observability and Amazon Bedrock](https://www.elastic.co/blog/elastic-ai-assistant-observability-amazon-bedrock)\\n\\n## Try it out\\n\\nExisting Elastic Cloud customers can access many of these features directly from the [Elastic Cloud console](https://cloud.elastic.co/). Not taking advantage of Elastic on cloud? [Start a free trial](https://www.elastic.co/cloud/cloud-trial-overview).\\n\\nAll of this is also possible in your environments. [Learn how to get started today](https://www.elastic.co/observability/universal-profiling).\\n\\n_The release and timing of any features or functionality described in this post remain at Elastic\'s sole discretion. Any features or functionality not currently available may not be delivered on time or at all._\\n\\n_In this blog post, we may have used or referred to third party generative AI tools, which are owned and operated by their respective owners. Elastic does not have any control over the third party tools and we have no responsibility or liability for their content, operation or use, nor for any loss or damage that may arise from your use of such tools. Please exercise caution when using AI tools with personal, sensitive or confidential information. Any data you submit may be used for AI training or other purposes. There is no guarantee that information you provide will be kept secure or confidential. You should familiarize yourself with the privacy practices and terms of use of any generative AI tools prior to use._\\n\\n_Elastic, Elasticsearch, ESRE, Elasticsearch Relevance Engine and associated marks are trademarks, logos or registered trademarks of Elasticsearch N.V. in the United States and other countries. 
All other company and product names are trademarks, logos or registered trademarks of their respective owners._\\n","code":"var Component=(()=>{var p=Object.create;var a=Object.defineProperty;var u=Object.getOwnPropertyDescriptor;var g=Object.getOwnPropertyNames;var m=Object.getPrototypeOf,w=Object.prototype.hasOwnProperty;var f=(t,e)=>()=>(e||t((e={exports:{}}).exports,e),e.exports),b=(t,e)=>{for(var n in e)a(t,n,{get:e[n],enumerable:!0})},r=(t,e,n,o)=>{if(e&&typeof e==\\"object\\"||typeof e==\\"function\\")for(let s of g(e))!w.call(t,s)&&s!==n&&a(t,s,{get:()=>e[s],enumerable:!(o=u(e,s))||o.enumerable});return t};var y=(t,e,n)=>(n=t!=null?p(m(t)):{},r(e||!t||!t.__esModule?a(n,\\"default\\",{value:t,enumerable:!0}):n,t)),A=t=>r(a({},\\"__esModule\\",{value:!0}),t);var h=f((G,l)=>{l.exports=_jsx_runtime});var I={};b(I,{default:()=>d,frontmatter:()=>v});var i=y(h()),v={title:\\"Elastic\'s RAG-based AI Assistant: Analyze application issues with LLMs and private GitHub issues\\",slug:\\"elastic-rag-ai-assistant-application-issues-llm-github\\",date:\\"2024-05-08\\",description:\\"In this blog, we review how GitHub issues and other GitHub documents from internal and external GitHub repositories can be used in root cause analysis with Elastic\\\\u2019s RAG-based AI Assistant.\\",author:[{slug:\\"bahubali-shetti\\"}],image:\\"AI_fingertip_touching_human_fingertip.jpg\\",tags:[{slug:\\"genai\\"}]};function c(t){let e={a:\\"a\\",em:\\"em\\",h2:\\"h2\\",h3:\\"h3\\",img:\\"img\\",li:\\"li\\",ol:\\"ol\\",p:\\"p\\",ul:\\"ul\\",...t.components},{Video:n}=e;return n||E(\\"Video\\",!0),(0,i.jsxs)(i.Fragment,{children:[(0,i.jsx)(e.p,{children:\\"As an SRE, analyzing applications is more complex than ever. Not only do you have to ensure the application is running optimally to ensure great customer experiences, but you must also understand the inner workings in some cases to help troubleshoot. Analyzing issues in a production-based service is a team sport. It takes the SRE, DevOps, development, and support to get to the root cause and potentially remediate. If it\'s impacting, then it\'s even worse because there is a race against time. Regardless of the situation, there is a ton of information that needs to be consumed and processed. This includes not only what the customer is experiencing, but also internal data to help provide the most appropriate resolution.\\"}),`\\n`,(0,i.jsx)(e.p,{children:\\"Elastic\\\\u2019s AI Assistant helps improve analysis for SREs, DevOps, Devs, and others. 
In a single window using natural language questions, you can analyze using not only general information but combine it with things like:\\"}),`\\n`,(0,i.jsxs)(e.ul,{children:[`\\n`,(0,i.jsxs)(e.li,{children:[`\\n`,(0,i.jsx)(e.p,{children:\\"Issues from internal GitHub repos, Jira, etc.\\"}),`\\n`]}),`\\n`,(0,i.jsxs)(e.li,{children:[`\\n`,(0,i.jsx)(e.p,{children:\\"Documents from internal wiki sites from Confluence, etc.\\"}),`\\n`]}),`\\n`,(0,i.jsxs)(e.li,{children:[`\\n`,(0,i.jsx)(e.p,{children:\\"Customer issues from your support service\\"}),`\\n`]}),`\\n`,(0,i.jsxs)(e.li,{children:[`\\n`,(0,i.jsx)(e.p,{children:\\"And more\\"}),`\\n`]}),`\\n`]}),`\\n`,(0,i.jsx)(e.p,{children:\\"In this blog, we will walk you through how to:\\"}),`\\n`,(0,i.jsxs)(e.ol,{children:[`\\n`,(0,i.jsxs)(e.li,{children:[`\\n`,(0,i.jsxs)(e.p,{children:[\\"Ingest an external GitHub repository (\\",(0,i.jsx)(e.a,{href:\\"https://github.com/open-telemetry/opentelemetry-demo\\",rel:\\"nofollow\\",children:\\"OpenTelemetry demo repo\\"}),\\") with code and issues into Elastic. Apply Elastic Learned Sparse EncodeR (ELSER) and store it in a specific index for the AI Assistant.\\"]}),`\\n`]}),`\\n`,(0,i.jsxs)(e.li,{children:[`\\n`,(0,i.jsx)(e.p,{children:\\"Ingest internal GitHub repository with runbook information into Elastic. Apply ELSER and store the processed data in a specific index for the AI Assistant.\\"}),`\\n`]}),`\\n`,(0,i.jsxs)(e.li,{children:[`\\n`,(0,i.jsx)(e.p,{children:\\"Use these two indices when analyzing issues for the OpenTelemetry demo in Elastic using the AI Assistant.\\"}),`\\n`]}),`\\n`]}),`\\n`,(0,i.jsx)(e.h2,{id:\\"3-simple-questions-using-github-data-with-ai-assistant\\",children:\\"3 simple questions using GitHub data with AI Assistant\\"}),`\\n`,(0,i.jsx)(e.p,{children:\\"Before we walk through the steps for setting up data from GitHub, let\\\\u2019s review what an SRE can do with the AI Assistant and GitHub repos.\\"}),`\\n`,(0,i.jsx)(e.p,{children:\\"We initially connect to GitHub using an Elastic GitHub connector and ingest and process two repos: the OpenTelemetry demo repo (public) and an internal runbook repo (Elastic internal).\\"}),`\\n`,(0,i.jsx)(e.p,{children:(0,i.jsx)(e.img,{src:\\"/assets/images/elastic-rag-ai-assistant-application-issues-llm-github/1.png\\",alt:\\"1 - elasticsearch connectors\\",width:\\"1999\\",height:\\"898\\"})}),`\\n`,(0,i.jsx)(e.p,{children:\\"With these two loaded and parsed by ELSER, we ask the AI Assistant some simple questions generally asked during analysis.\\"}),`\\n`,(0,i.jsx)(e.h3,{id:\\"how-many-issues-are-open-for-the-opentelemetry-demo\\",children:\\"How many issues are open for the OpenTelemetry demo?\\"}),`\\n`,(0,i.jsx)(e.p,{children:\\"Since we ingested the entire repo (as of April 26, 2024) with a doc count of 1,529, we ask it a simple question regarding the total number of issues that are open. 
We specifically tell the AI Assistant to search our internal index so that the LLM knows to ask Elastic for the total number of open issues.\\"}),`\\n`,(0,i.jsx)(n,{vidyardUuid:\\"XyKWeYz21mdDkMfop7absQ\\",loop:!0}),`\\n`,(0,i.jsx)(e.h3,{id:\\"are-there-any-issues-for-the-rust-based-shippingservice\\",children:\\"Are there any issues for the Rust-based shippingservice?\\"}),`\\n`,(0,i.jsxs)(e.p,{children:[\\"Elastic\\\\u2019s AI Assistant uses ELSER to traverse the loaded GitHub repo and finds the open issue against the shippingservice (which is the following \\",(0,i.jsx)(e.a,{href:\\"https://github.com/open-telemetry/opentelemetry-demo/issues/346\\",rel:\\"nofollow\\",children:\\"issue\\"}),\\" at the time of writing this post).\\"]}),`\\n`,(0,i.jsx)(n,{vidyardUuid:\\"TF1qgy3WH3cuLQdBvdX66A\\",loop:!0}),`\\n`,(0,i.jsx)(e.h3,{id:\\"is-there-a-runbook-for-the-cartservice\\",children:\\"Is there a runbook for the Cartservice?\\"}),`\\n`,(0,i.jsx)(e.p,{children:\\"Since we loaded an internal GitHub repo with a few sample runbooks, the Elastic AI Assistant properly finds the runbook.\\"}),`\\n`,(0,i.jsx)(n,{vidyardUuid:\\"kSukiZ6zYZDQDycs616ji8\\",loop:!0}),`\\n`,(0,i.jsx)(e.p,{children:\\"As we go through this blog, we will talk about how the AI Assistant finds these issues using ELSER and how you can configure it to use your own GitHub repos.\\"}),`\\n`,(0,i.jsx)(e.h2,{id:\\"retrieval-augmented-generation-rag-with-elastic-ai-assistant\\",children:\\"Retrieval augmented generation (RAG) with Elastic AI Assistant\\"}),`\\n`,(0,i.jsx)(e.p,{children:\\"Elastic has the most advanced RAG-based AI Assistant for both Observability and Security. It can help you analyze your data using:\\"}),`\\n`,(0,i.jsxs)(e.ul,{children:[`\\n`,(0,i.jsxs)(e.li,{children:[`\\n`,(0,i.jsx)(e.p,{children:\\"Your favorite LLM (OpenAI, Azure OpenAI, AWS Bedrock, etc.)\\"}),`\\n`]}),`\\n`,(0,i.jsxs)(e.li,{children:[`\\n`,(0,i.jsx)(e.p,{children:\\"Any internal information (GitHub, Confluence, customer issues, etc.) you can either connect to or bring into Elastic\\\\u2019s indices\\"}),`\\n`]}),`\\n`]}),`\\n`,(0,i.jsx)(e.p,{children:(0,i.jsx)(e.img,{src:\\"/assets/images/elastic-rag-ai-assistant-application-issues-llm-github/2.png\\",alt:\\"Elastic AI Assistant \\\\u2014 connecting internal and external information\\",width:\\"1999\\",height:\\"1120\\"})}),`\\n`,(0,i.jsx)(e.p,{children:\\"Elastic\\\\u2019s AI Assistant can do this because it supports RAG, which helps retrieve internal information along with LLM-based knowledge.\\"}),`\\n`,(0,i.jsx)(e.p,{children:\\"Adding relevant internal information for an SRE into Elastic:\\"}),`\\n`,(0,i.jsxs)(e.ul,{children:[`\\n`,(0,i.jsxs)(e.li,{children:[`\\n`,(0,i.jsx)(e.p,{children:\\"As data comes in, such as in your GitHub repository, ELSER is applied to the data, and embeddings (weights and tokens into a sparse vector field) are added to capture semantic meaning and context of the data.\\"}),`\\n`]}),`\\n`,(0,i.jsxs)(e.li,{children:[`\\n`,(0,i.jsx)(e.p,{children:\\"This data (GitHub, Confluence, etc.) is processed with embeddings and is stored in an index that can be searched by the AI Assistant.\\"}),`\\n`]}),`\\n`]}),`\\n`,(0,i.jsx)(e.p,{children:\\"When you query the AI Assistant for information:\\"}),`\\n`,(0,i.jsxs)(e.ul,{children:[`\\n`,(0,i.jsxs)(e.li,{children:[`\\n`,(0,i.jsx)(e.p,{children:\\"The query goes through the same inference process as the ingested data using ELSER. 
The input query generates a \\\\u201Csparse vector,\\\\u201D which is used to find the most relevant, highest-ranked information in the ingested data (GitHub, Confluence, etc.).\\"}),`\\n`]}),`\\n`,(0,i.jsxs)(e.li,{children:[`\\n`,(0,i.jsx)(e.p,{children:\\"The retrieved data is then combined with the query and also sent over to the LLM, which will then add its own knowledge base information (if there is anything to add), or it might ask Elastic (via function calls) to analyze, chart, or even search further. If a function call is made to Elastic and a response is provided, it will be added by the LLM to its response.\\"}),`\\n`]}),`\\n`,(0,i.jsxs)(e.li,{children:[`\\n`,(0,i.jsx)(e.p,{children:\\"The result is the most contextual answer, combining LLM knowledge with anything relevant from your internal data.\\"}),`\\n`]}),`\\n`]}),`\\n`,(0,i.jsx)(e.p,{children:(0,i.jsx)(e.img,{src:\\"/assets/images/elastic-rag-ai-assistant-application-issues-llm-github/3.png\\",alt:\\"3 - elastic\'s RAG flowchart\\",width:\\"1999\\",height:\\"1111\\"})}),`\\n`,(0,i.jsx)(e.h2,{id:\\"application-prerequisites-and-config\\",children:\\"Application, prerequisites, and config\\"}),`\\n`,(0,i.jsx)(e.p,{children:\\"If you want to try the steps in this blog, here are some prerequisites:\\"}),`\\n`,(0,i.jsxs)(e.ul,{children:[`\\n`,(0,i.jsxs)(e.li,{children:[`\\n`,(0,i.jsxs)(e.p,{children:[\\"An Elastic Cloud account \\\\u2014 \\",(0,i.jsx)(e.a,{href:\\"https://cloud.elastic.co/\\",rel:\\"nofollow\\",children:\\"sign up now\\"})]}),`\\n`]}),`\\n`,(0,i.jsxs)(e.li,{children:[`\\n`,(0,i.jsxs)(e.p,{children:[(0,i.jsx)(e.a,{href:\\"https://github.com/open-telemetry/opentelemetry-demo\\",rel:\\"nofollow\\",children:\\"OpenTelemetry demo\\"}),\\" running and connected to Elastic (\\",(0,i.jsx)(e.a,{href:\\"https://www.elastic.co/guide/en/observability/current/apm-open-telemetry-direct.html#apm-instrument-apps-otel\\",rel:\\"nofollow\\",children:\\"APM documentation\\"}),\\")\\"]}),`\\n`]}),`\\n`,(0,i.jsxs)(e.li,{children:[`\\n`,(0,i.jsx)(e.p,{children:\\"Any internal GitHub repo with information that is useful for analysis (in our walkthrough, we use a GitHub repo that houses runbooks for different scenarios in Elastic demos).\\"}),`\\n`]}),`\\n`,(0,i.jsxs)(e.li,{children:[`\\n`,(0,i.jsx)(e.p,{children:\\"An account with your favorite or approved LLM (OpenAI, Azure OpenAI, AWS Bedrock)\\"}),`\\n`]}),`\\n`]}),`\\n`,(0,i.jsx)(e.h2,{id:\\"adding-the-github-repos-to-elastic\\",children:\\"Adding the GitHub repos to Elastic\\"}),`\\n`,(0,i.jsx)(e.p,{children:\\"The first step is to set up the GitHub connector and connect to your GitHub repo. Elastic has several connectors, including GitHub, Confluence, Google Drive, Jira, AWS S3, Microsoft Teams, Slack, and more. 
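Recent stacks also expose connector creation as an API (in technical preview since 8.12) if you prefer automation over the UI. A minimal sketch with a hypothetical connector ID and index name, before the token and repositories are configured:

```json
// Hypothetical connector ID and index name; token/repo config still happens afterwards.
PUT _connector/github-otel-demo
{
  "index_name": "search-github-otel-demo",
  "name": "GitHub OTel demo",
  "service_type": "github"
}
```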
So while we will go over the GitHub connector in this blog, don\\u2019t forget about other connectors.\\"}),`\\n`,(0,i.jsx)(e.p,{children:(0,i.jsx)(e.img,{src:\\"/assets/images/elastic-rag-ai-assistant-application-issues-llm-github/4.png\\",alt:\\"4 - select a connector\\",width:\\"1999\\",height:\\"1044\\"})}),`\\n`,(0,i.jsx)(e.p,{children:\\"Once you select the GitHub connector and give it a name, you need to add two items:\\"}),`\\n`,(0,i.jsxs)(e.ul,{children:[`\\n`,(0,i.jsxs)(e.li,{children:[`\\n`,(0,i.jsx)(e.p,{children:\\"GitHub token\\"}),`\\n`]}),`\\n`,(0,i.jsxs)(e.li,{children:[`\\n`,(0,i.jsx)(e.p,{children:\\"The repository path open-telemetry/opentelemetry-demo\\"}),`\\n`]}),`\\n`]}),`\\n`,(0,i.jsx)(e.p,{children:(0,i.jsx)(e.img,{src:\\"/assets/images/elastic-rag-ai-assistant-application-issues-llm-github/5.png\\",alt:\\"5 - configuration\\",width:\\"1999\\",height:\\"1416\\"})}),`\\n`,(0,i.jsx)(e.p,{children:\\"Next, add it to an index in the wizard.\\"}),`\\n`,(0,i.jsx)(e.p,{children:(0,i.jsx)(e.img,{src:\\"/assets/images/elastic-rag-ai-assistant-application-issues-llm-github/6.png\\",alt:\\"6 - attach an index\\",width:\\"1710\\",height:\\"1004\\"})}),`\\n`,(0,i.jsx)(e.h2,{id:\\"create-a-pipeline-and-process-the-data-with-elser\\",children:\\"Create a pipeline and process the data with ELSER\\"}),`\\n`,(0,i.jsx)(e.p,{children:\\"In order to add the embeddings we discussed in the section above, we need to add the following to the connector:\\"}),`\\n`,(0,i.jsxs)(e.ul,{children:[`\\n`,(0,i.jsxs)(e.li,{children:[`\\n`,(0,i.jsx)(e.p,{children:\\"Create a pipeline in the configuration wizard.\\"}),`\\n`]}),`\\n`,(0,i.jsxs)(e.li,{children:[`\\n`,(0,i.jsx)(e.p,{children:\\"Create a custom pipeline.\\"}),`\\n`]}),`\\n`,(0,i.jsxs)(e.li,{children:[`\\n`,(0,i.jsx)(e.p,{children:\\"Add the ML inference pipeline.\\"}),`\\n`]}),`\\n`,(0,i.jsxs)(e.li,{children:[`\\n`,(0,i.jsx)(e.p,{children:\\"Select the ELSER v2 ML model to add the embeddings.\\"}),`\\n`]}),`\\n`,(0,i.jsxs)(e.li,{children:[`\\n`,(0,i.jsx)(e.p,{children:\\"Select the fields that need to be evaluated as part of the inference pipeline.\\"}),`\\n`]}),`\\n`,(0,i.jsxs)(e.li,{children:[`\\n`,(0,i.jsx)(e.p,{children:\\"Test and save the inference pipeline and the overall pipeline.\\"}),`\\n`]}),`\\n`]}),`\\n`,(0,i.jsx)(e.p,{children:(0,i.jsx)(e.img,{src:\\"/assets/images/elastic-rag-ai-assistant-application-issues-llm-github/7.png\\",alt:\\"7\\",width:\\"1999\\",height:\\"969\\"})}),`\\n`,(0,i.jsx)(e.p,{children:(0,i.jsx)(e.img,{src:\\"/assets/images/elastic-rag-ai-assistant-application-issues-llm-github/8.png\\",alt:\\"8\\",width:\\"1290\\",height:\\"1134\\"})}),`\\n`,(0,i.jsx)(e.p,{children:(0,i.jsx)(e.img,{src:\\"/assets/images/elastic-rag-ai-assistant-application-issues-llm-github/9.png\\",alt:\\"9\\",width:\\"1999\\",height:\\"1087\\"})}),`\\n`,(0,i.jsx)(e.p,{children:(0,i.jsx)(e.img,{src:\\"/assets/images/elastic-rag-ai-assistant-application-issues-llm-github/10.png\\",alt:\\"10\\",width:\\"1999\\",height:\\"1094\\"})}),`\\n`,(0,i.jsx)(e.h2,{id:\\"sync-the-data\\",children:\\"Sync the data\\"}),`\\n`,(0,i.jsx)(e.p,{children:\\"Now that the pipeline is created, you need to start syncing the GitHub repo. 
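For reference, the ML inference step the wizard creates is an ordinary ingest pipeline under the hood. A minimal hand-rolled sketch, assuming the ELSER v2 model ID and hypothetical pipeline and field names (recent stacks support the input_output form shown here; the wizard may generate a slightly different shape):

```json
// Hypothetical pipeline and field names; the wizard-generated pipeline may differ.
PUT _ingest/pipeline/github-elser-embeddings
{
  "processors": [
    {
      "inference": {
        "model_id": ".elser_model_2",
        "input_output": [
          {
            "input_field": "body",
            "output_field": "ml.inference.body_expanded.predicted_value"
          }
        ]
      }
    }
  ]
}
```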
As the documents from the GitHub repo come in, they will go through the pipeline and embeddings will be added.\\"}),`\\n`,(0,i.jsx)(e.p,{children:(0,i.jsx)(e.img,{src:\\"/assets/images/elastic-rag-ai-assistant-application-issues-llm-github/11.png\\",alt:\\"11\\",width:\\"1085\\",height:\\"779\\"})}),`\\n`,(0,i.jsx)(e.h2,{id:\\"embeddings\\",children:\\"Embeddings\\"}),`\\n`,(0,i.jsx)(e.p,{children:\\"Once the pipeline is set up, sync the data in the connector. As the GitHub repository comes in, the inference pipeline will process the data as follows:\\"}),`\\n`,(0,i.jsxs)(e.ul,{children:[`\\n`,(0,i.jsxs)(e.li,{children:[`\\n`,(0,i.jsx)(e.p,{children:\\"As data comes in from your GitHub repository, ELSER is applied to the data, and embeddings (weights and tokens into a sparse vector field) are added to capture semantic meaning and context of the data.\\"}),`\\n`]}),`\\n`,(0,i.jsxs)(e.li,{children:[`\\n`,(0,i.jsx)(e.p,{children:\\"This data is processed with embeddings and is stored in an index that can be searched by the AI Assistant.\\"}),`\\n`]}),`\\n`]}),`\\n`,(0,i.jsx)(e.p,{children:\\"When you look at the OpenTelemetry GitHub documents that were ingested, you will see how the weights and tokens are added to the predicted_value field in the index.\\"}),`\\n`,(0,i.jsx)(e.p,{children:(0,i.jsx)(e.img,{src:\\"/assets/images/elastic-rag-ai-assistant-application-issues-llm-github/12.png\\",alt:\\"12\\",width:\\"1999\\",height:\\"1039\\"})}),`\\n`,(0,i.jsx)(e.p,{children:(0,i.jsx)(e.img,{src:\\"/assets/images/elastic-rag-ai-assistant-application-issues-llm-github/13.png\\",alt:\\"13\\",width:\\"1999\\",height:\\"1061\\"})}),`\\n`,(0,i.jsx)(e.p,{children:\\"These embeddings will now be used to find the most contextually relevant document when the user asks the AI Assistant a query that might use this data.\\"}),`\\n`,(0,i.jsx)(e.h2,{id:\\"check-if-ai-assistant-can-use-the-index\\",children:\\"Check if AI Assistant can use the index\\"}),`\\n`,(0,i.jsxs)(e.p,{children:[\\"Elastic\\\\u2019s AI Assistant uses ELSER to traverse the loaded GitHub repo and finds the open issue against the shippingservice (which is the following \\",(0,i.jsx)(e.a,{href:\\"https://github.com/open-telemetry/opentelemetry-demo/issues/346\\",rel:\\"nofollow\\",children:\\"issue\\"}),\\" at the time of writing this post).\\"]}),`\\n`,(0,i.jsx)(n,{vidyardUuid:\\"TF1qgy3WH3cuLQdBvdX66A\\",loop:!0}),`\\n`,(0,i.jsx)(e.p,{children:\\"Based on the response, we can see that the AI Assistant can now use the index to find the issue and use it for further analysis.\\"}),`\\n`,(0,i.jsx)(e.h2,{id:\\"conclusion\\",children:\\"Conclusion\\"}),`\\n`,(0,i.jsx)(e.p,{children:\\"You\\\\u2019ve now seen how easy Elastic\\\\u2019s RAG-based AI Assistant is to set up. You can bring in documents from multiple locations (GitHub, Confluence, Slack, etc.). We\\\\u2019ve shown the setup for GitHub and OpenTelemetry. This internal information can be useful in managing issues, accelerating resolution, and improving customer experiences. 
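If you ever want to sanity-check retrieval outside of the Assistant, the same ELSER lookup can be issued by hand with a text_expansion query. A minimal sketch, again with hypothetical index and field names:

```json
// Hypothetical index and sparse-vector field names; model ID is ELSER v2.
GET search-github-runbooks/_search
{
  "size": 3,
  "query": {
    "text_expansion": {
      "ml.inference.body_expanded.predicted_value": {
        "model_id": ".elser_model_2",
        "model_text": "Is there a runbook for the cartservice?"
      }
    }
  }
}
```

The top hits here should be the same documents the Assistant's recall step would typically surface.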
Check out our other blogs on how the AI Assistant can help SREs do better analysis, lower MTTR, and improve operations overall:\\"}),`\\n`,(0,i.jsxs)(e.ul,{children:[`\\n`,(0,i.jsxs)(e.li,{children:[`\\n`,(0,i.jsx)(e.p,{children:(0,i.jsx)(e.a,{href:\\"https://www.elastic.co/blog/analyzing-opentelemetry-apps-elastic-ai-assistant-apm\\",rel:\\"nofollow\\",children:\\"Analyzing OpenTelemetry apps with Elastic AI Assistant and APM\\"})}),`\\n`]}),`\\n`,(0,i.jsxs)(e.li,{children:[`\\n`,(0,i.jsx)(e.p,{children:(0,i.jsx)(e.a,{href:\\"https://www.elastic.co/blog/elastic-ai-assistant-observability-escapes-kibana\\",rel:\\"nofollow\\",children:\\"The Elastic AI Assistant for Observability escapes Kibana!\\"})}),`\\n`]}),`\\n`,(0,i.jsxs)(e.li,{children:[`\\n`,(0,i.jsx)(e.p,{children:(0,i.jsx)(e.a,{href:\\"https://www.elastic.co/blog/elastic-ai-assistant-observability-microsoft-azure-openai\\",rel:\\"nofollow\\",children:\\"Getting started with the Elastic AI Assistant for Observability and Microsoft Azure OpenAI\\"})}),`\\n`]}),`\\n`,(0,i.jsxs)(e.li,{children:[`\\n`,(0,i.jsx)(e.p,{children:(0,i.jsx)(e.a,{href:\\"https://www.elastic.co/blog/whats-new-elastic-8-13-0\\",rel:\\"nofollow\\",children:\\"Elastic 8.13: GA of Amazon Bedrock in the Elastic AI Assistant for Observability\\"})}),`\\n`]}),`\\n`,(0,i.jsxs)(e.li,{children:[`\\n`,(0,i.jsx)(e.p,{children:(0,i.jsx)(e.a,{href:\\"https://www.elastic.co/blog/sre-troubleshooting-ai-assistant-observability-runbooks\\",rel:\\"nofollow\\",children:\\"Enhancing SRE troubleshooting with the AI Assistant for Observability and your organization\'s runbooks\\"})}),`\\n`]}),`\\n`,(0,i.jsxs)(e.li,{children:[`\\n`,(0,i.jsx)(e.p,{children:(0,i.jsx)(e.a,{href:\\"https://www.elastic.co/blog/context-aware-insights-elastic-ai-assistant-observability\\",rel:\\"nofollow\\",children:\\"Context-aware insights using the Elastic AI Assistant for Observability\\"})}),`\\n`]}),`\\n`,(0,i.jsxs)(e.li,{children:[`\\n`,(0,i.jsx)(e.p,{children:(0,i.jsx)(e.a,{href:\\"https://www.elastic.co/blog/elastic-ai-assistant-observability-amazon-bedrock\\",rel:\\"nofollow\\",children:\\"Getting started with the Elastic AI Assistant for Observability and Amazon Bedrock\\"})}),`\\n`]}),`\\n`]}),`\\n`,(0,i.jsx)(e.h2,{id:\\"try-it-out\\",children:\\"Try it out\\"}),`\\n`,(0,i.jsxs)(e.p,{children:[\\"Existing Elastic Cloud customers can access many of these features directly from the \\",(0,i.jsx)(e.a,{href:\\"https://cloud.elastic.co/\\",rel:\\"nofollow\\",children:\\"Elastic Cloud console\\"}),\\". Not taking advantage of Elastic on cloud? \\",(0,i.jsx)(e.a,{href:\\"https://www.elastic.co/cloud/cloud-trial-overview\\",rel:\\"nofollow\\",children:\\"Start a free trial\\"}),\\".\\"]}),`\\n`,(0,i.jsxs)(e.p,{children:[\\"All of this is also possible in your environments. \\",(0,i.jsx)(e.a,{href:\\"https://www.elastic.co/observability/universal-profiling\\",rel:\\"nofollow\\",children:\\"Learn how to get started today\\"}),\\".\\"]}),`\\n`,(0,i.jsx)(e.p,{children:(0,i.jsx)(e.em,{children:\\"The release and timing of any features or functionality described in this post remain at Elastic\'s sole discretion. Any features or functionality not currently available may not be delivered on time or at all.\\"})}),`\\n`,(0,i.jsx)(e.p,{children:(0,i.jsx)(e.em,{children:\\"In this blog post, we may have used or referred to third party generative AI tools, which are owned and operated by their respective owners. 
Elastic does not have any control over the third party tools and we have no responsibility or liability for their content, operation or use, nor for any loss or damage that may arise from your use of such tools. Please exercise caution when using AI tools with personal, sensitive or confidential information. Any data you submit may be used for AI training or other purposes. There is no guarantee that information you provide will be kept secure or confidential. You should familiarize yourself with the privacy practices and terms of use of any generative AI tools prior to use.\\"})}),`\\n`,(0,i.jsx)(e.p,{children:(0,i.jsx)(e.em,{children:\\"Elastic, Elasticsearch, ESRE, Elasticsearch Relevance Engine and associated marks are trademarks, logos or registered trademarks of Elasticsearch N.V. in the United States and other countries. All other company and product names are trademarks, logos or registered trademarks of their respective owners.\\"})})]})}function d(t={}){let{wrapper:e}=t.components||{};return e?(0,i.jsx)(e,{...t,children:(0,i.jsx)(c,{...t})}):c(t)}function E(t,e){throw new Error(\\"Expected \\"+(e?\\"component\\":\\"object\\")+\\" `\\"+t+\\"` to be defined: you likely forgot to import, pass, or provide it.\\")}return A(I);})();\\n;return Component;"},"_id":"articles/elastic-rag-ai-assistant-application-issues-llm-github.mdx","_raw":{"sourceFilePath":"articles/elastic-rag-ai-assistant-application-issues-llm-github.mdx","sourceFileName":"elastic-rag-ai-assistant-application-issues-llm-github.mdx","sourceFileDir":"articles","contentType":"mdx","flattenedPath":"articles/elastic-rag-ai-assistant-application-issues-llm-github"},"type":"Article","imageUrl":"/assets/images/elastic-rag-ai-assistant-application-issues-llm-github/AI_fingertip_touching_human_fingertip.jpg","readingTime":"10 min read","url":"/elastic-rag-ai-assistant-application-issues-llm-github","headings":[{"level":2,"title":"3 simple questions using GitHub data with AI Assistant","href":"#3-simple-questions-using-github-data-with-ai-assistant"},{"level":3,"title":"How many issues are open for the OpenTelemetry demo?","href":"#how-many-issues-are-open-for-the-opentelemetry-demo"},{"level":3,"title":"Are there any issues for the Rust based shippingservice?","href":"#are-there-any-issues-for-the-rust-based-shippingservice"},{"level":3,"title":"Is there a runbook for the Cartservice?","href":"#is-there-a-runbook-for-the-cartservice"},{"level":2,"title":"Retrieval augmented generation (RAG) with Elastic AI Assistant","href":"#retrieval-augmented-generation-rag-with-elastic-ai-assistant"},{"level":2,"title":"Application, prerequisites, and config","href":"#application-prerequisites-and-config"},{"level":2,"title":"Adding the GitHub repos to Elastic","href":"#adding-the-github-repos-to-elastic"},{"level":2,"title":"Create a pipeline and process the data with ELSER","href":"#create-a-pipeline-and-process-the-data-with-elser"},{"level":2,"title":"Sync the data","href":"#sync-the-data"},{"level":2,"title":"Embeddings","href":"#embeddings"},{"level":2,"title":"Check if AI Assistant can use the index","href":"#check-if-ai-assistant-can-use-the-index"},{"level":2,"title":"Conclusion","href":"#conclusion"},{"level":2,"title":"Try it out","href":"#try-it-out"}]},{"title":"Elastic SQL inputs: A generic solution for database metrics observability","slug":"sql-inputs-database-metrics-observability","date":"2023-09-11","description":"This blog dives into the functionality of generic SQL and provides various use cases for advanced users to 
ingest custom metrics to Elastic for database observability. We also introduce the new fetch-from-all-databases capability released in 8.10.","image":"patterns-midnight-background-no-logo-observability.png","author":[{"slug":"lalit-satapathy","type":"Author","_raw":{}},{"slug":"ishleen-kaur","type":"Author","_raw":{}},{"slug":"muthukumar-paramasivam","type":"Author","_raw":{}}],"tags":[{"slug":"log-analytics","type":"Tag","_raw":{}},{"slug":"log-analytics","type":"Tag","_raw":{}},{"slug":"kql","type":"Tag","_raw":{}}],"body":{"raw":"\\nElastic\xae SQL inputs ([metricbeat](https://www.elastic.co/guide/en/beats/metricbeat/current/metricbeat-module-sql.html) module and [input package](https://docs.elastic.co/integrations/sql)) allow the user to execute [SQL](https://en.wikipedia.org/wiki/SQL) queries against many supported databases in a flexible way and ingest the resulting metrics to Elasticsearch\xae. This blog dives into the functionality of generic SQL and provides various use cases for _advanced users_ to ingest custom metrics to Elastic\xae for database observability. The blog also introduces the new fetch-from-all-databases capability, released in 8.10.\\n\\n## Why “Generic SQL”?\\n\\nElastic already has metricbeat and integration packages targeted for specific databases. One example is [metricbeat](https://www.elastic.co/guide/en/beats/metricbeat/current/metricbeat-module-mysql.html) for MySQL — and the corresponding integration [package](https://docs.elastic.co/en/integrations/mysql). These beats modules and integrations are customized for a specific database, and the metrics are extracted using pre-defined queries from the specific database. The queries used in these integrations and the corresponding metrics are _not_ available for modification.\\n\\nThe _Generic SQL inputs_ ([metricbeat](https://www.elastic.co/guide/en/beats/metricbeat/current/metricbeat-module-sql.html) or [input package](https://docs.elastic.co/integrations/sql)), in contrast, can be used to scrape metrics from any supported database using the user\'s SQL queries. The queries are provided by the user depending on specific metrics to be extracted. This enables a much more powerful mechanism for metrics ingestion, where users can choose a specific driver and provide the relevant SQL queries and the results get mapped to one or more Elasticsearch documents, using a structured mapping process (table/variable format explained later).\\n\\nGeneric SQL inputs can be used in conjunction with the existing integration packages, which already extract specific database metrics, to extract additional custom metrics dynamically, making this input very powerful. In this blog, _Generic SQL input_ and _Generic SQL_ are used interchangeably.\\n\\n![Generic SQL database metrics collection](/assets/images/sql-inputs-database-metrics-observability/elastic-blog-1-genericSQL.png)\\n\\n## Functionalities details\\n\\nThis section covers some of the features that help with metrics extraction. We provide a brief description of the response format configuration. Then we dive into the merge_results functionality, which is used to combine results from multiple SQL queries into a single document.\\n\\nThe next key functionality users may be interested in is collecting metrics from all the custom databases, which is now possible with the fetch_from_all_databases feature.\\n\\nNow let\'s dive into the specific functionalities:\\n\\n### Different drivers supported\\n\\nThe generic SQL input can fetch metrics from many different databases. 
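Each driver is addressed with a driver-specific connection string in the hosts setting. For orientation, a minimal Metricbeat module block for MySQL, with a hypothetical DSN and query (the per-query options mirror the examples later in this post):

```yaml
# Minimal sketch of a generic SQL module block; DSN and query are hypothetical.
- module: sql
  metricsets: ["query"]
  period: 1m
  hosts: ["root:secret@tcp(localhost:3306)/"]
  driver: "mysql"
  sql_queries:
    # Two-column result (Variable_name, Value), so the variables format applies.
    - query: "SHOW GLOBAL STATUS LIKE 'Threads_connected'"
      response_format: variables
```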
The current version has the capability to fetch metrics from the following drivers: MySQL, PostgreSQL, Oracle, and Microsoft SQL Server (MSSQL).\\n\\n### Response format\\n\\nThe response format in generic SQL is used to shape the data in either table or variable format. Here’s an overview of the formats and syntax for creating and using the table and variables.\\n\\nSyntax: `response_format: table {{or}} variables`\\n\\n**Response format table** \\nThis mode generates a single event for each row. The table format has no restrictions on the number of columns in the response.\\n\\nExample:\\n\\n```yaml\\ndriver: \\"mssql\\"\\nsql_queries:\\n - query: \\"SELECT counter_name, cntr_value FROM sys.dm_os_performance_counters WHERE counter_name= \'User Connections\'\\"\\n response_format: table\\n```\\n\\nThis query returns a response similar to this:\\n\\n```json\\n\\"sql\\":{\\n \\"metrics\\":{\\n \\"counter_name\\":\\"User Connections \\",\\n \\"cntr_value\\":7\\n },\\n \\"driver\\":\\"mssql\\"\\n}\\n```\\n\\nThe response generated above adds the counter_name as a key in the document.\\n\\n**Response format variables** \\nThe variable format produces key:value pairs and expects the query to fetch exactly two columns.\\n\\nExample:\\n\\n```yaml\\ndriver: \\"mssql\\"\\nsql_queries:\\n - query: \\"SELECT counter_name, cntr_value FROM sys.dm_os_performance_counters WHERE counter_name= \'User Connections\'\\"\\n response_format: variables\\n```\\n\\nThe variable format takes the first column in the query above as the key:\\n\\n```json\\n\\"sql\\":{\\n \\"metrics\\":{\\n \\"user connections \\":7\\n },\\n \\"driver\\":\\"mssql\\"\\n}\\n```\\n\\nIn the above response, you can see the value of counter_name is used to generate the key in variable format.\\n\\n### Response optimization: merge_results\\n\\nWe now support merging multiple query responses into a single event. By enabling **merge_results**, users can significantly optimize the storage space of the metrics ingested to Elasticsearch. This mode enables an efficient compaction of the document generated, where instead of generating multiple documents, a single merged document is generated wherever applicable. 
The metrics of a similar kind, generated from multiple queries, are combined into a single event.\\n\\n![Output of Merge results](/assets/images/sql-inputs-database-metrics-observability/elastic-blog-2-output-merge-results.png)\\n\\nSyntax: `merge_results: true {{or}} false`\\n\\nIn the example below, you can see how the data is loaded into Elasticsearch when merge_results is disabled.\\n\\nExample:\\n\\nIn this example, we are using two different queries to fetch metrics from the performance counter.\\n\\n```yaml\\nmerge_results: false\\ndriver: \\"mssql\\"\\nsql_queries:\\n - query: \\"SELECT cntr_value As \'user_connections\' FROM sys.dm_os_performance_counters WHERE counter_name= \'User Connections\'\\"\\n response_format: table\\n - query: \\"SELECT cntr_value As \'buffer_cache_hit_ratio\' FROM sys.dm_os_performance_counters WHERE counter_name = \'Buffer cache hit ratio\' AND object_name like \'%Buffer Manager%\'\\"\\n response_format: table\\n```\\n\\nAs you can see, the response for the above example generates a single document for each query.\\n\\nThe resulting document from the first query:\\n\\n```json\\n\\"sql\\":{\\n \\"metrics\\":{\\n \\"user_connections\\":7\\n },\\n \\"driver\\":\\"mssql\\"\\n}\\n```\\n\\nAnd the resulting document from the second query:\\n\\n```json\\n\\"sql\\":{\\n \\"metrics\\":{\\n \\"buffer_cache_hit_ratio\\":87\\n },\\n \\"driver\\":\\"mssql\\"\\n}\\n```\\n\\nWhen we enable the merge_results flag in the query, both of the above metrics are combined and loaded in a single document.\\n\\nYou can see the merged document in the below example:\\n\\n```json\\n\\"sql\\":{\\n \\"metrics\\":{\\n \\"user_connections\\":7,\\n \\"buffer_cache_hit_ratio\\":87\\n },\\n \\"driver\\":\\"mssql\\"\\n}\\n```\\n\\n_However, table queries can only be merged if each produces a single row. There is no such restriction on merging variable queries._\\n\\n### Introducing a new capability: fetch_from_all_databases\\n\\nThis is a [new functionality](https://github.com/elastic/beats/pull/35688) to fetch all the database metrics automatically from the system and user databases of Microsoft SQL Server by enabling the fetch_from_all_databases flag.\\n\\nKeep an eye out for the [8.10 release version](https://www.elastic.co/guide/en/beats/metricbeat/8.10/metricbeat-module-sql.html#_example_execute_given_queries_for_all_databases_present_in_a_server) where you can start using the fetch-from-all-databases feature. Prior to 8.10, users had to provide the database names manually to fetch metrics from custom/user databases.\\n\\nSyntax: `fetch_from_all_databases: true {{or}} false`\\n\\nBelow is a sample query with the fetch_from_all_databases flag disabled:\\n\\n```yaml\\nfetch_from_all_databases: false\\ndriver: \\"mssql\\"\\nsql_queries:\\n - query: \\"SELECT @@servername AS server_name, @@servicename AS instance_name, name As \'database_name\', database_id FROM sys.databases WHERE name=\'master\';\\"\\n```\\n\\nThe above query fetches metrics only for the provided database name. 
Here the input database is master, so the metrics are fetched only for the master.\\n\\nBelow is a sample query with the fetch_from_all_databases flag enabled:\\n\\n```yaml\\nfetch_from_all_databases: true\\ndriver: \\"mssql\\"\\nsql_queries:\\n - query: SELECT @@servername AS server_name, @@servicename AS instance_name, DB_NAME() AS \'database_name\', DB_ID() AS database_id;\\n response_format: table\\n```\\n\\nThe above query fetches metrics from all available databases. This is useful when the user wants to get data from all the databases.\\n\\nPlease note: currently this feature is supported only for Microsoft SQL Server and will be used by the MS SQL integration internally, to support extracting metrics for [all user DBs](https://github.com/elastic/integrations/issues/4108) by default.\\n\\n## Using generic SQL: Metricbeat\\n\\nThe generic [SQL metricbeat module](https://www.elastic.co/guide/en/beats/metricbeat/current/metricbeat-module-sql.html) provides flexibility to execute queries against different database drivers. The metricbeat input is GA and ready for production usage. [Here](https://www.elastic.co/guide/en/beats/metricbeat/current/metricbeat-module-sql.html), you can find more information on configuring [the generic SQL](https://www.elastic.co/guide/en/beats/metricbeat/current/metricbeat-module-sql.html) for different drivers with various examples.\\n\\n## Using generic SQL: Input package\\n\\nThe input package provides a flexible solution to advanced users for customizing their ingestion experience in Elastic. Generic SQL is now also available as an SQL [input package](https://docs.elastic.co/integrations/sql). The input package is currently available for early users as a **beta release**. Let\'s walk through how users can use generic SQL via the input package.\\n\\n### Configurations of generic SQL input package:\\n\\nThe configuration options for the generic SQL input package are as follows:\\n\\n- **Driver:** This is the SQL database for which you want to use the package. In this case, we will take mysql as an example.\\n- **Hosts:** Here the user enters the connection string to connect to the database. It would vary depending on which database/driver is being used. Refer [here](https://docs.elastic.co/integrations/sql#hosts) for examples.\\n- **SQL Queries:** Here the user writes the SQL queries they want to run, along with the response_format for each.\\n- **Data set:** The user specifies a [data set](https://www.elastic.co/guide/en/ecs/master/ecs-data_stream.html#_data_stream_field_details) name to which the response fields get mapped.\\n- **Merge results:** This is an advanced setting, used to merge multiple query responses into a single event.\\n\\n![Configuration parameters for SQL input package](/assets/images/sql-inputs-database-metrics-observability/elastic-blog-3-SQL-metrics-inputpackage.png)\\n\\n![Metrics getting mapped to the index created by the ‘sql_first_dataset’](/assets/images/sql-inputs-database-metrics-observability/elastic-blog-4-expanded-document.png)\\n\\n### Metrics extensibility with customized SQL queries\\n\\nLet\'s say a user is using [MySQL integration](https://docs.elastic.co/integrations/mysql), which provides a fixed set of metrics. 
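As a concrete illustration of the kind of extension this section describes, here is a hypothetical custom query pulling an InnoDB counter that the stock integration may not collect:

```yaml
# Hypothetical custom query for the SQL input package; metric choice is illustrative.
driver: "mysql"
sql_queries:
  - query: "SELECT VARIABLE_VALUE AS innodb_buffer_pool_reads FROM performance_schema.global_status WHERE VARIABLE_NAME = 'Innodb_buffer_pool_reads'"
    response_format: table
```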
Their requirement now extends to retrieving more metrics from the MySQL database by firing new customized SQL queries.\\n\\nThis can be achieved by adding an instance of the SQL input package, writing the customized queries, and specifying a new [data set](https://www.elastic.co/guide/en/ecs/master/ecs-data_stream.html#field-data-stream-dataset) name as shown in the screenshot below.\\n\\nThis way users can get any metrics by executing corresponding queries. The resultant metrics of the query will be indexed to the new data set, sql_second_dataset.\\n\\n![Customization of Ingest Pipelines and Mappings](/assets/images/sql-inputs-database-metrics-observability/elastic-blog-5-driver.png)\\n\\nWhen there are multiple queries, users can combine them into a single event by enabling the Merge Results toggle.\\n\\n### Customizing user experience\\n\\nUsers can customize their data by writing their own [ingest pipelines](https://www.elastic.co/guide/en/elasticsearch/reference/current/ingest.html) and providing their customized [mappings](https://www.elastic.co/guide/en/elasticsearch/reference/current/mapping.html). Users can also build their own bespoke dashboards.\\n\\n![Customization of Ingest Pipelines and Mappings](/assets/images/sql-inputs-database-metrics-observability/elastic-blog-6-ingest-pipeline.png)\\n\\nAs we can see above, the SQL input package provides the flexibility to get new metrics by running new queries, which are not supported in the default MySQL integration (the user gets metrics from a predetermined set of queries).\\n\\nThe SQL input package also supports multiple drivers: mssql, postgresql, and oracle. So a single input package can be used to cater to all these databases.\\n\\nNote: The fetch_from_all_databases feature is not supported in the SQL input package yet.\\n\\n## Try it out!\\n\\nNow that you know about various use cases and features of generic SQL, get started with [Elastic Cloud](https://cloud.elastic.co/registration?fromURI=/home) and try using the [SQL input package](https://docs.elastic.co/integrations/sql) for your SQL database for a customized experience and metrics. If you are looking for newer metrics for some of our existing SQL-based integrations — like [Microsoft SQL Server](https://docs.elastic.co/en/integrations/microsoft_sqlserver), [Oracle](https://docs.elastic.co/integrations/oracle), and more — go ahead and give the SQL input package a whirl.\\n\\n_The release and timing of any features or functionality described in this post remain at Elastic\'s sole discretion. 
Any features or functionality not currently available may not be delivered on time or at all._\\n","code":"var Component=(()=>{var u=Object.create;var a=Object.defineProperty;var m=Object.getOwnPropertyDescriptor;var p=Object.getOwnPropertyNames;var g=Object.getPrototypeOf,f=Object.prototype.hasOwnProperty;var b=(n,e)=>()=>(e||n((e={exports:{}}).exports,e),e.exports),w=(n,e)=>{for(var i in e)a(n,i,{get:e[i],enumerable:!0})},o=(n,e,i,r)=>{if(e&&typeof e==\\"object\\"||typeof e==\\"function\\")for(let s of p(e))!f.call(n,s)&&s!==i&&a(n,s,{get:()=>e[s],enumerable:!(r=m(e,s))||r.enumerable});return n};var y=(n,e,i)=>(i=n!=null?u(g(n)):{},o(e||!n||!n.__esModule?a(i,\\"default\\",{value:n,enumerable:!0}):i,n)),v=n=>o(a({},\\"__esModule\\",{value:!0}),n);var c=b((L,l)=>{l.exports=_jsx_runtime});var q={};w(q,{default:()=>h,frontmatter:()=>_});var t=y(c()),_={title:\\"Elastic SQL inputs: A generic solution for database metrics observability\\",slug:\\"sql-inputs-database-metrics-observability\\",date:\\"2023-09-11\\",description:\\"This blog dives into the functionality of generic SQL and provides various use cases for advanced users to ingest custom metrics to Elastic for database observability. We also introduce the fetch from all database new capability released in 8.10.\\",author:[{slug:\\"lalit-satapathy\\"},{slug:\\"ishleen-kaur\\"},{slug:\\"muthukumar-paramasivam\\"}],image:\\"patterns-midnight-background-no-logo-observability.png\\",tags:[{slug:\\"log-analytics\\"},{slug:\\"log-analytics\\"},{slug:\\"kql\\"}]};function d(n){let e={a:\\"a\\",br:\\"br\\",code:\\"code\\",em:\\"em\\",h2:\\"h2\\",h3:\\"h3\\",img:\\"img\\",li:\\"li\\",p:\\"p\\",pre:\\"pre\\",strong:\\"strong\\",ul:\\"ul\\",...n.components};return(0,t.jsxs)(t.Fragment,{children:[(0,t.jsxs)(e.p,{children:[\\"Elastic\\",(0,t.jsx)(\\"sup\\",{children:\\"\\\\xAE\\"}),\\" SQL inputs (\\",(0,t.jsx)(e.a,{href:\\"https://www.elastic.co/guide/en/beats/metricbeat/current/metricbeat-module-sql.html\\",rel:\\"nofollow\\",children:\\"metricbeat\\"}),\\" module and \\",(0,t.jsx)(e.a,{href:\\"https://docs.elastic.co/integrations/sql\\",rel:\\"nofollow\\",children:\\"input package\\"}),\\") allows the user to execute \\",(0,t.jsx)(e.a,{href:\\"https://en.wikipedia.org/wiki/SQL\\",rel:\\"nofollow\\",children:\\"SQL\\"}),\\" queries against many supported databases in a flexible way and ingest the resulting metrics to Elasticsearch\\",(0,t.jsx)(\\"sup\\",{children:\\"\\\\xAE\\"}),\\". This blog dives into the functionality of generic SQL and provides various use cases for \\",(0,t.jsx)(e.em,{children:\\"advanced users\\"}),\\" to ingest custom metrics to Elastic\\",(0,t.jsx)(\\"sup\\",{children:\\"\\\\xAE\\"}),\\", for database observability. The blog also introduces the fetch from all database new capability, released in 8.10.\\"]}),`\\n`,(0,t.jsx)(e.h2,{id:\\"why-generic-sql\\",children:\\"Why \\\\u201CGeneric SQL\\\\u201D?\\"}),`\\n`,(0,t.jsxs)(e.p,{children:[\\"Elastic already has metricbeat and integration packages targeted for specific databases. One example is \\",(0,t.jsx)(e.a,{href:\\"https://www.elastic.co/guide/en/beats/metricbeat/current/metricbeat-module-mysql.html\\",rel:\\"nofollow\\",children:\\"metricbeat\\"}),\\" for MySQL \\\\u2014 and the corresponding integration \\",(0,t.jsx)(e.a,{href:\\"https://docs.elastic.co/en/integrations/mysql\\",rel:\\"nofollow\\",children:\\"package\\"}),\\". 
These beats modules and integrations are customized for a specific database, and the metrics are extracted using pre-defined queries from the specific database. The queries used in these integrations and the corresponding metrics are \\",(0,t.jsx)(e.em,{children:\\"not\\"}),\\" available for modification.\\"]}),`\\n`,(0,t.jsxs)(e.p,{children:[\\"Whereas the \\",(0,t.jsx)(e.em,{children:\\"Generic SQL inputs\\"}),\\" (\\",(0,t.jsx)(e.a,{href:\\"https://www.elastic.co/guide/en/beats/metricbeat/current/metricbeat-module-sql.html\\",rel:\\"nofollow\\",children:\\"metricbeat\\"}),\\" or \\",(0,t.jsx)(e.a,{href:\\"https://docs.elastic.co/integrations/sql\\",rel:\\"nofollow\\",children:\\"input package\\"}),\\") can be used to scrape metrics from any supported database using the user\'s SQL queries. The queries are provided by the user depending on specific metrics to be extracted. This enables a much more powerful mechanism for metrics ingestion, where users can choose a specific driver and provide the relevant SQL queries and the results get mapped to one or more Elasticsearch documents, using a structured mapping process (table/variable format explained later).\\"]}),`\\n`,(0,t.jsxs)(e.p,{children:[\\"Generic SQL inputs can be used in conjunction with the existing integration packages, which already extract specific database metrics, to extract additional custom metrics dynamically, making this input very powerful. In this blog, \\",(0,t.jsx)(e.em,{children:\\"Generic SQL input\\"}),\\" and \\",(0,t.jsx)(e.em,{children:\\"Generic SQL\\"}),\\" are used interchangeably.\\"]}),`\\n`,(0,t.jsx)(e.p,{children:(0,t.jsx)(e.img,{src:\\"/assets/images/sql-inputs-database-metrics-observability/elastic-blog-1-genericSQL.png\\",alt:\\"Generic SQL database metrics collection\\",width:\\"1999\\",height:\\"1146\\"})}),`\\n`,(0,t.jsx)(e.h2,{id:\\"functionalities-details\\",children:\\"Functionalities details\\"}),`\\n`,(0,t.jsx)(e.p,{children:\\"This section covers some of the features that would help with the metrics extraction. We provide a brief description of the response format configuration. Then we dive into the merge_results functionality, which is used to combine results from multiple SQL queries into a single document.\\"}),`\\n`,(0,t.jsx)(e.p,{children:\\"The next key functionality users may be interested in is to collect metrics from all the custom databases, which is now possible with the fetch_from_all_databases feature.\\"}),`\\n`,(0,t.jsx)(e.p,{children:\\"Now let\'s dive into the specific functionalities:\\"}),`\\n`,(0,t.jsx)(e.h3,{id:\\"different-drivers-supported\\",children:\\"Different drivers supported\\"}),`\\n`,(0,t.jsx)(e.p,{children:\\"The generic SQL can fetch metrics from the different databases. The current version has the capability to fetch metrics from the following drivers: MySQL, PostgreSQL, Oracle, and Microsoft SQL Server(MSSQL).\\"}),`\\n`,(0,t.jsx)(e.h3,{id:\\"response-format\\",children:\\"Response format\\"}),`\\n`,(0,t.jsx)(e.p,{children:\\"The response format in generic SQL is used to manipulate the data in either table or in variable format. Here\\\\u2019s an overview of the formats and syntax for creating and using the table and variables.\\"}),`\\n`,(0,t.jsxs)(e.p,{children:[\\"Syntax: \\",(0,t.jsx)(e.code,{children:\\"response_format: table {{or}} variables\\"})]}),`\\n`,(0,t.jsxs)(e.p,{children:[(0,t.jsx)(e.strong,{children:\\"Response format table\\"}),(0,t.jsx)(e.br,{}),`\\n`,\\"This mode generates a single event for each row. 
The table format has no restrictions on the number of columns in the response. This format can have any number of columns.\\"]}),`\\n`,(0,t.jsx)(e.p,{children:\\"Example:\\"}),`\\n`,(0,t.jsx)(e.pre,{children:(0,t.jsx)(e.code,{className:\\"language-sql\\",children:`driver: \\"mssql\\"\\nsql_queries:\\n - query: \\"SELECT counter_name, cntr_value FROM sys.dm_os_performance_counters WHERE counter_name= \'User Connections\'\\"\\n response_format: table\\n`})}),`\\n`,(0,t.jsx)(e.p,{children:\\"This query returns a response similar to this:\\"}),`\\n`,(0,t.jsx)(e.pre,{children:(0,t.jsx)(e.code,{className:\\"language-json\\",children:`\\"sql\\":{\\n \\"metrics\\":{\\n \\"counter_name\\":\\"User Connections \\",\\n \\"cntr_value\\":7\\n },\\n \\"driver\\":\\"mssql\\"\\n}\\n`})}),`\\n`,(0,t.jsx)(e.p,{children:\\"The response generated above adds the counter_name as a key in the document.\\"}),`\\n`,(0,t.jsxs)(e.p,{children:[(0,t.jsx)(e.strong,{children:\\"Response format variables\\"}),(0,t.jsx)(e.br,{}),`\\n`,\\"The variable format supports key:value pairs. This format expects only two columns to fetch in a query.\\"]}),`\\n`,(0,t.jsx)(e.p,{children:\\"Example:\\"}),`\\n`,(0,t.jsx)(e.pre,{children:(0,t.jsx)(e.code,{className:\\"language-sql\\",children:`driver: \\"mssql\\"\\nsql_queries:\\n - query: \\"SELECT counter_name, cntr_value FROM sys.dm_os_performance_counters WHERE counter_name= \'User Connections\'\\"\\n response_format: variables\\n`})}),`\\n`,(0,t.jsx)(e.p,{children:\\"The variable format takes the first variable in the query above as the key:\\"}),`\\n`,(0,t.jsx)(e.pre,{children:(0,t.jsx)(e.code,{className:\\"language-json\\",children:`\\"sql\\":{\\n \\"metrics\\":{\\n \\"user connections \\":7\\n },\\n \\"driver\\":\\"mssql\\"\\n}\\n`})}),`\\n`,(0,t.jsx)(e.p,{children:\\"In the above response, you can see the value of counter_name is used to generate the key in variable format.\\"}),`\\n`,(0,t.jsx)(e.h3,{id:\\"response-optimization-merge_results\\",children:\\"Response optimization: merge_results\\"}),`\\n`,(0,t.jsxs)(e.p,{children:[\\"We are now supporting merging multiple query responses into a single event. By enabling \\",(0,t.jsx)(e.strong,{children:\\"merge_results\\"}),\\" , users can significantly optimize the storage space of the metrics ingested to Elasticsearch. This mode enables an efficient compaction of the document generated, where instead of generating multiple documents, a single merged document is generated wherever applicable. 
The metrics of a similar kind, generated from multiple queries, are combined into a single event.\\"]}),`\\n`,(0,t.jsx)(e.p,{children:(0,t.jsx)(e.img,{src:\\"/assets/images/sql-inputs-database-metrics-observability/elastic-blog-2-output-merge-results.png\\",alt:\\"Output of Merge results\\",width:\\"976\\",height:\\"466\\"})}),`\\n`,(0,t.jsxs)(e.p,{children:[\\"Syntax: \\",(0,t.jsx)(e.code,{children:\\"merge_results: true {{or}} false\\"})]}),`\\n`,(0,t.jsx)(e.p,{children:\\"In the below example, you can see how the data is loaded into Elasticsearch for the below query when the merge_results is disabled.\\"}),`\\n`,(0,t.jsx)(e.p,{children:\\"Example:\\"}),`\\n`,(0,t.jsx)(e.p,{children:\\"In this example, we are using two different queries to fetch metrics from the performance counter.\\"}),`\\n`,(0,t.jsx)(e.pre,{children:(0,t.jsx)(e.code,{className:\\"language-yaml\\",children:`merge_results: false\\ndriver: \\"mssql\\"\\nsql_queries:\\n - query: \\"SELECT cntr_value As \'user_connections\' FROM sys.dm_os_performance_counters WHERE counter_name= \'User Connections\'\\"\\n response_format: table\\n - query: \\"SELECT cntr_value As \'buffer_cache_hit_ratio\' FROM sys.dm_os_performance_counters WHERE counter_name = \'Buffer cache hit ratio\' AND object_name like \'%Buffer Manager%\'\\"\\n response_format: table\\n`})}),`\\n`,(0,t.jsx)(e.p,{children:\\"As you can see, the response for the above example generates a single document for each query.\\"}),`\\n`,(0,t.jsx)(e.p,{children:\\"The resulting document from the first query:\\"}),`\\n`,(0,t.jsx)(e.pre,{children:(0,t.jsx)(e.code,{className:\\"language-json\\",children:`\\"sql\\":{\\n \\"metrics\\":{\\n \\"user_connections\\":7\\n },\\n \\"driver\\":\\"mssql\\"\\n}\\n`})}),`\\n`,(0,t.jsx)(e.p,{children:\\"And resulting document from the second query:\\"}),`\\n`,(0,t.jsx)(e.pre,{children:(0,t.jsx)(e.code,{className:\\"language-json\\",children:`\\"sql\\":{\\n \\"metrics\\":{\\n \\"buffer_cache_hit_ratio\\":87\\n },\\n \\"driver\\":\\"mssql\\"\\n}\\n`})}),`\\n`,(0,t.jsx)(e.p,{children:\\"When we enable the merge_results flag in the query, both the above metrics are combined together and the data gets loaded in a single document.\\"}),`\\n`,(0,t.jsx)(e.p,{children:\\"You can see the merged document in the below example:\\"}),`\\n`,(0,t.jsx)(e.pre,{children:(0,t.jsx)(e.code,{className:\\"language-json\\",children:`\\"sql\\":{\\n \\"metrics\\":{\\n \\"user connections \\":7,\\n \\\\u201Cbuffer_cache_hit_ratio\\\\u201D:87\\n },\\n \\"driver\\":\\"mssql\\"\\n}\\n`})}),`\\n`,(0,t.jsx)(e.p,{children:(0,t.jsx)(e.em,{children:\\"However, such a merge is possible only if the table queries are merged, and each produces a single row. 
There is no restriction on variable queries being merged.\\"})}),`\\n`,(0,t.jsx)(e.h3,{id:\\"introducing-a-new-capability-fetch_from_all_databases\\",children:\\"Introducing a new capability: fetch_from_all_databases\\"}),`\\n`,(0,t.jsxs)(e.p,{children:[\\"This is a \\",(0,t.jsx)(e.a,{href:\\"https://github.com/elastic/beats/pull/35688\\",rel:\\"nofollow\\",children:\\"new functionality\\"}),\\" to fetch all the database metrics automatically from the system and user databases of the Microsoft SQL Server, by enabling the fetch_from_all_databases flag.\\"]}),`\\n`,(0,t.jsxs)(e.p,{children:[\\"Keep an eye out for the \\",(0,t.jsx)(e.a,{href:\\"https://www.elastic.co/guide/en/beats/metricbeat/8.10/metricbeat-module-sql.html#_example_execute_given_queries_for_all_databases_present_in_a_server\\",rel:\\"nofollow\\",children:\\"8.10 release version\\"}),\\" where you can start using the fetch all database feature. Prior to the 8.10 version, users had to provide the database names manually to fetch metrics from custom/user databases.\\"]}),`\\n`,(0,t.jsxs)(e.p,{children:[\\"Syntax: \\",(0,t.jsx)(e.code,{children:\\"fetch_from_all_databases: true {{or}} false\\"})]}),`\\n`,(0,t.jsx)(e.p,{children:\\"Below is the sample query with fetch all databases flag as disabled:\\"}),`\\n`,(0,t.jsx)(e.pre,{children:(0,t.jsx)(e.code,{className:\\"language-yaml\\",children:`fetch_from_all_databases: false\\ndriver: \\"mssql\\"\\nsql_queries:\\n - query: \\"SELECT @@servername AS server_name, @@servicename AS instance_name, name As \'database_name\', database_id FROM sys.databases WHERE name=\'master\';\\"\\n`})}),`\\n`,(0,t.jsx)(e.p,{children:\\"The above query fetches metrics only for the provided database name. Here the input database is master, so the metrics are fetched only for the master.\\"}),`\\n`,(0,t.jsx)(e.p,{children:\\"Below is the sample query with the fetch all databases flag as enabled:\\"}),`\\n`,(0,t.jsx)(e.pre,{children:(0,t.jsx)(e.code,{className:\\"language-yaml\\",children:`fetch_from_all_databases: true\\ndriver: \\"mssql\\"\\nsql_queries:\\n - query: SELECT @@servername AS server_name, @@servicename AS instance_name, DB_NAME() AS \'database_name\', DB_ID() AS database_id;\\n response_format: table\\n`})}),`\\n`,(0,t.jsx)(e.p,{children:\\"The above query fetches metrics from all available databases. This is useful when the user wants to get data from all the databases.\\"}),`\\n`,(0,t.jsxs)(e.p,{children:[\\"Please note: currently this feature is supported only for Microsoft SQL Server and will be used by MS SQL integration internally, to support extracting metrics for \\",(0,t.jsx)(e.a,{href:\\"https://github.com/elastic/integrations/issues/4108\\",rel:\\"nofollow\\",children:\\"all user DBs\\"}),\\" by default.\\"]}),`\\n`,(0,t.jsx)(e.h2,{id:\\"using-generic-sql-metricbeat\\",children:\\"Using generic SQL: Metricbeat\\"}),`\\n`,(0,t.jsxs)(e.p,{children:[\\"The generic \\",(0,t.jsx)(e.a,{href:\\"https://www.elastic.co/guide/en/beats/metricbeat/current/metricbeat-module-sql.html\\",rel:\\"nofollow\\",children:\\"SQL metricbeat module\\"}),\\" provides flexibility to execute queries against different database drivers. The metricbeat input is available as GA for any production usage. 
\\",(0,t.jsx)(e.a,{href:\\"https://www.elastic.co/guide/en/beats/metricbeat/current/metricbeat-module-sql.html\\",rel:\\"nofollow\\",children:\\"Here\\"}),\\", you can find more information on configuring \\",(0,t.jsx)(e.a,{href:\\"https://www.elastic.co/guide/en/beats/metricbeat/current/metricbeat-module-sql.html\\",rel:\\"nofollow\\",children:\\"the generic SQL\\"}),\\" for different drivers with various examples.\\"]}),`\\n`,(0,t.jsx)(e.h2,{id:\\"using-generic-sql-input-package\\",children:\\"Using generic SQL: Input package\\"}),`\\n`,(0,t.jsxs)(e.p,{children:[\\"The input package provides a flexible solution to advanced users for customizing their ingestion experience in Elastic. Generic SQL is now also available as an SQL\\",(0,t.jsx)(e.a,{href:\\"https://docs.elastic.co/integrations/sql\\",rel:\\"nofollow\\",children:\\"input package\\"}),\\". The input package is currently available for early users as a \\",(0,t.jsx)(e.strong,{children:\\"beta release\\"}),\\". Let\'s take a walk through how users can use generic SQL via the input package.\\"]}),`\\n`,(0,t.jsx)(e.h3,{id:\\"configurations-of-generic-sql-input-package\\",children:\\"Configurations of generic SQL input package:\\"}),`\\n`,(0,t.jsx)(e.p,{children:\\"The configuration options for the generic SQL input package are as below:\\"}),`\\n`,(0,t.jsxs)(e.ul,{children:[`\\n`,(0,t.jsxs)(e.li,{children:[(0,t.jsx)(e.strong,{children:\\"Driver**\\"}),\\" :** This is the SQL database for which you want to use the package. In this case, we will take mysql as an example.\\"]}),`\\n`,(0,t.jsxs)(e.li,{children:[(0,t.jsx)(e.strong,{children:\\"Hosts:\\"}),\\" Here the user enters the connection string to connect to the database. It would vary depending on which database/driver is being used. Refer \\",(0,t.jsx)(e.a,{href:\\"https://docs.elastic.co/integrations/sql#hosts\\",rel:\\"nofollow\\",children:\\"here\\"}),\\" for examples.\\"]}),`\\n`,(0,t.jsxs)(e.li,{children:[(0,t.jsx)(e.strong,{children:\\"SQL Queries:\\"}),\\" Here the user writes the SQL queries they want to fire and the response_format is specified.\\"]}),`\\n`,(0,t.jsxs)(e.li,{children:[(0,t.jsx)(e.strong,{children:\\"Data set:\\"}),\\" The user specifies a \\",(0,t.jsx)(e.a,{href:\\"https://www.elastic.co/guide/en/ecs/master/ecs-data_stream.html#_data_stream_field_details\\",rel:\\"nofollow\\",children:\\"data set\\"}),\\" name to which the response fields get mapped.\\"]}),`\\n`,(0,t.jsxs)(e.li,{children:[(0,t.jsx)(e.strong,{children:\\"Merge results**\\"}),\\" :** This is an advanced setting, used to merge queries into a single event.\\"]}),`\\n`]}),`\\n`,(0,t.jsx)(e.p,{children:(0,t.jsx)(e.img,{src:\\"/assets/images/sql-inputs-database-metrics-observability/elastic-blog-3-SQL-metrics-inputpackage.png\\",alt:\\"Configuration parameters for SQL input package\\",width:\\"1768\\",height:\\"1584\\"})}),`\\n`,(0,t.jsx)(e.p,{children:(0,t.jsx)(e.img,{src:\\"/assets/images/sql-inputs-database-metrics-observability/elastic-blog-4-expanded-document.png\\",alt:\\"Metrics getting mapped to the index created by the \\\\u2018sql_first_dataset\\\\u2019\\",width:\\"1999\\",height:\\"1031\\"})}),`\\n`,(0,t.jsx)(e.h3,{id:\\"metrics-extensibility-with-customized-sql-queries\\",children:\\"Metrics extensibility with customized SQL queries\\"}),`\\n`,(0,t.jsxs)(e.p,{children:[\\"Let\'s say a user is using \\",(0,t.jsx)(e.a,{href:\\"https://docs.elastic.co/integrations/mysql\\",rel:\\"nofollow\\",children:\\"MYSQL Integration\\"}),\\", which provides a fixed set of metrics. 
Their requirement now extends to retrieving more metrics from the MYSQL database by firing new customized SQL queries.\\"]}),`\\n`,(0,t.jsxs)(e.p,{children:[\\"This can be achieved by adding an instance of SQL input package, writing the customized queries and specifying a new \\",(0,t.jsx)(e.a,{href:\\"https://www.elastic.co/guide/en/ecs/master/ecs-data_stream.html#field-data-stream-dataset\\",rel:\\"nofollow\\",children:\\"data set\\"}),\\" name as shown in the screenshot below.\\"]}),`\\n`,(0,t.jsx)(e.p,{children:\\"This way users can get any metrics by executing corresponding queries. The resultant metrics of the query will be indexed to the new data set, sql_second_dataset.\\"}),`\\n`,(0,t.jsx)(e.p,{children:(0,t.jsx)(e.img,{src:\\"/assets/images/sql-inputs-database-metrics-observability/elastic-blog-5-driver.png\\",alt:\\"Customization of Ingest Pipelines and Mappings\\",width:\\"1000\\",height:\\"1524\\"})}),`\\n`,(0,t.jsx)(e.p,{children:\\"When there are multiple queries, users can club them into a single event by enabling the Merge Results toggle.\\"}),`\\n`,(0,t.jsx)(e.h3,{id:\\"customizing-user-experience\\",children:\\"Customizing user experience\\"}),`\\n`,(0,t.jsxs)(e.p,{children:[\\"Users can customize their data by writing their own \\",(0,t.jsx)(e.a,{href:\\"https://www.elastic.co/guide/en/elasticsearch/reference/current/ingest.html\\",rel:\\"nofollow\\",children:\\"ingest pipelines\\"}),\\" and providing their customized \\",(0,t.jsx)(e.a,{href:\\"https://www.elastic.co/guide/en/elasticsearch/reference/current/mapping.html\\",rel:\\"nofollow\\",children:\\"mappings\\"}),\\". Users can also build their own bespoke dashboards.\\"]}),`\\n`,(0,t.jsx)(e.p,{children:(0,t.jsx)(e.img,{src:\\"/assets/images/sql-inputs-database-metrics-observability/elastic-blog-6-ingest-pipeline.png\\",alt:\\"Customization of Ingest Pipelines and Mappings\\",width:\\"990\\",height:\\"1004\\"})}),`\\n`,(0,t.jsx)(e.p,{children:\\"As we can see above, the SQL input package provides the flexibility to get new metrics by running new queries, which are not supported in the default MYSQL integration (the user gets metrics from a predetermined set of queries).\\"}),`\\n`,(0,t.jsx)(e.p,{children:\\"The SQL input package also supports multiple drivers: mssql, postgresql and oracle. So a single input package can be used to cater to all these databases.\\"}),`\\n`,(0,t.jsx)(e.p,{children:\\"Note: The fetch_from_all_databases feature is not supported in the SQL input package yet.\\"}),`\\n`,(0,t.jsx)(e.h2,{id:\\"try-it-out\\",children:\\"Try it out!\\"}),`\\n`,(0,t.jsxs)(e.p,{children:[\\"Now that you know about various use cases and features of generic SQL, get started with \\",(0,t.jsx)(e.a,{href:\\"https://cloud.elastic.co/registration?fromURI=/home\\",rel:\\"nofollow\\",children:\\"Elastic Cloud\\"}),\\" and try using the \\",(0,t.jsx)(e.a,{href:\\"https://docs.elastic.co/integrations/sql\\",rel:\\"nofollow\\",children:\\"SQL input package\\"}),\\" for your SQL database and get customized experience and metrics. 
If you are looking for newer metrics for some of our existing SQL based integrations \\\\u2014 like \\",(0,t.jsx)(e.a,{href:\\"https://docs.elastic.co/en/integrations/microsoft_sqlserver\\",rel:\\"nofollow\\",children:\\"Microsoft SQL Server\\"}),\\", \\",(0,t.jsx)(e.a,{href:\\"https://docs.elastic.co/integrations/oracle\\",rel:\\"nofollow\\",children:\\"Oracle\\"}),\\", and more \\\\u2014 go ahead and give the SQL input package a swirl.\\"]}),`\\n`,(0,t.jsx)(e.p,{children:(0,t.jsx)(e.em,{children:\\"The release and timing of any features or functionality described in this post remain at Elastic\'s sole discretion. Any features or functionality not currently available may not be delivered on time or at all.\\"})})]})}function h(n={}){let{wrapper:e}=n.components||{};return e?(0,t.jsx)(e,{...n,children:(0,t.jsx)(d,{...n})}):d(n)}return v(q);})();\\n;return Component;"},"_id":"articles/elastic-sql-inputs-generic-solution-database-metrics-observability.mdx","_raw":{"sourceFilePath":"articles/elastic-sql-inputs-generic-solution-database-metrics-observability.mdx","sourceFileName":"elastic-sql-inputs-generic-solution-database-metrics-observability.mdx","sourceFileDir":"articles","contentType":"mdx","flattenedPath":"articles/elastic-sql-inputs-generic-solution-database-metrics-observability"},"type":"Article","imageUrl":"/assets/images/sql-inputs-database-metrics-observability/patterns-midnight-background-no-logo-observability.png","readingTime":"10 min read","url":"/sql-inputs-database-metrics-observability","headings":[{"level":2,"title":"Why “Generic SQL”?","href":"#why-generic-sql"},{"level":2,"title":"Functionalities details","href":"#functionalities-details"},{"level":3,"title":"Different drivers supported","href":"#different-drivers-supported"},{"level":3,"title":"Response format","href":"#response-format"},{"level":3,"title":"Response optimization: merge_results","href":"#response-optimization-merge_results"},{"level":3,"title":"Introducing a new capability: fetch_from_all_databases","href":"#introducing-a-new-capability-fetch_from_all_databases"},{"level":2,"title":"Using generic SQL: Metricbeat","href":"#using-generic-sql-metricbeat"},{"level":2,"title":"Using generic SQL: Input package","href":"#using-generic-sql-input-package"},{"level":3,"title":"Configurations of generic SQL input package:","href":"#configurations-of-generic-sql-input-package"},{"level":3,"title":"Metrics extensibility with customized SQL queries","href":"#metrics-extensibility-with-customized-sql-queries"},{"level":3,"title":"Customizing user experience","href":"#customizing-user-experience"},{"level":2,"title":"Try it out!","href":"#try-it-out"}]},{"title":"Elastic Synthetics Projects: A Git-friendly way to manage your synthetics monitors in Elastic Observability","slug":"synthetics-git-ops-observability","date":"2023-02-23","description":"Elastic Observability can easily integrate into your DevOps git flow when managing applications with synthetics. Our new Synthetics Projects will enable you to develop and manage synthetics monitor configurations written in YAML with git.","image":"blog-charts-packages.png","author":[{"slug":"andrew-cholakian","type":"Author","_raw":{}}],"tags":[{"slug":"synthetics","type":"Tag","_raw":{}},{"slug":"gitops","type":"Tag","_raw":{}},{"slug":"javascript","type":"Tag","_raw":{}}],"body":{"raw":"\\nElastic has an entirely new Heartbeat/Synthetics workflow superior to the current workflow. 
If you’re a current user of the Elastic Uptime app, read on to learn about the improved workflow you can use today and should eventually migrate toward.\\n\\nWe’ve recently released a beta feature that provides a Git-friendly IaaC oriented workflow. You can now push Heartbeat monitors with the same ease with which you push code changes in Git or config changes in Terraform. The features discussed in this blog are all currently in beta, and we urge users trying these features out to upgrade to the latest stack version first. When these features become GA, this new workflow will be the preferred way of configuring monitors in the Elastic Stack. If you’re starting a new project, you may want to consider setting it up this way instead of via our more classic configuration.\\n\\nToday, using Heartbeat is simple. You just need to write a little YAML and monitoring data shows up in Elasticsearch, visible in the Uptime UI. While the UI is indeed simple, there’s some hidden complexity there that we’ve improved with a new UI (the Synthetics app) and augmented with an even more automation friendly CLI workflow via our new Projects feature, which will be discussed below.\\n\\nHow do you manage your configs written in YAML? Many of our users will manage YAML in Git and use tooling such as Ansible, Helm, or similar to manage their infrastructure as code (IaaC). As with any other organization, Elastic also heavily utilizes IaaC in all parts of our operations. Hence it’s only natural we developed a capability to provide you with similar support for the current Heartbeat capability and the upcoming synthetics monitoring capabilities.\\n\\n## Projects: A new way to organize and distribute configs\\n\\nLet’s dive right into what we’re calling “Synthetics Projects” and how they differ from traditional Heartbeat config files. To use this feature, you would start by [creating a project](https://www.elastic.co/guide/en/observability/current/synthetics-get-started-project.html) in a Git repo containing your configs. At a high level, setting up a project requires performing the following tasks:\\n\\n1. Run npx @elastic/synthetics init to create a project skeleton in a directory. See more details on the [npmjs.com](https://www.npmjs.com/package/@elastic/synthetics) site.\\n2. Run git init and git push on the generated directory to version it as a Git repository.\\n3. Add your lightweight YAML files and browser javascript/typescript files to the journeys folder.\\n4. Test that it works by running npx @elastic/synthetics push command to sync your project to your Elastic Stack.\\n5. Configure a CI/CD pipeline to test pull requests to your Git repo and to execute npx @elastic/synthetics push on merges to the main branch.\\n\\nSo, once configured, adding, removing, and editing monitors involves:\\n\\n1. Editing a monitor’s config, either YAML for lightweight monitors, or Javascript/Typescript for browser based ones locally\\n2. Testing your local configs with npx @elastic/synthetics journeys\\n3. Creating a new PR to your main branch via a Git push\\n4. Waiting for your CI server to perform the same validation and waiting for someone else on your team to review your PR\\n5. Merging your result to the main branch\\n6. 
Waiting for your CI server to push the changes to your Elastic stack\\n\\nWe’ve depicted the flow of data in the diagram below:\\n\\n![](/assets/images/synthetics-git-ops-observability/blog-elastic-flow-of-data-diagram.png)\\n\\nThis is, in fact, the way many of our users work today, with other software taking the place of npx @elastic/synthetics push as mentioned earlier. Indeed, in the future, we will most likely look into building a Terraform provider, though that isn’t something we’re actively working on now.\\n\\n## Just have a few monitors? Use the GUI!\\n\\nThe above approach is great for sophisticated users with larger numbers of configurations, but if you just want to monitor a few URLs, it’s overkill. If that sounds like you, consider the new Monitor Management UI in the Uptime app! It works in the exact same way, saving configs to your Elastic Stack, but with no need for Git, or a project, or all that other infrastructure. Simply, log in, fill out the form pictured below, and hit save. If you want to set up a private location, that is still done in the same way via Fleet.\\n\\n![](/assets/images/synthetics-git-ops-observability/blog-elastic-add-monitor.png)\\n\\n## What about my existing Fleet monitors?\\n\\nA small subset of users have monitors configured today using the Synthetics Fleet integration. If that describes you, you’ll want to move onto either the GUI based approach or the Project based approach, as those methods supersede direct usage of the Fleet integration, which will eventually be restricted only to use via the above described methods.\\n\\nThe Fleet approach is inferior in a few ways:\\n\\n1. It can only configure monitors for a single location.\\n2. It creates a different UX for monitors configured on the service versus private locations.\\n3. It’s less fluid of an integration with the Uptime UI.\\n\\nIt’s rare for us to deprecate beta features, but in this case we had a clearly superior alternative. Maintaining both would have created a more confusing and unwieldy product. We don’t yet have an exact date for removing support for these monitors, but you can track this via [this GitHub issue](https://github.com/elastic/kibana/issues/137508).\\n","code":"var Component=(()=>{var d=Object.create;var a=Object.defineProperty;var g=Object.getOwnPropertyDescriptor;var p=Object.getOwnPropertyNames;var w=Object.getPrototypeOf,y=Object.prototype.hasOwnProperty;var f=(i,e)=>()=>(e||i((e={exports:{}}).exports,e),e.exports),m=(i,e)=>{for(var n in e)a(i,n,{get:e[n],enumerable:!0})},s=(i,e,n,r)=>{if(e&&typeof e==\\"object\\"||typeof e==\\"function\\")for(let o of p(e))!y.call(i,o)&&o!==n&&a(i,o,{get:()=>e[o],enumerable:!(r=g(e,o))||r.enumerable});return i};var b=(i,e,n)=>(n=i!=null?d(w(i)):{},s(e||!i||!i.__esModule?a(n,\\"default\\",{value:i,enumerable:!0}):n,i)),v=i=>s(a({},\\"__esModule\\",{value:!0}),i);var h=f((x,l)=>{l.exports=_jsx_runtime});var k={};m(k,{default:()=>u,frontmatter:()=>j});var t=b(h()),j={title:\\"Elastic Synthetics Projects: A Git-friendly way to manage your synthetics monitors in Elastic Observability\\",slug:\\"synthetics-git-ops-observability\\",date:\\"2023-02-23\\",description:\\"Elastic Observability can easily integrate into your DevOps git flow when managing applications with synthetics. 
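For readers who want to see what step 3 of the project setup looks like in practice, below is a minimal sketch of a browser journey that could live in a project's journeys folder. It uses the documented journey/step/monitor API from @elastic/synthetics; the journey name, monitor id, schedule, and URL are hypothetical placeholders:

```
import { journey, step, monitor, expect } from '@elastic/synthetics';

journey('Homepage loads and has the right title', ({ page, params }) => {
  // Hypothetical per-monitor settings; these are published to your
  // Elastic Stack when CI runs `npx @elastic/synthetics push`.
  monitor.use({ id: 'example-homepage', schedule: 10 });

  step('Go to the homepage', async () => {
    await page.goto(params.url ?? 'https://example.com');
  });

  step('Check the page title', async () => {
    expect(await page.title()).toContain('Example');
  });
});
```

Locally, the test step described above validates this journey before it ever reaches a PR; the push on merge to main is what actually syncs it to the stack.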
If that describes you, you\\\\u2019ll want to move onto either the GUI based approach or the Project based approach, as those methods supersede direct usage of the Fleet integration, which will eventually be restricted only to use via the above described methods.\\"}),`\\n`,(0,t.jsx)(e.p,{children:\\"The Fleet approach is inferior in a few ways:\\"}),`\\n`,(0,t.jsxs)(e.ol,{children:[`\\n`,(0,t.jsx)(e.li,{children:\\"It can only configure monitors for a single location.\\"}),`\\n`,(0,t.jsx)(e.li,{children:\\"It creates a different UX for monitors configured on the service versus private locations.\\"}),`\\n`,(0,t.jsx)(e.li,{children:\\"It\\\\u2019s less fluid of an integration with the Uptime UI.\\"}),`\\n`]}),`\\n`,(0,t.jsxs)(e.p,{children:[\\"It\\\\u2019s rare for us to deprecate beta features, but in this case we had a clearly superior alternative. Maintaining both would have created a more confusing and unwieldy product. We don\\\\u2019t yet have an exact date for removing support for these monitors, but you can track this via \\",(0,t.jsx)(e.a,{href:\\"https://github.com/elastic/kibana/issues/137508\\",rel:\\"nofollow\\",children:\\"this GitHub issue\\"}),\\".\\"]})]})}function u(i={}){let{wrapper:e}=i.components||{};return e?(0,t.jsx)(e,{...i,children:(0,t.jsx)(c,{...i})}):c(i)}return v(k);})();\\n;return Component;"},"_id":"articles/elastic-synthetics-git-ops-observability.mdx","_raw":{"sourceFilePath":"articles/elastic-synthetics-git-ops-observability.mdx","sourceFileName":"elastic-synthetics-git-ops-observability.mdx","sourceFileDir":"articles","contentType":"mdx","flattenedPath":"articles/elastic-synthetics-git-ops-observability"},"type":"Article","imageUrl":"/assets/images/synthetics-git-ops-observability/blog-charts-packages.png","readingTime":"5 min read","url":"/synthetics-git-ops-observability","headings":[{"level":2,"title":"Projects: A new way to organize and distribute configs","href":"#projects-a-new-way-to-organize-and-distribute-configs"},{"level":2,"title":"Just have a few monitors? Use the GUI!","href":"#just-have-a-few-monitors-use-the-gui"},{"level":2,"title":"What about my existing Fleet monitors?","href":"#what-about-my-existing-fleet-monitors"}]},{"title":"Elastic Universal Profiling agent, a continuous profiling solution, is now open source","slug":"elastic-universal-profiling-agent-open-source","date":"2024-04-15","description":"At Elastic, open source isn\'t just philosophy, it\'s our DNA. Dive into the future with our open-sourced Universal Profiling agent, revolutionizing software efficiency and sustainability.","image":"tree_tunnel.jpg","author":[{"slug":"israel-ogbole","type":"Author","_raw":{}},{"slug":"christos-kalkanis","type":"Author","_raw":{}}],"tags":[{"slug":"opentelemetry","type":"Tag","_raw":{}},{"slug":"universal-profiling","type":"Tag","_raw":{}}],"body":{"raw":"Elastic Universal Profiling™ agent is now open source! The industry’s most advanced fleetwide continuous profiling solution empowers users to identify performance bottlenecks, reduce cloud spend, and minimize their carbon footprint. This post explores the history of the agent, its move to open source, and its future integration with OpenTelemetry.\\n\\n\\n\\n## Elastic Universal Profiling™ Agent goes open source under Apache 2 \\nAt Elastic, open source is more than just a philosophy — it\'s our DNA. We believe the benefits of whole-system continuous profiling extend far beyond performance optimization. It\'s a win for businesses and the planet alike. 
For instance, since launching Elastic Universal Profiling in general availability (GA), we\'ve observed a wide variety of use cases from customers.\\n\\nThese range from customers relying fully on Universal Profiling\'s [differential flame graphs and topN functions](https://www.elastic.co/guide/en/observability/current/universal-profiling.html#profiling-differential-views-intro) for insights during release management to utilizing AI assistants for quickly optimizing expensive functions. This includes using profiling data to identify the optimal energy-efficient cloud region to run certain workloads. Additionally, customers are using insights that Universal Profiling provides to build evidence to challenge cloud provider bills. As it turns out, cloud providers\' in-VM agents can consume a significant portion of the CPU time, which customers are billed for.\\n\\nIn a move that will empower the community to take advantage of continuous profiling\'s benefits, **we\'re thrilled to announce that the Elastic Universal Profiling agent** , a pioneering eBPF-based continuous profiling agent, **is now open source under the Apache 2 license!**\\n\\nThis move democratizes **hyper-scaler efficiency for everyone** , opening exciting new possibilities for the future of continuous profiling, as well as its role in observability and **OpenTelemetry**.\\n\\n\\n\\n## Implementation of the OpenTelemetry (OTel) Profiling protocol\\nOur commitment to open source goes beyond just the agent itself. We recently [announced our intent to donate](https://www.elastic.co/blog/elastic-donation-proposal-to-contribute-profiling-agent-to-opentelemetry) the agent to OpenTelemetry and have further solidified this goal by implementing the experimental [OTel Profiling data model](https://github.com/open-telemetry/oteps/blob/main/text/profiles/0239-profiles-data-model.md). This allows the open-sourced eBPF-based continuous profiling agent to communicate seamlessly with OpenTelemetry backends.\\n\\nBut that\'s not all! We\'ve also launched an innovative feature that [correlates profiling data with OpenTelemetry distributed traces](https://www.elastic.co/blog/continuous-profiling-distributed-tracing-correlation). This powerful capability offers a deeper level of insight into application performance, enabling the identification of bottlenecks with greater precision. Upon donating the Profiling agent to OTel, Elastic will also contribute critical components that enable distributed trace correlation within the [Elastic distribution of the OTel Java agent](https://github.com/elastic/elastic-otel-java) to the upstream OTel Java SDK. This underscores Elastic Observability\'s commitment to both open source and the support of open standards like OpenTelemetry while pushing the boundaries of what is possible in observability.\\n\\n\\n\\n## What does this mean for Elastic Universal Profiling customers? \\nWe\'d like to express our **immense gratitude to all our customers** who have been part of this journey, from the early stages of private beta to GA. Your feedback has been invaluable in shaping Universal Profiling into the powerful product it is today.\\n\\nBy open-sourcing the Universal Profiling agent and contributing it to OpenTelemetry, we\'re fostering a win-win situation for both you and the broader community. 
This move opens doors for innovation and collaboration, ultimately leading to a more robust and versatile whole-system continuous profiling solution for everyone.\\n\\nFurthermore, we\'re actively working on exciting novel ways to integrate Universal Profiling seamlessly within Elastic Observability. Expect further announcements soon, outlining how you can unlock even greater value from your profiling data within a unified observability experience in a way that has never been done before.\\n\\nThe open-sourced agent is using the recently released (experimental) OTel Profiling [signal](https://github.com/open-telemetry/opentelemetry-proto/pull/534). As a precaution, we recommend not using it in production environments.\\n\\nPlease continue using the official Elastic distribution of the Universal Profiling agent until the agent is formally accepted by OTel and the protocol reaches a stable phase. There\'s no need to take any action at this time, and we will ensure a smooth transition plan is in place for you.\\n\\n\\n\\n![1 - Elastic Universal Profiling](/assets/images/elastic-universal-profiling-agent-open-source/image1.png)\\n\\n## What does this mean for the OpenTelemetry community?\\nOpenTelemetry is adopting continuous profiling as a key signal. By open-sourcing the eBPF-based profiling agent and working towards donating it to OTel, Elastic is making it possible to accelerate the standardization of continuous profiling within OpenTelemetry. This move has a massive impact on the observability community, empowering everyone to continuously profile their systems with a standardized protocol.\\n\\nThis is particularly timely as [Moore\'s Law](https://www.bbc.co.uk/news/technology-32335003) slows down and cloud computing takes hold, making computational efficiency critical for businesses.\\n\\nHere\'s how whole-system continuous profiling benefits you:\\n\\n- **Maximize gross margins:** By reducing the computational resources needed to run applications, businesses can optimize their cloud spend and improve profitability. Whole-system continuous profiling is one way of identifying the most expensive applications (down to the lines of code) across diverse environments that may span multiple cloud providers. This principle aligns with the familiar adage, _\\"a penny saved is a penny earned.\\"_ In the cloud context, every CPU cycle saved translates to money saved. \\n\\n- **Minimize environmental impact:** Energy consumption associated with computing is a growing concern (source: [MIT Energy Initiative](https://energy.mit.edu/news/energy-efficient-computing/)). More efficient code translates to lower energy consumption, contributing to a reduction in carbon footprint. \\n\\n- **Accelerate engineering workflows:** Continuous profiling provides detailed insights to help debug complex issues faster, guide development, and improve overall code quality.\\n\\nThis is where Elastic Universal Profiling comes in — designed to help organizations run efficient services by minimizing computational wastage. To this end, it measures code efficiency in three dimensions: **CPU utilization**, **CO2**, and **cloud cost**.\\n\\nElastic\'s journey with continuous profiling began by joining forces with [optimyze.cloud](https://www.elastic.co/about/press/elastic-and-optimyze-join-forces-to-deliver-continuous-profiling-of-infrastructure-applications-and-services) –– this became the foundation for [Elastic Universal Profiling](https://www.elastic.co/observability/universal-profiling). 
We are excited to see this product evolve into its next growth phase in the open-source world.\\n\\n\\n\\n![2 - car manufacturers](/assets/images/elastic-universal-profiling-agent-open-source/image2.png)\\n\\n## Ready to give it a spin?\\nAs Elastic Universal Profiling transitions into this new open source era, the potential for transformative impact on performance optimization, cost efficiency, and environmental sustainability is immense. Elastic\'s approach — balancing innovation with responsibility — paves the way for a future where technology not only powers our world but does so in a way that is sustainable and accessible to all.\\n\\nGet started with the open source Elastic Universal Profiling agent today! [Download it directly from GitHub](https://github.com/elastic/otel-profiling-agent/) and follow the instructions in the repository.\\n\\n\\n\\n![3 - dripping graph and data](/assets/images/elastic-universal-profiling-agent-open-source/image3.png)\\n\\n_The release and timing of any features or functionality described in this post remain at Elastic\'s sole discretion. Any features or functionality not currently available may not be delivered on time or at all._\\n\\n\\n\\n","code":"var Component=(()=>{var p=Object.create;var r=Object.defineProperty;var u=Object.getOwnPropertyDescriptor;var g=Object.getOwnPropertyNames;var f=Object.getPrototypeOf,m=Object.prototype.hasOwnProperty;var y=(n,e)=>()=>(e||n((e={exports:{}}).exports,e),e.exports),w=(n,e)=>{for(var t in e)r(n,t,{get:e[t],enumerable:!0})},a=(n,e,t,s)=>{if(e&&typeof e==\\"object\\"||typeof e==\\"function\\")for(let o of g(e))!m.call(n,o)&&o!==t&&r(n,o,{get:()=>e[o],enumerable:!(s=u(e,o))||s.enumerable});return n};var v=(n,e,t)=>(t=n!=null?p(f(n)):{},a(e||!n||!n.__esModule?r(t,\\"default\\",{value:n,enumerable:!0}):t,n)),b=n=>a(r({},\\"__esModule\\",{value:!0}),n);var c=y((E,l)=>{l.exports=_jsx_runtime});var P={};w(P,{default:()=>d,frontmatter:()=>T});var i=v(c()),T={title:\\"Elastic Universal Profiling agent, a continuous profiling solution, is now open source\\",slug:\\"elastic-universal-profiling-agent-open-source\\",date:\\"2024-04-15\\",description:\\"At Elastic, open source isn\'t just philosophy, it\'s our DNA. Dive into the future with our open-sourced Universal Profiling agent, revolutionizing software efficiency and sustainability.\\",author:[{slug:\\"israel-ogbole\\"},{slug:\\"christos-kalkanis\\"}],image:\\"tree_tunnel.jpg\\",tags:[{slug:\\"opentelemetry\\"},{slug:\\"universal-profiling\\"}]};function h(n){let e={a:\\"a\\",em:\\"em\\",h2:\\"h2\\",img:\\"img\\",li:\\"li\\",p:\\"p\\",strong:\\"strong\\",ul:\\"ul\\",...n.components};return(0,i.jsxs)(i.Fragment,{children:[(0,i.jsx)(e.p,{children:\\"Elastic Universal Profiling\\\\u2122 agent is now open source! The industry\\\\u2019s most advanced fleetwide continuous profiling solution empowers users to identify performance bottlenecks, reduce cloud spend, and minimize their carbon footprint. This post explores the history of the agent, its move to open source, and its future integration with OpenTelemetry.\\"}),`\\n`,(0,i.jsx)(e.h2,{id:\\"elastic-universal-profiling-agent-goes-open-source-under-apache-2\\",children:\\"Elastic Universal Profiling\\\\u2122 Agent goes open source under Apache 2\\"}),`\\n`,(0,i.jsx)(e.p,{children:\\"At Elastic, open source is more than just a philosophy \\\\u2014 it\'s our DNA. We believe the benefits of whole-system continuous profiling extend far beyond performance optimization. It\'s a win for businesses and the planet alike. 
We are excited to see this product evolve into its next growth phase in the open-source world.\\"]}),`\\n`,(0,i.jsx)(e.p,{children:(0,i.jsx)(e.img,{src:\\"/assets/images/elastic-universal-profiling-agent-open-source/image2.png\\",alt:\\"2 - car manufacturers\\",width:\\"1999\\",height:\\"1126\\"})}),`\\n`,(0,i.jsx)(e.h2,{id:\\"ready-to-give-it-a-spin\\",children:\\"Ready to give it a spin?\\"}),`\\n`,(0,i.jsx)(e.p,{children:\\"As Elastic Universal Profiling transitions into this new open source era, the potential for transformative impact on performance optimization, cost efficiency, and environmental sustainability is immense. Elastic\'s approach \\\\u2014 balancing innovation with responsibility \\\\u2014 paves the way for a future where technology not only powers our world but does so in a way that is sustainable and accessible to all.\\"}),`\\n`,(0,i.jsxs)(e.p,{children:[\\"Get started with the open source Elastic Universal Profiling agent today! \\",(0,i.jsx)(e.a,{href:\\"https://github.com/elastic/otel-profiling-agent/\\",rel:\\"nofollow\\",children:\\"Download it directly from GitHub\\"}),\\" and follow the instructions in the repository.\\"]}),`\\n`,(0,i.jsx)(e.p,{children:(0,i.jsx)(e.img,{src:\\"/assets/images/elastic-universal-profiling-agent-open-source/image3.png\\",alt:\\"3 - dripping graph and data\\",width:\\"1999\\",height:\\"1340\\"})}),`\\n`,(0,i.jsx)(e.p,{children:(0,i.jsx)(e.em,{children:\\"The release and timing of any features or functionality described in this post remain at Elastic\'s sole discretion. Any features or functionality not currently available may not be delivered on time or at all.\\"})})]})}function d(n={}){let{wrapper:e}=n.components||{};return e?(0,i.jsx)(e,{...n,children:(0,i.jsx)(h,{...n})}):h(n)}return b(P);})();\\n;return Component;"},"_id":"articles/elastic-universal-profiling-agent-open-source.mdx","_raw":{"sourceFilePath":"articles/elastic-universal-profiling-agent-open-source.mdx","sourceFileName":"elastic-universal-profiling-agent-open-source.mdx","sourceFileDir":"articles","contentType":"mdx","flattenedPath":"articles/elastic-universal-profiling-agent-open-source"},"type":"Article","imageUrl":"/assets/images/elastic-universal-profiling-agent-open-source/tree_tunnel.jpg","readingTime":"6 min read","url":"/elastic-universal-profiling-agent-open-source","headings":[{"level":2,"title":"Elastic Universal Profiling™ Agent goes open source under Apache 2 ","href":"#elastic-universal-profiling-agent-goes-open-source-under-apache-2-"},{"level":2,"title":"Implementation of the OpenTelemetry (OTel) Profiling protocol","href":"#implementation-of-the-opentelemetry-otel-profiling-protocol"},{"level":2,"title":"What does this mean for Elastic Universal Profiling customers? 
","href":"#what-does-this-mean-for-elastic-universal-profiling-customers-"},{"level":2,"title":"What does this mean for the OpenTelemetry community?","href":"#what-does-this-mean-for-the-opentelemetry-community"},{"level":2,"title":"Ready to give it a spin?","href":"#ready-to-give-it-a-spin"}]},{"title":"Elastic Universal Profiling: Delivering performance improvements and reduced costs","slug":"elastic-universal-profiling-performance-improvements-reduced-costs","date":"2024-04-22","description":"In this blog, we’ll cover how a discovery by one of our engineers led to cost savings of thousands of dollars in our QA environment and magnitudes more once we deployed this change to production.","image":"money.jpg","author":[{"slug":"luca-wintergerst","type":"Author","_raw":{}},{"slug":"tim-ruhsen","type":"Author","_raw":{}}],"tags":[{"slug":"universal-profiling","type":"Tag","_raw":{}},{"slug":"apm","type":"Tag","_raw":{}},{"slug":"cloud-monitoring","type":"Tag","_raw":{}}],"body":{"raw":"\\nIn today\'s age of cloud services and SaaS platforms, continuous improvement isn\'t just a goal — it\'s a necessity. Here at Elastic, we\'re always on the lookout for ways to fine-tune our systems, be it our internal tools or the Elastic Cloud service. Our recent investigation in performance optimization within our Elastic Cloud QA environment, guided by [Elastic Universal Profiling](https://www.elastic.co/blog/continuous-profiling-efficient-cost-effective-applications), is a great example of how we turn data into actionable insights.\\n\\nIn this blog, we’ll cover how a discovery by one of our engineers led to savings of thousands of dollars in our QA environment and magnitudes more once we deployed this change to production.\\n\\n## Elastic Universal Profiling: Our go-to tool for optimization\\n\\nIn our suite of solutions for addressing performance challenges, Elastic Universal Profiling is a critical component. As an “always-on” profiler utilizing eBPF, it integrates seamlessly into our infrastructure and systematically collects comprehensive profiling data across the entirety of our system. Because there is zero-code instrumentation or reconfiguration, it’s easy to deploy on any host (including Kubernetes hosts) in our cloud — we’ve deployed it across our environment for Elastic Cloud.\\n\\nAll of our hosts run the profiling agent to collect this data, which gives us detailed insight into the performance of any service that we’re running.\\n\\n### Spotting the opportunity\\n\\nIt all started with what seemed like a routine check of our QA environment. One of our engineers was looking through the profiling data. With Universal Profiling in play, this initial discovery was relatively quick. We found a function that was not optimized and had heavy compute costs.\\n\\nLet’s go through it step-by-step.\\n\\nIn order to spot expensive functions, we can simply view a list of the TopN functions. The TopN functions list shows us all functions in all services we run that use the most CPU.\\n\\nTo sort them by their impact, we sort descending on the “total CPU”:\\n\\n- **Self CPU** measures the CPU time that a function directly uses, not including the time spent in functions it calls. This metric helps identify functions that use a lot of CPU power on their own. By improving these functions, we can make them run faster and use less CPU.\\n\\n- **Total CPU** adds up the CPU time used by the function and any functions it calls. This gives a complete picture of how much CPU a function and its related operations use. 
If a function has a high \\"total CPU\\" usage, it might be because it\'s calling other functions that use a lot of CPU.\\n\\n![1 - universal profiling](/assets/images/elastic-universal-profiling-performance-improvements-reduced-costs/1.png)\\n\\nWhen our engineer reviewed the TopN functions list, one function called \\"... **inflateCompressedFrame** …\\" caught their attention. This is a common scenario where certain types of functions frequently become optimization targets. Here’s a simplified guide on what to look for and possible improvements:\\n\\n- **Compression/decompression:** Is there a more efficient algorithm? For example, switching from zlib to zlib-ng might offer better performance.\\n\\n- **Cryptographic hashing algorithms:** Ensure the fastest algorithm is in use. Sometimes, a quicker non-cryptographic algorithm could be suitable, depending on the security requirements.\\n\\n- **Non-cryptographic hashing algorithms:** Check if you\'re using the quickest option. xxh3, for instance, is often faster than other hashing algorithms.\\n\\n- **Garbage collection:** Minimize heap allocations, especially in frequently used paths. Opt for data structures that don\'t rely on garbage collection.\\n\\n- **Heap memory allocations:** These are typically resource-intensive. Consider alternatives like using jemalloc or mimalloc instead of the standard libc malloc() to reduce their impact.\\n\\n- **Page faults:** Keep an eye out for \\"exc_page_fault\\" in your TopN Functions or flamegraph. They indicate areas where memory access patterns could be optimized.\\n\\n- **Excessive CPU usage by kernel functions:** This may indicate too many system calls. Using larger buffers for read/write operations can reduce the number of syscalls.\\n\\n- **Serialization/deserialization:** Processes like JSON encoding or decoding can often be accelerated by switching to a faster JSON library.\\n\\nIdentifying these areas can help in pinpointing where performance can be notably improved.\\n\\nClicking on the function from the TopN view shows it in the flamegraph. Note that the flamegraph is showing the samples from the full cloud QA infrastructure. In this view, we can tell that this function alone was accounting for \\\\>US$6,000 annualized in this part of our QA environment.\\n\\n![2 - universal profiling flamegraph](/assets/images/elastic-universal-profiling-performance-improvements-reduced-costs/2.png)\\n\\nAfter filtering for the thread, it became more clear what the function was doing. The following image shows a flamegraph of this thread across all of the hosts running in the QA environment.\\n\\n![3 - flamegraph shows hosts running in QA environment ](/assets/images/elastic-universal-profiling-performance-improvements-reduced-costs/3.png)\\n\\n![4 - hosts running in QA environment](/assets/images/elastic-universal-profiling-performance-improvements-reduced-costs/4.png)\\n\\nInstead of looking at the thread across all hosts, we can also look at a flamegraph for just one specific host.\\n\\nIf we look at this one host at a time, we can see that the impact is even more severe. Keep in mind that the 17% from before was for the full infrastructure. 
Some hosts may not even be running this service and therefore bring down the average.\\n\\nFiltering things down to a single host that has the service running, we can tell that this host is actually spending close to 70% of its CPU cycles on running this function.\\n\\nThe dollar cost here just for this one host would put the function at around US$600 per year.\\n\\n![5 - filtering](/assets/images/elastic-universal-profiling-performance-improvements-reduced-costs/5.png)\\n\\n## Understanding the performance problem\\n\\nAfter identifying a potentially resource-intensive function, our next step involved collaborating with our Engineering teams to understand the function and work on a potential fix. Here\'s a straightforward breakdown of our approach:\\n\\n- **Understanding the function:** We began by analyzing what the function should do. It utilizes gzip for decompression. This insight led us to briefly consider strategies mentioned earlier for reducing CPU usage, such as using a more efficient compression library like zlib or switching to zstd compression.\\n- **Evaluating the current implementation:** The function currently relies on JDK\'s gzip decompression, which is expected to use native libraries under the hood. Our usual preference is Java or Ruby libraries when available because they simplify deployment. Opting for a native library directly would require us to manage different native versions for each OS and CPU we support, complicating our deployment process.\\n- **Detailed analysis using flamegraph:** A closer examination of the flamegraph revealed that the system encounters page faults and spends significant CPU cycles handling these.\\n\\n**Let’s start with understanding the Flamegraph:**\\n\\nThe last few non jdk.\\\\* JVM instructions (in green) show the allocation of a direct memory Byte Buffer started by Netty\'s DirectArena.newUnpooledChunk. Direct memory allocations are costly operations that typically should be avoided on an application\'s critical path.\\n\\nThe [Elastic AI Assistant for Observability](https://www.elastic.co/blog/context-aware-insights-elastic-ai-assistant-observability) is also useful in understanding and optimizing parts of the flamegraph. Especially for users new to Universal Profiling, it can add lots of context to the collected data and give the user a better understanding of them and provide potential solutions.\\n\\n![6 - Detailed analysis using flamegraph](/assets/images/elastic-universal-profiling-performance-improvements-reduced-costs/6.png)\\n\\n![7 - understanding flamegraph](/assets/images/elastic-universal-profiling-performance-improvements-reduced-costs/7.png)\\n\\n**Netty\'s memory allocation**\\n\\nNetty, a popular asynchronous event-driven network application framework, uses the maxOrder setting to determine the size of memory chunks allocated for managing objects within its applications. The formula for calculating the chunk size is chunkSize = pageSize \\\\<\\\\< maxOrder. The default maxOrder value of either 9 or 11 results in the default memory chunk size being 4MB or 16MB, respectively, assuming a page size of 8KB.\\n\\n**Impact on memory allocation**\\n\\nNetty employs a PooledAllocator for efficient memory management, which allocates memory chunks in a pool of direct memory at startup. This allocator optimizes memory usage by reusing memory chunks for objects smaller than the defined chunk size. 
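Spelling out the chunk-size arithmetic from the formula above, as a worked check using the 8KB page size the article assumes:

```
chunkSize = pageSize << maxOrder
8 KB << 9  = 8 KB * 512  = 4 MB    (current Netty default, maxOrder = 9)
8 KB << 11 = 8 KB * 2048 = 16 MB   (previous default, maxOrder = 11)
```

Any single allocation larger than the active chunk size, such as a decompressed Beats batch, falls outside the pool, which is exactly the slow path described next.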
Any object that exceeds this threshold must be allocated outside of the PooledAllocator.\\n\\nAllocating and releasing memory outside of this pooled context incurs a higher performance cost for several reasons:\\n\\n- **Increased allocation overhead:** Objects larger than the chunk size require individual memory allocation requests. These allocations are more time-consuming and resource-intensive compared to the fast, pooled allocation mechanism for smaller objects.\\n- **Fragmentation and garbage collection (GC) pressure:** Allocating larger objects outside the pool can lead to increased memory fragmentation. Furthermore, if these objects are allocated on the heap, it can increase GC pressure, leading to potential pauses and reduced application performance.\\n- **Netty and the Beats/Agent input:** Logstash\'s Beats and Elastic Agent inputs use Netty to receive and send data. During processing of a received data batch, decompressing the data frame requires creating a buffer large enough to store the uncompressed events. If this batch is larger than the chunk size, an unpooled chunk is needed, causing a direct memory allocation that slows performance. The universal profiler allowed us to confirm that this was the case from the DirectArena.newUnpooledChunk calls in the flamegraph.\\n\\n## Fixing the performance problem in our environments\\n\\nWe decided to implement a quick workaround to test our hypothesis. Apart from having to adjust the jvm options once, this approach does not have any major downsides.\\n\\nThe immediate workaround involves manually adjusting the maxOrder setting back to its previous value. This can be achieved by adding a specific flag to the config/jvm.options file in Logstash:\\n\\n```\\n-Dio.netty.allocator.maxOrder=11\\n```\\n\\nThis adjustment will revert the default chunk size to 16MB (chunkSize = pageSize \\\\<\\\\< maxOrder, or 16MB = 8KB \\\\<\\\\< 11), which aligns with the previous behavior of Netty, thereby reducing the overhead associated with allocating and releasing larger objects outside of the PooledAllocator.\\n\\nAfter rolling out this change to some of our hosts in the QA environment, the impact was immediately visible in the profiling data.\\n\\n**Single host:**\\n\\n![8 - single host](/assets/images/elastic-universal-profiling-performance-improvements-reduced-costs/8.png)\\n\\n**Multiple hosts:**\\n\\n![9 - multiple hosts](/assets/images/elastic-universal-profiling-performance-improvements-reduced-costs/10.png)\\n\\nWe can also use the differential flamegraph view to see the impact.\\n\\nFor this specific thread, we’re comparing one day of data from early January to one day of data from early February across a subset of hosts. Both the overall performance improvements as well as the CO2 and cost savings are dramatic.\\n\\n![10. -cost savings](/assets/images/elastic-universal-profiling-performance-improvements-reduced-costs/11.png)\\n\\nThis same comparison can also be done for a single host. In this view, we’re comparing one host in early January to that same host in early February. The actual CPU usage on that host decreased by 50%, saving us approximately US$900 per year per host.\\n\\n![11 - comparisons](/assets/images/elastic-universal-profiling-performance-improvements-reduced-costs/12.png)\\n\\n## Fixing the issue in Logstash\\n\\nIn addition to the temporary workaround, we are working on shipping a proper fix for this behavior in Logstash. 
You can find more details in this [issue](https://github.com/elastic/logstash/issues/15765), but the potential candidates are:\\n\\n- **Global default adjustment:** One approach is to permanently set the maxOrder back to 11 for all instances by including this change in the jvm.options file. This global change would ensure that all Logstash instances use the larger default chunk size, reducing the need for allocations outside the pooled allocator.\\n- **Custom allocator configuration:** For more targeted interventions, we could customize the allocator settings specifically within the TCP, Beats, and HTTP inputs of Logstash. This would involve configuring the maxOrder value at initialization for these inputs, providing a tailored solution that addresses the performance issues in the most affected areas of data ingestion.\\n- **Optimizing major allocation sites:** Another solution focuses on altering the behavior of significant allocation sites within Logstash. For instance, modifying the frame decompression process in the Beats input to avoid using direct memory and instead default to heap memory could significantly reduce the performance impact. This approach would circumvent the limitations imposed by the reduced default chunk size, minimizing the reliance on large direct memory allocations.\\n\\n## Cost savings and performance enhancements\\n\\nFollowing the new configuration change for Logstash instances on January 23, the platform\'s daily function cost dramatically decreased to US$350 from an initial \\\\>US$6,000, marking a significant 20x reduction. This change shows the potential for substantial cost savings through technical optimizations. However, it\'s important to note that these figures represent potential savings rather than direct cost reductions.\\n\\nJust because a host uses less CPU resources, doesn’t necessarily mean that we are also saving money. To actually benefit from this, the very last step now is to either reduce the number of VMs we have running or to scale down the CPU resources of each one to match the new resource requirements.\\n\\nThis experience with Elastic Universal Profiling highlights how crucial detailed, real-time data analysis is in identifying areas for optimization that lead to significant performance enhancements and cost savings. By implementing targeted changes based on profiling insights, we\'ve dramatically reduced CPU usage and operational costs in our QA environment with promising implications for broader production deployment.\\n\\nOur findings demonstrate the benefits of an always-on, profiling driven approach in cloud environments, providing a good foundation for future optimizations. As we scale these improvements, the potential for further cost savings and efficiency gains continues to grow.\\n\\nAll of this is also possible in your environments. [Learn how to get started today](https://www.elastic.co/observability/universal-profiling).\\n\\n_The release and timing of any features or functionality described in this post remain at Elastic\'s sole discretion. 
Any features or functionality not currently available may not be delivered on time or at all._\\n","code":"var Component=(()=>{var u=Object.create;var r=Object.defineProperty;var p=Object.getOwnPropertyDescriptor;var g=Object.getOwnPropertyNames;var m=Object.getPrototypeOf,f=Object.prototype.hasOwnProperty;var y=(i,e)=>()=>(e||i((e={exports:{}}).exports,e),e.exports),v=(i,e)=>{for(var t in e)r(i,t,{get:e[t],enumerable:!0})},s=(i,e,t,a)=>{if(e&&typeof e==\\"object\\"||typeof e==\\"function\\")for(let o of g(e))!f.call(i,o)&&o!==t&&r(i,o,{get:()=>e[o],enumerable:!(a=p(e,o))||a.enumerable});return i};var w=(i,e,t)=>(t=i!=null?u(m(i)):{},s(e||!i||!i.__esModule?r(t,\\"default\\",{value:i,enumerable:!0}):t,i)),b=i=>s(r({},\\"__esModule\\",{value:!0}),i);var c=y((T,l)=>{l.exports=_jsx_runtime});var z={};v(z,{default:()=>d,frontmatter:()=>k});var n=w(c()),k={title:\\"Elastic Universal Profiling: Delivering performance improvements and reduced costs\\",slug:\\"elastic-universal-profiling-performance-improvements-reduced-costs\\",date:\\"2024-04-22\\",description:\\"In this blog, we\\\\u2019ll cover how a discovery by one of our engineers led to cost savings of thousands of dollars in our QA environment and magnitudes more once we deployed this change to production.\\",author:[{slug:\\"luca-wintergerst\\"},{slug:\\"tim-ruhsen\\"}],image:\\"money.jpg\\",tags:[{slug:\\"universal-profiling\\"},{slug:\\"apm\\"},{slug:\\"cloud-monitoring\\"}]};function h(i){let e={a:\\"a\\",code:\\"code\\",em:\\"em\\",h2:\\"h2\\",h3:\\"h3\\",img:\\"img\\",li:\\"li\\",p:\\"p\\",pre:\\"pre\\",strong:\\"strong\\",ul:\\"ul\\",...i.components};return(0,n.jsxs)(n.Fragment,{children:[(0,n.jsxs)(e.p,{children:[\\"In today\'s age of cloud services and SaaS platforms, continuous improvement isn\'t just a goal \\\\u2014 it\'s a necessity. Here at Elastic, we\'re always on the lookout for ways to fine-tune our systems, be it our internal tools or the Elastic Cloud service. Our recent investigation in performance optimization within our Elastic Cloud QA environment, guided by \\",(0,n.jsx)(e.a,{href:\\"https://www.elastic.co/blog/continuous-profiling-efficient-cost-effective-applications\\",rel:\\"nofollow\\",children:\\"Elastic Universal Profiling\\"}),\\", is a great example of how we turn data into actionable insights.\\"]}),`\\n`,(0,n.jsx)(e.p,{children:\\"In this blog, we\\\\u2019ll cover how a discovery by one of our engineers led to savings of thousands of dollars in our QA environment and magnitudes more once we deployed this change to production.\\"}),`\\n`,(0,n.jsx)(e.h2,{id:\\"elastic-universal-profiling-our-go-to-tool-for-optimization\\",children:\\"Elastic Universal Profiling: Our go-to tool for optimization\\"}),`\\n`,(0,n.jsx)(e.p,{children:\\"In our suite of solutions for addressing performance challenges, Elastic Universal Profiling is a critical component. As an \\\\u201Calways-on\\\\u201D profiler utilizing eBPF, it integrates seamlessly into our infrastructure and systematically collects comprehensive profiling data across the entirety of our system. 
Because no code instrumentation or reconfiguration is required, it\\\\u2019s easy to deploy on any host (including Kubernetes hosts) in our cloud \\\\u2014 we\\\\u2019ve deployed it across our environment for Elastic Cloud.\\"}),`\\n`,(0,n.jsx)(e.p,{children:\\"All of our hosts run the profiling agent to collect this data, which gives us detailed insight into the performance of any service that we\\\\u2019re running.\\"}),`\\n`,(0,n.jsx)(e.h3,{id:\\"spotting-the-opportunity\\",children:\\"Spotting the opportunity\\"}),`\\n`,(0,n.jsx)(e.p,{children:\\"It all started with what seemed like a routine check of our QA environment. One of our engineers was looking through the profiling data. With Universal Profiling in play, this initial discovery was relatively quick. We found a function that was not optimized and had heavy compute costs.\\"}),`\\n`,(0,n.jsx)(e.p,{children:\\"Let\\\\u2019s go through it step-by-step.\\"}),`\\n`,(0,n.jsx)(e.p,{children:\\"In order to spot expensive functions, we can simply view a list of the TopN functions. The TopN functions list shows us all functions in all services we run that use the most CPU.\\"}),`\\n`,(0,n.jsx)(e.p,{children:\\"To sort them by their impact, we sort descending on the \\\\u201Ctotal CPU\\\\u201D:\\"}),`\\n`,(0,n.jsxs)(e.ul,{children:[`\\n`,(0,n.jsxs)(e.li,{children:[`\\n`,(0,n.jsxs)(e.p,{children:[(0,n.jsx)(e.strong,{children:\\"Self CPU\\"}),\\" measures the CPU time that a function directly uses, not including the time spent in functions it calls. This metric helps identify functions that use a lot of CPU power on their own. By improving these functions, we can make them run faster and use less CPU.\\"]}),`\\n`]}),`\\n`,(0,n.jsxs)(e.li,{children:[`\\n`,(0,n.jsxs)(e.p,{children:[(0,n.jsx)(e.strong,{children:\\"Total CPU\\"}),` adds up the CPU time used by the function and any functions it calls. This gives a complete picture of how much CPU a function and its related operations use. If a function has a high \\"total CPU\\" usage, it might be because it\'s calling other functions that use a lot of CPU.`]}),`\\n`]}),`\\n`]}),`\\n`,(0,n.jsx)(e.p,{children:(0,n.jsx)(e.img,{src:\\"/assets/images/elastic-universal-profiling-performance-improvements-reduced-costs/1.png\\",alt:\\"1 - universal profiling\\",width:\\"1999\\",height:\\"1147\\"})}),`\\n`,(0,n.jsxs)(e.p,{children:[\'When our engineer reviewed the TopN functions list, one function called \\"... \',(0,n.jsx)(e.strong,{children:\\"inflateCompressedFrame\\"}),\' \\\\u2026\\" caught their attention. This is a common scenario where certain types of functions frequently become optimization targets. Here\\\\u2019s a simplified guide on what to look for and possible improvements:\']}),`\\n`,(0,n.jsxs)(e.ul,{children:[`\\n`,(0,n.jsxs)(e.li,{children:[`\\n`,(0,n.jsxs)(e.p,{children:[(0,n.jsx)(e.strong,{children:\\"Compression/decompression:\\"}),\\" Is there a more efficient algorithm? For example, switching from zlib to zlib-ng might offer better performance.\\"]}),`\\n`]}),`\\n`,(0,n.jsxs)(e.li,{children:[`\\n`,(0,n.jsxs)(e.p,{children:[(0,n.jsx)(e.strong,{children:\\"Cryptographic hashing algorithms:\\"}),\\" Ensure the fastest algorithm is in use. Sometimes, a quicker non-cryptographic algorithm could be suitable, depending on the security requirements.\\"]}),`\\n`]}),`\\n`,(0,n.jsxs)(e.li,{children:[`\\n`,(0,n.jsxs)(e.p,{children:[(0,n.jsx)(e.strong,{children:\\"Non-cryptographic hashing algorithms:\\"}),\\" Check if you\'re using the quickest option. 
xxh3, for instance, is often faster than other hashing algorithms.\\"]}),`\\n`]}),`\\n`,(0,n.jsxs)(e.li,{children:[`\\n`,(0,n.jsxs)(e.p,{children:[(0,n.jsx)(e.strong,{children:\\"Garbage collection:\\"}),\\" Minimize heap allocations, especially in frequently used paths. Opt for data structures that don\'t rely on garbage collection.\\"]}),`\\n`]}),`\\n`,(0,n.jsxs)(e.li,{children:[`\\n`,(0,n.jsxs)(e.p,{children:[(0,n.jsx)(e.strong,{children:\\"Heap memory allocations:\\"}),\\" These are typically resource-intensive. Consider alternatives like using jemalloc or mimalloc instead of the standard libc malloc() to reduce their impact.\\"]}),`\\n`]}),`\\n`,(0,n.jsxs)(e.li,{children:[`\\n`,(0,n.jsxs)(e.p,{children:[(0,n.jsx)(e.strong,{children:\\"Page faults:\\"}),\' Keep an eye out for \\"exc_page_fault\\" in your TopN Functions or flamegraph. They indicate areas where memory access patterns could be optimized.\']}),`\\n`]}),`\\n`,(0,n.jsxs)(e.li,{children:[`\\n`,(0,n.jsxs)(e.p,{children:[(0,n.jsx)(e.strong,{children:\\"Excessive CPU usage by kernel functions:\\"}),\\" This may indicate too many system calls. Using larger buffers for read/write operations can reduce the number of syscalls.\\"]}),`\\n`]}),`\\n`,(0,n.jsxs)(e.li,{children:[`\\n`,(0,n.jsxs)(e.p,{children:[(0,n.jsx)(e.strong,{children:\\"Serialization/deserialization:\\"}),\\" Processes like JSON encoding or decoding can often be accelerated by switching to a faster JSON library.\\"]}),`\\n`]}),`\\n`]}),`\\n`,(0,n.jsx)(e.p,{children:\\"Identifying these areas can help in pinpointing where performance can be notably improved.\\"}),`\\n`,(0,n.jsx)(e.p,{children:\\"Clicking on the function from the TopN view shows it in the flamegraph. Note that the flamegraph is showing the samples from the full cloud QA infrastructure. In this view, we can tell that this function alone was accounting for >US$6,000 annualized in this part of our QA environment.\\"}),`\\n`,(0,n.jsx)(e.p,{children:(0,n.jsx)(e.img,{src:\\"/assets/images/elastic-universal-profiling-performance-improvements-reduced-costs/2.png\\",alt:\\"2 - universal profiling flamegraph\\",width:\\"1999\\",height:\\"1147\\"})}),`\\n`,(0,n.jsx)(e.p,{children:\\"After filtering for the thread, it became more clear what the function was doing. The following image shows a flamegraph of this thread across all of the hosts running in the QA environment.\\"}),`\\n`,(0,n.jsx)(e.p,{children:(0,n.jsx)(e.img,{src:\\"/assets/images/elastic-universal-profiling-performance-improvements-reduced-costs/3.png\\",alt:\\"3 - flamegraph shows hosts running in QA environment \\",width:\\"1999\\",height:\\"1147\\"})}),`\\n`,(0,n.jsx)(e.p,{children:(0,n.jsx)(e.img,{src:\\"/assets/images/elastic-universal-profiling-performance-improvements-reduced-costs/4.png\\",alt:\\"4 - hosts running in QA environment\\",width:\\"1310\\",height:\\"672\\"})}),`\\n`,(0,n.jsx)(e.p,{children:\\"Instead of looking at the thread across all hosts, we can also look at a flamegraph for just one specific host.\\"}),`\\n`,(0,n.jsx)(e.p,{children:\\"If we look at this one host at a time, we can see that the impact is even more severe. Keep in mind that the 17% from before was for the full infrastructure. 
Some hosts may not even be running this service and therefore bring down the average.\\"}),`\\n`,(0,n.jsx)(e.p,{children:\\"Filtering things down to a single host that has the service running, we can tell that this host is actually spending close to 70% of its CPU cycles on running this function.\\"}),`\\n`,(0,n.jsx)(e.p,{children:\\"The dollar cost here just for this one host would put the function at around US$600 per year.\\"}),`\\n`,(0,n.jsx)(e.p,{children:(0,n.jsx)(e.img,{src:\\"/assets/images/elastic-universal-profiling-performance-improvements-reduced-costs/5.png\\",alt:\\"5 - filtering\\",width:\\"1999\\",height:\\"1148\\"})}),`\\n`,(0,n.jsx)(e.h2,{id:\\"understanding-the-performance-problem\\",children:\\"Understanding the performance problem\\"}),`\\n`,(0,n.jsx)(e.p,{children:\\"After identifying a potentially resource-intensive function, our next step involved collaborating with our Engineering teams to understand the function and work on a potential fix. Here\'s a straightforward breakdown of our approach:\\"}),`\\n`,(0,n.jsxs)(e.ul,{children:[`\\n`,(0,n.jsxs)(e.li,{children:[(0,n.jsx)(e.strong,{children:\\"Understanding the function:\\"}),\\" We began by analyzing what the function should do. It utilizes gzip for decompression. This insight led us to briefly consider strategies mentioned earlier for reducing CPU usage, such as using a more efficient compression library like zlib-ng or switching to zstd compression.\\"]}),`\\n`,(0,n.jsxs)(e.li,{children:[(0,n.jsx)(e.strong,{children:\\"Evaluating the current implementation:\\"}),\\" The function currently relies on JDK\'s gzip decompression, which is expected to use native libraries under the hood. Our usual preference is Java or Ruby libraries when available because they simplify deployment. Opting for a native library directly would require us to manage different native versions for each OS and CPU we support, complicating our deployment process.\\"]}),`\\n`,(0,n.jsxs)(e.li,{children:[(0,n.jsx)(e.strong,{children:\\"Detailed analysis using flamegraph:\\"}),\\" A closer examination of the flamegraph revealed that the system encounters page faults and spends significant CPU cycles handling these.\\"]}),`\\n`]}),`\\n`,(0,n.jsx)(e.p,{children:(0,n.jsx)(e.strong,{children:\\"Let\\\\u2019s start with understanding the Flamegraph:\\"})}),`\\n`,(0,n.jsx)(e.p,{children:\\"The last few non-jdk.* JVM instructions (in green) show the allocation of a direct memory Byte Buffer started by Netty\'s DirectArena.newUnpooledChunk. Direct memory allocations are costly operations that typically should be avoided on an application\'s critical path.\\"}),`\\n`,(0,n.jsxs)(e.p,{children:[\\"The \\",(0,n.jsx)(e.a,{href:\\"https://www.elastic.co/blog/context-aware-insights-elastic-ai-assistant-observability\\",rel:\\"nofollow\\",children:\\"Elastic AI Assistant for Observability\\"}),\\" is also useful in understanding and optimizing parts of the flamegraph. 
Especially for users new to Universal Profiling, it can add a lot of context to the collected data, give users a better understanding of it, and suggest potential solutions.\\"]}),`\\n`,(0,n.jsx)(e.p,{children:(0,n.jsx)(e.img,{src:\\"/assets/images/elastic-universal-profiling-performance-improvements-reduced-costs/6.png\\",alt:\\"6 - Detailed analysis using flamegraph\\",width:\\"1999\\",height:\\"1145\\"})}),`\\n`,(0,n.jsx)(e.p,{children:(0,n.jsx)(e.img,{src:\\"/assets/images/elastic-universal-profiling-performance-improvements-reduced-costs/7.png\\",alt:\\"7 - understanding flamegraph\\",width:\\"1999\\",height:\\"1145\\"})}),`\\n`,(0,n.jsx)(e.p,{children:(0,n.jsx)(e.strong,{children:\\"Netty\'s memory allocation\\"})}),`\\n`,(0,n.jsx)(e.p,{children:\\"Netty, a popular asynchronous event-driven network application framework, uses the maxOrder setting to determine the size of memory chunks allocated for managing objects within its applications. The formula for calculating the chunk size is chunkSize = pageSize << maxOrder. The default maxOrder value of either 9 or 11 results in the default memory chunk size being 4MB or 16MB, respectively, assuming a page size of 8KB.\\"}),`\\n`,(0,n.jsx)(e.p,{children:(0,n.jsx)(e.strong,{children:\\"Impact on memory allocation\\"})}),`\\n`,(0,n.jsx)(e.p,{children:\\"Netty employs a PooledAllocator for efficient memory management, which allocates memory chunks in a pool of direct memory at startup. This allocator optimizes memory usage by reusing memory chunks for objects smaller than the defined chunk size. Any object that exceeds this threshold must be allocated outside of the PooledAllocator.\\"}),`\\n`,(0,n.jsx)(e.p,{children:\\"Allocating and releasing memory outside of this pooled context incurs a higher performance cost for several reasons:\\"}),`\\n`,(0,n.jsxs)(e.ul,{children:[`\\n`,(0,n.jsxs)(e.li,{children:[(0,n.jsx)(e.strong,{children:\\"Increased allocation overhead:\\"}),\\" Objects larger than the chunk size require individual memory allocation requests. These allocations are more time-consuming and resource-intensive compared to the fast, pooled allocation mechanism for smaller objects.\\"]}),`\\n`,(0,n.jsxs)(e.li,{children:[(0,n.jsx)(e.strong,{children:\\"Fragmentation and garbage collection (GC) pressure:\\"}),\\" Allocating larger objects outside the pool can lead to increased memory fragmentation. Furthermore, if these objects are allocated on the heap, it can increase GC pressure, leading to potential pauses and reduced application performance.\\"]}),`\\n`,(0,n.jsxs)(e.li,{children:[(0,n.jsx)(e.strong,{children:\\"Netty and the Beats/Agent input:\\"}),\\" Logstash\'s Beats and Elastic Agent inputs use Netty to receive and send data. During processing of a received data batch, decompressing the data frame requires creating a buffer large enough to store the uncompressed events. If this batch is larger than the chunk size, an unpooled chunk is needed, causing a direct memory allocation that slows performance. The universal profiler allowed us to confirm that this was the case from the DirectArena.newUnpooledChunk calls in the flamegraph.\\"]}),`\\n`]}),`\\n`,(0,n.jsx)(e.h2,{id:\\"fixing-the-performance-problem-in-our-environments\\",children:\\"Fixing the performance problem in our environments\\"}),`\\n`,(0,n.jsx)(e.p,{children:\\"We decided to implement a quick workaround to test our hypothesis. 
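\\"}),`\\n`,(0,n.jsx)(e.p,{children:\\"The arithmetic behind this workaround, as a quick sketch (8KB is Netty\'s default page size):\\"}),`\\n`,(0,n.jsx)(e.pre,{children:(0,n.jsx)(e.code,{className:\\"language-java\\",children:`// chunkSize = pageSize << maxOrder\\nint pageSize = 8 * 1024;           // 8KB\\nint newChunkSize = pageSize << 9;  // 4MB, maxOrder = 9 (new default)\\nint oldChunkSize = pageSize << 11; // 16MB, maxOrder = 11 (previous default)\\n// Buffers larger than the chunk size cannot be served from the pool and\\n// trigger the costly unpooled direct memory allocations seen above.\\n`})}),`\\n`,(0,n.jsx)(e.p,{children:\\"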
Apart from having to adjust the jvm options once, this approach does not have any major downsides.\\"}),`\\n`,(0,n.jsx)(e.p,{children:\\"The immediate workaround involves manually adjusting the maxOrder setting back to its previous value. This can be achieved by adding a specific flag to the config/jvm.options file in Logstash:\\"}),`\\n`,(0,n.jsx)(e.pre,{children:(0,n.jsx)(e.code,{children:`-Dio.netty.allocator.maxOrder=11\\n`})}),`\\n`,(0,n.jsx)(e.p,{children:\\"This adjustment will revert the default chunk size to 16MB (chunkSize = pageSize << maxOrder, or 16MB = 8KB << 11), which aligns with the previous behavior of Netty, thereby reducing the overhead associated with allocating and releasing larger objects outside of the PooledAllocator.\\"}),`\\n`,(0,n.jsx)(e.p,{children:\\"After rolling out this change to some of our hosts in the QA environment, the impact was immediately visible in the profiling data.\\"}),`\\n`,(0,n.jsx)(e.p,{children:(0,n.jsx)(e.strong,{children:\\"Single host:\\"})}),`\\n`,(0,n.jsx)(e.p,{children:(0,n.jsx)(e.img,{src:\\"/assets/images/elastic-universal-profiling-performance-improvements-reduced-costs/8.png\\",alt:\\"8 - single host\\",width:\\"1999\\",height:\\"1148\\"})}),`\\n`,(0,n.jsx)(e.p,{children:(0,n.jsx)(e.strong,{children:\\"Multiple hosts:\\"})}),`\\n`,(0,n.jsx)(e.p,{children:(0,n.jsx)(e.img,{src:\\"/assets/images/elastic-universal-profiling-performance-improvements-reduced-costs/10.png\\",alt:\\"9 - multiple hosts\\",width:\\"1999\\",height:\\"1148\\"})}),`\\n`,(0,n.jsx)(e.p,{children:\\"We can also use the differential flamegraph view to see the impact.\\"}),`\\n`,(0,n.jsxs)(e.p,{children:[\\"For this specific thread, we\\\\u2019re comparing one day of data from early January to one day of data from early February across a subset of hosts. Both the overall performance improvements as well as the CO\\",(0,n.jsx)(\\"sub\\",{children:\\"2\\"}),\\" and cost savings are dramatic.\\"]}),`\\n`,(0,n.jsx)(e.p,{children:(0,n.jsx)(e.img,{src:\\"/assets/images/elastic-universal-profiling-performance-improvements-reduced-costs/11.png\\",alt:\\"10. -cost savings\\",width:\\"1999\\",height:\\"1148\\"})}),`\\n`,(0,n.jsx)(e.p,{children:\\"This same comparison can also be done for a single host. In this view, we\\\\u2019re comparing one host in early January to that same host in early February. The actual CPU usage on that host decreased by 50%, saving us approximately US$900 per year per host.\\"}),`\\n`,(0,n.jsx)(e.p,{children:(0,n.jsx)(e.img,{src:\\"/assets/images/elastic-universal-profiling-performance-improvements-reduced-costs/12.png\\",alt:\\"11 - comparisons\\",width:\\"1999\\",height:\\"1148\\"})}),`\\n`,(0,n.jsx)(e.h2,{id:\\"fixing-the-issue-in-logstash\\",children:\\"Fixing the issue in Logstash\\"}),`\\n`,(0,n.jsxs)(e.p,{children:[\\"In addition to the temporary workaround, we are working on shipping a proper fix for this behavior in Logstash. You can find more details in this \\",(0,n.jsx)(e.a,{href:\\"https://github.com/elastic/logstash/issues/15765\\",rel:\\"nofollow\\",children:\\"issue\\"}),\\", but the potential candidates are:\\"]}),`\\n`,(0,n.jsxs)(e.ul,{children:[`\\n`,(0,n.jsxs)(e.li,{children:[(0,n.jsx)(e.strong,{children:\\"Global default adjustment:\\"}),\\" One approach is to permanently set the maxOrder back to 11 for all instances by including this change in the jvm.options file. 
This global change would ensure that all Logstash instances use the larger default chunk size, reducing the need for allocations outside the pooled allocator.\\"]}),`\\n`,(0,n.jsxs)(e.li,{children:[(0,n.jsx)(e.strong,{children:\\"Custom allocator configuration:\\"}),\\" For more targeted interventions, we could customize the allocator settings specifically within the TCP, Beats, and HTTP inputs of Logstash. This would involve configuring the maxOrder value at initialization for these inputs, providing a tailored solution that addresses the performance issues in the most affected areas of data ingestion.\\"]}),`\\n`,(0,n.jsxs)(e.li,{children:[(0,n.jsx)(e.strong,{children:\\"Optimizing major allocation sites:\\"}),\\" Another solution focuses on altering the behavior of significant allocation sites within Logstash. For instance, modifying the frame decompression process in the Beats input to avoid using direct memory and instead default to heap memory could significantly reduce the performance impact. This approach would circumvent the limitations imposed by the reduced default chunk size, minimizing the reliance on large direct memory allocations.\\"]}),`\\n`]}),`\\n`,(0,n.jsx)(e.h2,{id:\\"cost-savings-and-performance-enhancements\\",children:\\"Cost savings and performance enhancements\\"}),`\\n`,(0,n.jsx)(e.p,{children:\\"Following the new configuration change for Logstash instances on January 23, the platform\'s daily function cost dramatically decreased to US$350 from an initial >US$6,000, roughly a 20x reduction. This change shows the potential for substantial cost savings through technical optimizations. However, it\'s important to note that these figures represent potential savings rather than direct cost reductions.\\"}),`\\n`,(0,n.jsx)(e.p,{children:\\"Just because a host uses fewer CPU resources doesn\\\\u2019t necessarily mean that we are also saving money. To actually benefit from this, the final step is to either reduce the number of VMs we have running or scale down the CPU resources of each one to match the new resource requirements.\\"}),`\\n`,(0,n.jsx)(e.p,{children:\\"This experience with Elastic Universal Profiling highlights how crucial detailed, real-time data analysis is in identifying areas for optimization that lead to significant performance enhancements and cost savings. By implementing targeted changes based on profiling insights, we\'ve dramatically reduced CPU usage and operational costs in our QA environment, with promising implications for broader production deployment.\\"}),`\\n`,(0,n.jsx)(e.p,{children:\\"Our findings demonstrate the benefits of an always-on, profiling-driven approach in cloud environments, providing a good foundation for future optimizations. As we scale these improvements, the potential for further cost savings and efficiency gains continues to grow.\\"}),`\\n`,(0,n.jsxs)(e.p,{children:[\\"All of this is also possible in your environments. \\",(0,n.jsx)(e.a,{href:\\"https://www.elastic.co/observability/universal-profiling\\",rel:\\"nofollow\\",children:\\"Learn how to get started today\\"}),\\".\\"]}),`\\n`,(0,n.jsx)(e.p,{children:(0,n.jsx)(e.em,{children:\\"The release and timing of any features or functionality described in this post remain at Elastic\'s sole discretion. 
Any features or functionality not currently available may not be delivered on time or at all.\\"})})]})}function d(i={}){let{wrapper:e}=i.components||{};return e?(0,n.jsx)(e,{...i,children:(0,n.jsx)(h,{...i})}):h(i)}return b(z);})();\\n;return Component;"},"_id":"articles/elastic-universal-profiling-performance-improvements-reduced-costs.mdx","_raw":{"sourceFilePath":"articles/elastic-universal-profiling-performance-improvements-reduced-costs.mdx","sourceFileName":"elastic-universal-profiling-performance-improvements-reduced-costs.mdx","sourceFileDir":"articles","contentType":"mdx","flattenedPath":"articles/elastic-universal-profiling-performance-improvements-reduced-costs"},"type":"Article","imageUrl":"/assets/images/elastic-universal-profiling-performance-improvements-reduced-costs/money.jpg","readingTime":"12 min read","url":"/elastic-universal-profiling-performance-improvements-reduced-costs","headings":[{"level":2,"title":"Elastic Universal Profiling: Our go-to tool for optimization","href":"#elastic-universal-profiling-our-go-to-tool-for-optimization"},{"level":3,"title":"Spotting the opportunity","href":"#spotting-the-opportunity"},{"level":2,"title":"Understanding the performance problem","href":"#understanding-the-performance-problem"},{"level":2,"title":"Fixing the performance problem in our environments","href":"#fixing-the-performance-problem-in-our-environments"},{"level":2,"title":"Fixing the issue in Logstash","href":"#fixing-the-issue-in-logstash"},{"level":2,"title":"Cost savings and performance enhancements","href":"#cost-savings-and-performance-enhancements"}]},{"title":"Elastic\'s collaboration with OpenTelemetry on improving the filelog receiver","slug":"elastics-collaboration-opentelemetry-filelog-receiver","date":"2024-06-17","description":"Elastic is committed to helping OpenTelemetry advance its logging capabilities. 
Learn about our collaboration with the OpenTelemetry community on improving the capabilities and quality aspects of the OpenTelemetry Collector\'s filelog receiver.","image":"otel-filelog-receiver.jpg","author":[{"slug":"christos-markou","type":"Author","_raw":{}},{"slug":"shaunak-kashyap","type":"Author","_raw":{}},{"slug":"alexander-wert","type":"Author","_raw":{}}],"tags":[{"slug":"opentelemetry","type":"Tag","_raw":{}},{"slug":"logging","type":"Tag","_raw":{}}],"body":{"raw":"\\nAs the newest generally available signal in OpenTelemetry (OTel), logging support currently lags behind tracing and metrics in terms of feature scope and maturity.\\nAt Elastic, we bring years of extensive experience with logging use cases and the challenges they present.\\nCommitted to advancing OpenTelemetry\'s logging capabilities, we have focused on enhancing its logging functionalities.\\n\\nOver the past few months, we have dug into the capabilities of the [filelog receiver](https://github.com/open-telemetry/opentelemetry-collector-contrib/blob/v0.102.0/receiver/filelogreceiver/README.md)\\nin the [OpenTelemetry Collector](https://opentelemetry.io/docs/collector/), leveraging our expertise as [Filebeat\'s](https://www.elastic.co/beats/filebeat) maintainers to help refine and expand its potential.\\nOur goal is to contribute meaningfully to the evolution of OpenTelemetry\'s logging features, ensuring they meet the high standards required for robust observability.\\n\\nSpecifically, we focused on verifying that the receiver covers the cases and aspects that have been pain points for us in the past with Filebeat\\n—\xa0such as fail-over handling, self-telemetry, test coverage, documentation and usability.\\nBased on our exploration, we started insightful conversations with the OTel project\'s maintainers, sharing our thoughts and any suggestions that could be useful from our experience.\\nMoreover, we\'ve started putting up PRs to add documentation, make enhancements, improve tests, fix bugs, and even implement completely new features.\\n\\nIn this blog post we\'ll provide a sneak preview of the work that we\'ve done so far in collaboration with the OpenTelemetry community and what\'s coming next as we continue to explore ways to improve the OpenTelemetry Collector for log collection.\\n\\n## Enhancing the filelog receiver\'s telemetry\\n\\nObservability tools are software components like any other and, thus, need to be monitored as any other software to be able to debug problems and tune relevant settings.\\nIn particular, users of the filelog receiver will want to know how it\'s performing.\\nIt\'s important that the filelog receiver emits sufficient telemetry data for common troubleshooting and optimization use cases.\\nThis includes sufficient logging and observable metrics providing insights into the filelog receiver\'s internal state.\\n\\nWhile the filelog receiver already provided a good set of self-telemetry data, we identified some areas of improvement.\\nIn particular, we contributed functionality to emit self-telemetry [logs on crucial events](https://github.com/open-telemetry/opentelemetry-collector-contrib/pull/33237) like when log files are discovered, moved or truncated.\\nAnother contribution includes [observable metrics about the filelog receiver\'s internal state](https://github.com/open-telemetry/opentelemetry-collector-contrib/pull/31544), such as how many files are opened and being harvested.\\nYou can find more information on the [respective tracking issue](https://github.com/open-telemetry/opentelemetry-collector-contrib/issues/31256).
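\\n\\nFor reference, this self-telemetry, including the contributed metrics, is surfaced through the Collector\'s internal telemetry, and raising its detail level exposes the more fine-grained metrics. A minimal configuration sketch:\\n\\n```yaml\\nservice:\\n  telemetry:\\n    metrics:\\n      level: detailed\\n```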
\\n\\n## Improving the Kubernetes container logs parsing\\n\\nThe filelog receiver has been able to parse Kubernetes container logs for some time now.\\nHowever, properly parsing logs from Kubernetes Pods required a fair bit of configuration to deal with different runtime formats and to extract important meta information, such as `k8s.pod.name`, `k8s.container.name`, etc.\\nWith this in mind, we proposed abstracting this complex set of configuration options into a simpler, implementation-specific container parser and contributed this new feature to the filelog receiver.\\nWith that new feature, setting up logs collection for Kubernetes is orders of magnitude easier - with only eight lines of configuration vs. ~80 lines of configuration before.\\n\\n![1 - Usability improvement for parsing Kubernetes container logs](/assets/images/elastics-collaboration-opentelemetry-filelog-receiver/container-parser-config-example.png)\\n\\nYou can learn more about the details of the new [container logs parser in the corresponding OpenTelemetry blog post](https://opentelemetry.io/blog/2024/otel-collector-container-log-parser).\\n\\n### Evaluating test coverage\\n\\nLogs collection from files can run into different unexpected scenarios such as restarts, overload and error conditions.\\nTo ensure reliable and consistent collection of logs, it\'s important to ensure tests cover these kinds of scenarios.\\nBased on our experience with testing Filebeat, we evaluated the existing filelog receiver tests with respect to those scenarios.\\nWhile most of the use cases and scenarios were well-tested already, we identified a few scenarios where the tests could be improved to ensure reliable logs collection. \\nAt the time of writing this blog post, we were working on contributing additional tests to address the identified test coverage gaps.\\nYou can learn more about it in [this GitHub issue](https://github.com/open-telemetry/opentelemetry-collector-contrib/issues/32001).\\n\\n### Persistence evaluation\\n\\nAnother important aspect of log collection that we often hear about from Elastic\'s log users is failover handling and the delivery guarantees for logs.\\nSome logging use cases, for example audit logging, have strict delivery guarantee requirements.\\nHence, it\'s important that the filelog receiver provides functionality to reliably handle situations such as temporary unavailability of the logging backend or unexpected restarts of the OTel Collector.\\n\\nOverall, the filelog receiver already has corresponding functionality to deal with such situations.\\nHowever, user documentation on how to set up reliable logs collection with tangible examples was an area with potential for improvement.\\n\\nIn this regard, beyond verifying the persistence and offset tracking capabilities, we worked on improving the respective documentation\\n[1](https://github.com/open-telemetry/opentelemetry-collector-contrib/pull/31886) [2](https://github.com/open-telemetry/opentelemetry-collector-contrib/pull/30914)\\nand are also collaborating on a [community reported issue](https://github.com/open-telemetry/opentelemetry-collector-contrib/issues/31074) to ensure delivery guarantees for logs.\\n\\n### Helping users help themselves\\n\\nElastic has a long and varied history of supporting customers who use our products for log ingestion.\\nDrawing from this experience, we\'ve proposed a couple of documentation improvements to the OpenTelemetry Collector to help logging users get out of some tricky situations.
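\\n\\nOne of those situations involves the offset-tracking information described below: by default it lives in memory only, but pairing the receiver with a storage extension persists it across Collector restarts. A minimal sketch (the directory and log paths are illustrative):\\n\\n```yaml\\nextensions:\\n  file_storage:\\n    directory: /var/lib/otelcol/file_storage\\nreceivers:\\n  filelog:\\n    include: [/var/log/app/*.log]\\n    storage: file_storage\\nservice:\\n  extensions: [file_storage]\\n```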
\\n\\n**Documenting the structure of the tracking file**\\n\\nFor every log file the filelog receiver ingests, it needs to track how far into the file it has already read, so it knows where to start reading from when new contents are added to the file.\\nBy default, the filelog receiver doesn\'t persist this tracking information to disk, but it can be configured to do so.\\nWe felt it would be useful to [document the structure of this tracking file](https://github.com/open-telemetry/opentelemetry-collector-contrib/pull/32180). When ingestion stops unexpectedly,\\npeeking into this tracking file can often provide clues as to where the problem may lie.\\n\\n**Challenges with symlink target changes**\\n\\nThe filelog receiver periodically refreshes its memory of the files it\'s supposed to be ingesting.\\nThe interval at which these refreshes happen is controlled by the `poll_interval` setting.\\nIn certain setups, the log files being ingested by the filelog receiver are symlinks pointing to the actual files.\\nMoreover, these symlinks can be updated to point to newer files over time.\\nIf the symlink target changes twice before the filelog receiver has had a chance to refresh its memory, it will miss the first change and therefore not ingest the corresponding target file.\\nWe\'ve [documented this edge case](https://github.com/open-telemetry/opentelemetry-collector-contrib/pull/32217), suggesting that users with such setups make sure they set `poll_interval` to a sufficiently low value.\\n\\n### Planning ahead for the receiver\'s GA\xa0\\n\\nLast but not least, we have raised the topic of making the filelog receiver a generally available (GA) component.\\nFor users, it\'s important to be able to rely on the stability of the functionality they use, without having to deal with the risk of breaking changes through minor version updates.\\nIn this regard, for the filelog receiver we have kicked off a first plan with the maintainers to mark any issue that is a blocker for stability with a `required_for_ga`\\n[label](https://github.com/open-telemetry/opentelemetry-collector-contrib/issues?q=is%3Aopen+is%3Aissue+label%3Arelease%3Arequired-for-ga+label%3Areceiver/filelog).\\nOnce the OpenTelemetry Collector reaches version `v1.0.0`, we will also be able to work towards the specific receiver\'s GA.\\n\\n## Conclusion\\n\\nOverall, OTel\'s filelog receiver component is in good shape and provides important functionality for most log collection use cases.\\nWhere there are still minor gaps or need for improvement with the filelog receiver, we are glad to contribute our expertise and experience from Filebeat use cases.\\nThe above is just the beginning of our effort to help the OpenTelemetry Collector, and specifically its log collection, get closer to a stable version.\\nMoreover, we are happy to help the filelog receiver maintainers with general maintenance of the component: dealing with community issues and PRs, jointly working on the component\'s roadmap, etc.\\n\\nWe\'d like to thank the OTel Collector group and, in particular, [Daniel Jaglowski](https://github.com/djaglowski) for the great and constructive collaboration on the filelog receiver so far!\\n\\nStay tuned to [learn more about our future contributions and involvement in OpenTelemetry](https://www.elastic.co/observability/opentelemetry).\\n","code":"var Component=(()=>{var d=Object.create;var r=Object.defineProperty;var p=Object.getOwnPropertyDescriptor;var 
u=Object.getOwnPropertyNames;var f=Object.getPrototypeOf,m=Object.prototype.hasOwnProperty;var b=(o,e)=>()=>(e||o((e={exports:{}}).exports,e),e.exports),v=(o,e)=>{for(var n in e)r(o,n,{get:e[n],enumerable:!0})},s=(o,e,n,l)=>{if(e&&typeof e==\\"object\\"||typeof e==\\"function\\")for(let i of u(e))!m.call(o,i)&&i!==n&&r(o,i,{get:()=>e[i],enumerable:!(l=p(e,i))||l.enumerable});return o};var y=(o,e,n)=>(n=o!=null?d(f(o)):{},s(e||!o||!o.__esModule?r(n,\\"default\\",{value:o,enumerable:!0}):n,o)),w=o=>s(r({},\\"__esModule\\",{value:!0}),o);var c=b((T,a)=>{a.exports=_jsx_runtime});var x={};v(x,{default:()=>g,frontmatter:()=>k});var t=y(c()),k={title:\\"Elastic\'s collaboration with OpenTelemetry on improving the filelog receiver\\",slug:\\"elastics-collaboration-opentelemetry-filelog-receiver\\",date:\\"2024-06-17\\",description:\\"Elastic is committed to help OpenTelemetry advance it\'s logging capabilities. Learn about our collaboration with the OpenTelemetry community on improving the capabilities and quality aspects of the OpenTelemetry Collector\'s filelog receiver.\\",author:[{slug:\\"christos-markou\\"},{slug:\\"shaunak-kashyap\\"},{slug:\\"alexander-wert\\"}],image:\\"otel-filelog-receiver.jpg\\",tags:[{slug:\\"opentelemetry\\"},{slug:\\"logging\\"}]};function h(o){let e={a:\\"a\\",br:\\"br\\",code:\\"code\\",h2:\\"h2\\",h3:\\"h3\\",img:\\"img\\",p:\\"p\\",strong:\\"strong\\",...o.components};return(0,t.jsxs)(t.Fragment,{children:[(0,t.jsx)(e.p,{children:`As the newest generally available signal in OpenTelemetry (OTel), logging support currently lags behind tracing and metrics in terms of feature scope and maturity.\\nAt Elastic, we bring years of extensive experience with logging use cases and the challenges they present.\\nCommitted to advancing OpenTelemetry\'s logging capabilities, we have focused on enhancing its logging functionalities.`}),`\\n`,(0,t.jsxs)(e.p,{children:[\\"Over the past few months, we have dealt with the capabilities of the \\",(0,t.jsx)(e.a,{href:\\"https://github.com/open-telemetry/opentelemetry-collector-contrib/blob/v0.102.0/receiver/filelogreceiver/README.md\\",rel:\\"nofollow\\",children:\\"filelog receiver\\"}),`\\nin the `,(0,t.jsx)(e.a,{href:\\"https://opentelemetry.io/docs/collector/\\",rel:\\"nofollow\\",children:\\"OpenTelemetry Collector\\"}),\\", leveraging our expertise as the \\",(0,t.jsx)(e.a,{href:\\"https://www.elastic.co/beats/filebeat\\",rel:\\"nofollow\\",children:\\"Filebeat\'s\\"}),` maintainers to help refine and expand its potential.\\nOur goal is to contribute meaningfully to the evolution of OpenTelemetry\'s logging features, ensuring they meet the high standards required for robust observability.`]}),`\\n`,(0,t.jsx)(e.p,{children:`Specifically, we focused on verifying that the receiver is well covered for cases and aspects that have been a pain for us in the past with Filebeat\\n\\\\u2014\\\\xA0such as fail-over handling, self-telemetry, test coverage, documentation and usability.\\nBased on our exploration, we started insightful conversations with the OTel project\'s maintainers, sharing our thoughts and any suggestions that could be useful from our experience.\\nMoreover, we\'ve started putting up PRs to add documentation, make enhancements, improve tests, fix bugs, and even implement completely new features.`}),`\\n`,(0,t.jsx)(e.p,{children:\\"In this blog post we\'ll provide a sneak preview of the work that we\'ve done so far in collaboration with the OpenTelemetry community and what\'s coming next as we continue to explore ways to 
improve the OpenTelemetry Collector for log collection.\\"}),`\\n`,(0,t.jsx)(e.h2,{id:\\"enhancing-the-filelog-receivers-telemetry\\",children:\\"Enhancing the filelog receiver\'s telemetry\\"}),`\\n`,(0,t.jsx)(e.p,{children:`Observability tools are software components like any other and, thus, need to be monitored as any other software to be able to debug problems and tune relevant settings.\\nIn particular, users of the filelog receiver will want to know how it\'s performing.\\nIt\'s important that the filelog receiver emits sufficient telemetry data for common troubleshooting and optimization use cases.\\nThis includes sufficient logging and observable metrics providing insights into the filelog receiver\'s internal state.`}),`\\n`,(0,t.jsxs)(e.p,{children:[`While the filelog receiver already provided a good set of self-telemetry data, we identified some areas of improvement.\\nIn particular, we contributed functionality to emit self-telemetry `,(0,t.jsx)(e.a,{href:\\"https://github.com/open-telemetry/opentelemetry-collector-contrib/pull/33237\\",rel:\\"nofollow\\",children:\\"logs on crucial events\\"}),` like when log files are discovered, moved or truncated.\\nAnother contribution includes `,(0,t.jsx)(e.a,{href:\\"https://github.com/open-telemetry/opentelemetry-collector-contrib/pull/31544\\",rel:\\"nofollow\\",children:\\"observable metrics about filelog\\\\u2019s receiver internal state\\"}),` about how many files are opened and being harvested.\\nYou can find more information on the `,(0,t.jsx)(e.a,{href:\\"https://github.com/open-telemetry/opentelemetry-collector-contrib/issues/31256\\",rel:\\"nofollow\\",children:\\"respective tracking issue\\"}),\\".\\"]}),`\\n`,(0,t.jsx)(e.h2,{id:\\"improving-the-kubernetes-container-logs-parsing\\",children:\\"Improving the Kubernetes container logs parsing\\"}),`\\n`,(0,t.jsxs)(e.p,{children:[`The filelog receiver has been able to parse Kubernetes container logs for some time now.\\nHowever, properly parsing logs from Kubernetes Pods required a fair bit of configuration to deal with different runtime formats and to extract important meta information, such as `,(0,t.jsx)(e.code,{children:\\"k8s.pod.name\\"}),\\", \\",(0,t.jsx)(e.code,{children:\\"k8s.container.name\\"}),`, etc.\\nWith this in mind we proposed to abstract these complex set of configuration into a simpler implementation specific container parser and contributed this new feature to the filelog receiver.\\nWith that new feature, setting up logs collection for Kubernetes is by magnitudes easier - with only eight lines of configuration vs. 
~ 80 lines of configuration before.`]}),`\\n`,(0,t.jsx)(e.p,{children:(0,t.jsx)(e.img,{src:\\"/assets/images/elastics-collaboration-opentelemetry-filelog-receiver/container-parser-config-example.png\\",alt:\\"1 - Usability improvement for parsing Kubernetes container logs\\",width:\\"2924\\",height:\\"1852\\"})}),`\\n`,(0,t.jsxs)(e.p,{children:[\\"You can learn more about the details of the new \\",(0,t.jsx)(e.a,{href:\\"https://opentelemetry.io/blog/2024/otel-collector-container-log-parser\\",rel:\\"nofollow\\",children:\\"container logs parser in the corresponding OpenTelemetry blog post\\"}),\\".\\"]}),`\\n`,(0,t.jsx)(e.h3,{id:\\"evaluating-test-coverage\\",children:\\"Evaluating test coverage\\"}),`\\n`,(0,t.jsxs)(e.p,{children:[`Logs collection from files can run into different unexpected scenarios such as restarts, overload and error scenarios.\\nTo ensure reliable and consistent collection of logs, it\'s important to ensure tests cover these kind of scenarios.\\nBased on our experience with testing Filebeat, we evaluated the existing filelog receiver tests with respect to those scenarios.\\nWhile most of the use cases and scenarios were well-tested already, we identified a few scenarios to improve tests for to ensure reliable logs collection.`,(0,t.jsx)(e.br,{}),`\\n`,`At the creation time of this blog posts we were working on contributing additional tests to address the identified test coverage gaps.\\nYou can learn more about it in `,(0,t.jsx)(e.a,{href:\\"https://github.com/open-telemetry/opentelemetry-collector-contrib/issues/32001\\",rel:\\"nofollow\\",children:\\"this GitHub issue\\"}),\\".\\"]}),`\\n`,(0,t.jsx)(e.h3,{id:\\"persistence-evaluation\\",children:\\"Persistence evaluation\\"}),`\\n`,(0,t.jsx)(e.p,{children:`Another important aspect for log collection that we often hear from Elastic\'s log users are the failover handling capabilities and the delivery guarantees for logs.\\nSome logging use cases, for example audit logging, have strict delivery guarantee requirements.\\nHence, it\'s important that the filelog receiver provides functionality to reliably handle situations, such as temporary unavailability of the logging backend or unexpected restarts of the OTel Collector.`}),`\\n`,(0,t.jsx)(e.p,{children:`Overall, the filelog receiver already has corresponding functionality to deal with such situations.\\nHowever, user documentation on how to setup reliable logs collection with tangible examples was an area with potential for improvement.`}),`\\n`,(0,t.jsxs)(e.p,{children:[`In this regard, beyond verifying the persistence and offset tracking capabilities we worked on improving respective documentation\\n`,(0,t.jsx)(e.a,{href:\\"https://github.com/open-telemetry/opentelemetry-collector-contrib/pull/31886\\",rel:\\"nofollow\\",children:\\"1\\"}),\\" \\",(0,t.jsx)(e.a,{href:\\"https://github.com/open-telemetry/opentelemetry-collector-contrib/pull/30914\\",rel:\\"nofollow\\",children:\\"2\\"}),`\\nand also are collaborating on a `,(0,t.jsx)(e.a,{href:\\"https://github.com/open-telemetry/opentelemetry-collector-contrib/issues/31074\\",rel:\\"nofollow\\",children:\\"community reported issue\\"}),\\" to ensure delivery guarantees for logs.\\"]}),`\\n`,(0,t.jsx)(e.h3,{id:\\"helping-users-help-themselves\\",children:\\"Helping users help themselves\\"}),`\\n`,(0,t.jsx)(e.p,{children:`Elastic has a long and varied history of supporting customers who use our products for log ingestion.\\nDrawing from this experience, we\'ve proposed a couple of documentation improvements to the 
OpenTelemetry Collector to help logging users get out of some tricky situations.`}),`\\n`,(0,t.jsx)(e.p,{children:(0,t.jsx)(e.strong,{children:\\"Documenting the structure of the tracking file\\"})}),`\\n`,(0,t.jsxs)(e.p,{children:[`For every log file the filelog receiver ingests, it needs to track how far into the file it has already read, so it knows where to start reading from when new contents are added to the file.\\nBy default, the filelog receiver doesn\'t persist this tracking information to disk, but it can be configured to do so.\\nWe felt it would be useful to `,(0,t.jsx)(e.a,{href:\\"https://github.com/open-telemetry/opentelemetry-collector-contrib/pull/32180\\",rel:\\"nofollow\\",children:\\"document the structure of this tracking file\\"}),`. When ingestion stops unexpectedly,\\npeeking into this tracking file can often provide clues as to where the problem may lie.`]}),`\\n`,(0,t.jsx)(e.p,{children:(0,t.jsx)(e.strong,{children:\\"Challenges with symlink target changes\\"})}),`\\n`,(0,t.jsxs)(e.p,{children:[`The filelog receiver periodically refreshes its memory of the files it\'s supposed to be ingesting.\\nThe interval at which these refreshes happen is controlled by the `,(0,t.jsx)(e.code,{children:\\"poll_interval\\"}),` setting.\\nIn certain setups log files being ingested by the filelog receiver are symlinks pointing to actual files.\\nMoreover, these symlinks can be updated to point to newer files over time.\\nIf the symlink target changes twice before the filelog receiver has had a chance to refresh its memory, it will miss the first change and therefore not ingest the corresponding target file.\\nWe\'ve `,(0,t.jsx)(e.a,{href:\\"https://github.com/open-telemetry/opentelemetry-collector-contrib/pull/32217\\",rel:\\"nofollow\\",children:\\"documented this edge case\\"}),\\", suggesting the users with such setups should make sure they set \\",(0,t.jsx)(e.code,{children:\\"poll_interval\\"}),\\" to a sufficiently low value.\\"]}),`\\n`,(0,t.jsx)(e.h3,{id:\\"planning-ahead-for-the-receivers-ga\\",children:\\"Planning ahead for the receiver\'s GA\\\\xA0\\"}),`\\n`,(0,t.jsxs)(e.p,{children:[`Last but not least, we have raised the topic of making the filelog receiver a generally available (GA) component.\\nFor users it\'s important to be able to rely on the stability of used functionality, hence, not being required to deal with the risk of breaking changes through minor version updates.\\nIn this regard, for the filelog receiver we have kicked off a first plan with the maintainers to mark any issue that is a blocker for stability with a `,(0,t.jsx)(e.code,{children:\\"required_for_ga\\"}),`\\n`,(0,t.jsx)(e.a,{href:\\"https://github.com/open-telemetry/opentelemetry-collector-contrib/issues?q=is%3Aopen+is%3Aissue+label%3Arelease%3Arequired-for-ga+label%3Areceiver/filelog\\",rel:\\"nofollow\\",children:\\"label\\"}),`.\\nOnce the OpenTelemetry collector goes to version `,(0,t.jsx)(e.code,{children:\\"v1.0.0\\"}),\\" we will be able to also work towards the specific receiver\\\\u2019s GA.\\"]}),`\\n`,(0,t.jsx)(e.h2,{id:\\"conclusion\\",children:\\"Conclusion\\"}),`\\n`,(0,t.jsx)(e.p,{children:`Overall, OTel\'s filelog receiver component is in a good shape and provides important functionality for most log collection use cases.\\nWhere there are still minor gaps or need for improvement with the filelog receiver, we are gladly to contribute our expertise and experience from Filebeat use cases.\\nThe above is just the beginning of our effort to help advancing the OpenTelemetry 
Collector, and specifically for log collection, get closer to a stable version.\\nMoreover, we are happy to help the filelog receiver maintainers with general maintenance of the component, hence, dealing with community issues and PRs, jointly working on the component\'s roadmap, etc.`}),`\\n`,(0,t.jsxs)(e.p,{children:[\\"We\'d like to thank the OTel Collector group and, in particular, \\",(0,t.jsx)(e.a,{href:\\"https://github.com/djaglowski\\",rel:\\"nofollow\\",children:\\"Daniel Jaglowski\\"}),\\" for the great and constructive collaboration on the filelog receiver, so far!\\"]}),`\\n`,(0,t.jsxs)(e.p,{children:[\\"Stay tuned to \\",(0,t.jsx)(e.a,{href:\\"https://www.elastic.co/observability/opentelemetry\\",rel:\\"nofollow\\",children:\\"learn more about our future contributions and involvement in OpenTelemetry\\"}),\\".\\"]})]})}function g(o={}){let{wrapper:e}=o.components||{};return e?(0,t.jsx)(e,{...o,children:(0,t.jsx)(h,{...o})}):h(o)}return w(x);})();\\n;return Component;"},"_id":"articles/elastics-collaboration-opentelemetry-filelog-receiver.mdx","_raw":{"sourceFilePath":"articles/elastics-collaboration-opentelemetry-filelog-receiver.mdx","sourceFileName":"elastics-collaboration-opentelemetry-filelog-receiver.mdx","sourceFileDir":"articles","contentType":"mdx","flattenedPath":"articles/elastics-collaboration-opentelemetry-filelog-receiver"},"type":"Article","imageUrl":"/assets/images/elastics-collaboration-opentelemetry-filelog-receiver/otel-filelog-receiver.jpg","readingTime":"7 min read","url":"/elastics-collaboration-opentelemetry-filelog-receiver","headings":[{"level":2,"title":"Enhancing the filelog receiver\'s telemetry","href":"#enhancing-the-filelog-receivers-telemetry"},{"level":2,"title":"Improving the Kubernetes container logs parsing","href":"#improving-the-kubernetes-container-logs-parsing"},{"level":3,"title":"Evaluating test coverage","href":"#evaluating-test-coverage"},{"level":3,"title":"Persistence evaluation","href":"#persistence-evaluation"},{"level":3,"title":"Helping users help themselves","href":"#helping-users-help-themselves"},{"level":3,"title":"Planning ahead for the receiver\'s GA\xa0","href":"#planning-ahead-for-the-receivers-ga"},{"level":2,"title":"Conclusion","href":"#conclusion"}]},{"title":"How to use Elasticsearch and Time Series Data Streams for observability metrics","slug":"time-series-data-streams-observability-metrics","date":"2023-05-04","description":"With Time Series Data Streams (TSDS), Elasticsearch introduces optimized storage for metrics time series. Check out how we use it for Elastic Observability.","image":"ebpf-monitoring.jpeg","author":[{"slug":"nicolas-ruflin","type":"Author","_raw":{}}],"tags":[{"slug":"metrics","type":"Tag","_raw":{}},{"slug":"tsdb","type":"Tag","_raw":{}},{"slug":"elastic-architecture-enhancements","type":"Tag","_raw":{}}],"body":{"raw":"\\nElasticsearch is used for a wide variety of data types — one of these is metrics. With the introduction of Metricbeat many years ago and later our APM Agents, the metric use case has become more popular. Over the years, Elasticsearch has made many improvements on how to handle things like metrics aggregations and sparse documents. At the same time, [TSVB visualizations](https://www.elastic.co/guide/en/kibana/current/tsvb.html) were introduced to make visualizing metrics easier. 
One concept that exists in most other metrics solutions but was missing here is time series with dimensions.\\n\\nIn mid-2021, the Elasticsearch team [embarked](https://github.com/elastic/elasticsearch/issues/74660) on making Elasticsearch a much better fit for metrics. The team created [Time Series Data Streams (TSDS)](https://www.elastic.co/guide/en/elasticsearch/reference/master/tsds.html), which were released in 8.7 as generally available (GA).\\n\\nThis blog post dives into how TSDS works and how we use it in Elastic Observability, as well as how you can use it for your own metrics.\\n\\n## A quick introduction to TSDS\\n\\n[Time Series Data Streams (TSDS)](https://www.elastic.co/guide/en/elasticsearch/reference/master/tsds.html) are built on top of data streams in Elasticsearch that are optimized for time series. To create a data stream for metrics, an additional setting on the data stream is needed. As we are using data streams, first an Index Template has to be created:\\n\\n```json\\nPUT _index_template/metrics-laptop\\n{\\n \\"index_patterns\\": [\\n \\"metrics-laptop-*\\"\\n ],\\n \\"data_stream\\": {},\\n \\"priority\\": 200,\\n \\"template\\": {\\n \\"settings\\": {\\n \\"index.mode\\": \\"time_series\\"\\n },\\n \\"mappings\\": {\\n \\"properties\\": {\\n \\"host.name\\": {\\n \\"type\\": \\"keyword\\",\\n \\"time_series_dimension\\": true\\n },\\n \\"packages.sent\\": {\\n \\"type\\": \\"integer\\",\\n \\"time_series_metric\\": \\"counter\\"\\n },\\n \\"memory.usage\\": {\\n \\"type\\": \\"double\\",\\n \\"time_series_metric\\": \\"gauge\\"\\n }\\n }\\n }\\n }\\n}\\n```\\n\\nLet\'s have a closer look at this template. At the top, we set the index pattern to metrics-laptop-\\\\*. Any pattern can be selected, but it is recommended to use the [data stream naming scheme](https://www.elastic.co/blog/an-introduction-to-the-elastic-data-stream-naming-scheme) for all your metrics. The next section sets \\"index.mode\\": \\"time_series\\" and, with \\"data_stream\\": {}, makes sure it is a data stream.\\n\\n### Dimensions\\n\\nEach time series data stream needs at least one dimension. In the example above, host.name is set as a dimension field with \\"time_series_dimension\\": true. You can have up to 16 dimensions by default. Not every dimension must show up in each document. The dimensions define the time series. The general rule is to pick fields as dimensions that uniquely identify your time series. Often this is a unique description of the host/container, but for some metrics like disk metrics, the disk id is needed in addition. If you are curious about default recommended dimensions, have a look at this [ECS contribution](https://github.com/elastic/ecs/pull/2172) with dimension properties.
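\\n\\nFor the disk example, a sketch of what that mapping could look like (illustrative field names, following the template above) - each host/disk combination then forms its own time series:\\n\\n```json\\n\\"properties\\": {\\n \\"host.name\\": { \\"type\\": \\"keyword\\", \\"time_series_dimension\\": true },\\n \\"disk.id\\": { \\"type\\": \\"keyword\\", \\"time_series_dimension\\": true }\\n}\\n```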
\\n\\n## Reduced storage and increased query speed\\n\\nAt this point, you already have a functioning time series data stream. Setting the index mode to time series automatically turns on [synthetic source](https://www.elastic.co/guide/en/elasticsearch/reference/current/mapping-source-field.html#synthetic-source). By default, Elasticsearch typically duplicates data three times:\\n\\n- [row-oriented storage](https://en.wikipedia.org/wiki/Column-oriented_DBMS#Row-oriented_systems) (\\\\_source field)\\n- [column-oriented storage](https://en.wikipedia.org/wiki/Column-oriented_DBMS#Column-oriented_systems) (doc_values: true for aggregations)\\n- indices (index: true for filtering and search)\\n\\nWith synthetic source, the \\\\_source field is not persisted; instead, it is reconstructed from the doc values. Especially in the metrics use case, there is little benefit to keeping the source.\\n\\nNot storing it means a significant reduction in storage. Time series data streams sort the data based on the dimensions and the time stamp. This means data that is usually queried together is stored together, which speeds up query times. It also means that the data points for a single time series are stored alongside each other on disk. This enables further compression of the data as the rate at which a counter increases is often relatively constant.\\n\\n## Metric types\\n\\nBut to benefit from all the advantages of TSDS, the field properties of the metrics fields must be extended with the `time_series_metric: {type}`. Several [types are supported](https://www.elastic.co/guide/en/elasticsearch/reference/master/tsds.html#time-series-metric) — as an example, gauge and counter were used above. Giving Elasticsearch knowledge about the metric type allows Elasticsearch to offer more optimized queries for the different types and reduce storage usage further.\\n\\nWhen you create your own templates for data streams under the [data stream naming scheme](https://www.elastic.co/blog/an-introduction-to-the-elastic-data-stream-naming-scheme), it is important that you set \\"priority\\": 200 or higher, as otherwise the built-in default template will apply.\\n\\n## Ingest a document\\n\\nIngesting a document into a TSDS isn\'t in any way different from ingesting documents into Elasticsearch. You can use the following commands in Dev Tools to add a document, and then search for it and also check out the mappings. Note: You have to adjust the @timestamp field to be close to your current date and time.\\n\\n```bash\\n# Add a document with `host.name` as the dimension\\nPOST metrics-laptop-default/_doc\\n{\\n # This timestamp needs to be adjusted to be current\\n \\"@timestamp\\": \\"2023-03-30T12:26:23+00:00\\",\\n \\"host.name\\": \\"ruflin.com\\",\\n \\"packages.sent\\": 1000,\\n \\"memory.usage\\": 0.8\\n}\\n\\n# Search for the added doc, _source will show up but is reconstructed\\nGET metrics-laptop-default/_search\\n\\n# Check out the mappings\\nGET metrics-laptop-default\\n```\\n\\nIf you search, \\\\_source still shows up, but it is reconstructed from the doc values. The additional field added above is @timestamp. This is important as it is a required field for any data stream.
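\\n\\nQuerying the stream works just like any other search or aggregation. As an illustration (reusing the example fields from the template above), the following returns the average memory usage per minute for one time series:\\n\\n```bash\\n# Average memory.usage per minute for host.name ruflin.com\\nGET metrics-laptop-default/_search\\n{\\n \\"size\\": 0,\\n \\"query\\": { \\"term\\": { \\"host.name\\": \\"ruflin.com\\" } },\\n \\"aggs\\": {\\n \\"memory_over_time\\": {\\n \\"date_histogram\\": { \\"field\\": \\"@timestamp\\", \\"fixed_interval\\": \\"1m\\" },\\n \\"aggs\\": { \\"avg_memory\\": { \\"avg\\": { \\"field\\": \\"memory.usage\\" } } }\\n }\\n }\\n}\\n```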
\\n\\n## Why is this all important for Observability?\\n\\nOne of the advantages of the Elastic Observability solution is that in a single storage engine, all signals are brought together in a single place. Users can query logs, metrics, and traces together without having to jump from one system to another. Because of this, having a great storage and query engine not only for logs but also metrics is key for us.\\n\\n## Usage of TSDS in integrations\\n\\nWith [integrations](https://www.elastic.co/integrations/data-integrations), we give our users an out-of-the-box experience to integrate with their infrastructure and services. If you are using our integrations, eventually you will automatically get all the benefits of TSDS for your metrics, assuming you are on version 8.7 or newer.\\n\\nCurrently, we are working through the list of our integration packages, adding the dimensions and metric type fields and then turning on TSDS for the metrics data streams. This means that as soon as a package has all properties enabled, the only thing you have to do is upgrade the integration and everything else will happen automatically in the background.\\n\\nTo visualize your time series in Kibana, use [Lens](https://www.elastic.co/guide/en/kibana/current/lens.html), which has native support built in for TSDS.\\n\\n## Learn more\\n\\nIf you switch over to TSDS, you will automatically benefit from all the future improvements Elasticsearch is making for metrics time series, be it more efficient storage, query performance, or new aggregation capabilities. If you want to learn more about how TSDS works under the hood and all available config options, check out the [TSDS documentation](https://www.elastic.co/guide/en/elasticsearch/reference/master/tsds.html). What Elasticsearch ships in 8.7 is only the first iteration of metrics time series in Elasticsearch.\\n\\n[TSDS has been available since 8.7](https://www.elastic.co/blog/whats-new-elasticsearch-8-7-0) and will be in more and more of our integrations automatically when integrations are upgraded. All you will notice is lower storage usage and faster queries. Enjoy!\\n","code":"var Component=(()=>{var m=Object.create;var n=Object.defineProperty;var u=Object.getOwnPropertyDescriptor;var p=Object.getOwnPropertyNames;var f=Object.getPrototypeOf,g=Object.prototype.hasOwnProperty;var w=(i,e)=>()=>(e||i((e={exports:{}}).exports,e),e.exports),y=(i,e)=>{for(var s in e)n(i,s,{get:e[s],enumerable:!0})},o=(i,e,s,r)=>{if(e&&typeof e==\\"object\\"||typeof e==\\"function\\")for(let a of p(e))!g.call(i,a)&&a!==s&&n(i,a,{get:()=>e[a],enumerable:!(r=u(e,a))||r.enumerable});return i};var b=(i,e,s)=>(s=i!=null?m(f(i)):{},o(e||!i||!i.__esModule?n(s,\\"default\\",{value:i,enumerable:!0}):s,i)),v=i=>o(n({},\\"__esModule\\",{value:!0}),i);var c=w((_,l)=>{l.exports=_jsx_runtime});var T={};y(T,{default:()=>d,frontmatter:()=>S});var t=b(c()),S={title:\\"How to use Elasticsearch and Time Series Data Streams for observability metrics\\",slug:\\"time-series-data-streams-observability-metrics\\",date:\\"2023-05-04\\",description:\\"With Time Series Data Streams (TSDS), Elasticsearch introduces optimized storage for metrics time series. Check out how we use it for Elastic Observability.\\",author:[{slug:\\"nicolas-ruflin\\"}],image:\\"ebpf-monitoring.jpeg\\",tags:[{slug:\\"metrics\\"},{slug:\\"tsdb\\"},{slug:\\"elastic-architecture-enhancements\\"}]};function h(i){let e={a:\\"a\\",code:\\"code\\",h2:\\"h2\\",h3:\\"h3\\",li:\\"li\\",p:\\"p\\",pre:\\"pre\\",ul:\\"ul\\",...i.components};return(0,t.jsxs)(t.Fragment,{children:[(0,t.jsxs)(e.p,{children:[\\"Elasticsearch is used for a wide variety of data types \\\\u2014 one of these is metrics. With the introduction of Metricbeat many years ago and later our APM Agents, the metric use case has become more popular. Over the years, Elasticsearch has made many improvements on how to handle things like metrics aggregations and sparse documents. At the same time, \\",(0,t.jsx)(e.a,{href:\\"https://www.elastic.co/guide/en/kibana/current/tsvb.html\\",rel:\\"nofollow\\",children:\\"TSVB visualizations\\"}),\\" were introduced to make visualizing metrics easier. 
One concept that was missing that exists for most other metric solutions is the concept of time series with dimensions.\\"]}),`\\n`,(0,t.jsxs)(e.p,{children:[\\"Mid 2021, the Elasticsearch team \\",(0,t.jsx)(e.a,{href:\\"https://github.com/elastic/elasticsearch/issues/74660\\",rel:\\"nofollow\\",children:\\"embarked\\"}),\\" on making Elasticsearch a much better fit for metrics. The team created \\",(0,t.jsx)(e.a,{href:\\"https://www.elastic.co/guide/en/elasticsearch/reference/master/tsds.html\\",rel:\\"nofollow\\",children:\\"Time Series Data Streams (TSDS)\\"}),\\", which were released in 8.7 as generally available (GA).\\"]}),`\\n`,(0,t.jsx)(e.p,{children:\\"This blog post dives into how TSDS works and how we use it in Elastic Observability, as well as how you can use it for your own metrics.\\"}),`\\n`,(0,t.jsx)(e.h2,{id:\\"a-quick-introduction-to-tsds\\",children:\\"A quick introduction to TSDS\\"}),`\\n`,(0,t.jsxs)(e.p,{children:[(0,t.jsx)(e.a,{href:\\"https://www.elastic.co/guide/en/elasticsearch/reference/master/tsds.html\\",rel:\\"nofollow\\",children:\\"Time Series Data Streams (TSDS)\\"}),\\" are built on top of data streams in Elasticsearch that are optimized for time series. To create a data stream for metrics, an additional setting on the data stream is needed. As we are using data streams, first an Index Template has to be created:\\"]}),`\\n`,(0,t.jsx)(e.pre,{children:(0,t.jsx)(e.code,{className:\\"language-json\\",children:`PUT _index_template/metrics-laptop\\n{\\n \\"index_patterns\\": [\\n \\"metrics-laptop-*\\"\\n ],\\n \\"data_stream\\": {},\\n \\"priority\\": 200,\\n \\"template\\": {\\n \\"settings\\": {\\n \\"index.mode\\": \\"time_series\\"\\n },\\n \\"mappings\\": {\\n \\"properties\\": {\\n \\"host.name\\": {\\n \\"type\\": \\"keyword\\",\\n \\"time_series_dimension\\": true\\n },\\n \\"packages.sent\\": {\\n \\"type\\": \\"integer\\",\\n \\"time_series_metric\\": \\"counter\\"\\n },\\n \\"memory.usage\\": {\\n \\"type\\": \\"double\\",\\n \\"time_series_metric\\": \\"gauge\\"\\n }\\n }\\n }\\n }\\n}\\n`})}),`\\n`,(0,t.jsxs)(e.p,{children:[\\"Let\'s have a closer look at this template. On the top part, we mark the index pattern with metrics-laptop-*. Any pattern can be selected, but it is recommended to use the \\",(0,t.jsx)(e.a,{href:\\"https://www.elastic.co/blog/an-introduction-to-the-elastic-data-stream-naming-scheme\\",rel:\\"nofollow\\",children:\\"data stream naming scheme\\"}),\' for all your metrics. The next section sets the \\"index.mode\\": \\"time_series\\" in combination with making sure it is a data_stream: \\"data_stream\\": \',\\".\\"]}),`\\n`,(0,t.jsx)(e.h3,{id:\\"dimensions\\",children:\\"Dimensions\\"}),`\\n`,(0,t.jsxs)(e.p,{children:[\'Each time series data stream needs at least one dimension. In the example above, host.name is set as a dimension field with \\"time_series_dimension\\": true. You can have up to 16 dimensions by default. Not every dimension must show up in each document. The dimensions define the time series. The general rule is to pick fields as dimensions that uniquely identify your time series. Often this is a unique description of the host/container, but for some metrics like disk metrics, the disk id is needed in addition. 
If you are curious about default recommended dimensions, have a look at this \',(0,t.jsx)(e.a,{href:\\"https://github.com/elastic/ecs/pull/2172\\",rel:\\"nofollow\\",children:\\"ECS contribution\\"}),\\" with dimension properties.\\"]}),`\\n`,(0,t.jsx)(e.h2,{id:\\"reduced-storage-and-increased-query-speed\\",children:\\"Reduced storage and increased query speed\\"}),`\\n`,(0,t.jsxs)(e.p,{children:[\\"At this point, you already have a functioning time series data stream. Setting the index mode to time series automatically turns on \\",(0,t.jsx)(e.a,{href:\\"https://www.elastic.co/guide/en/elasticsearch/reference/current/mapping-source-field.html#synthetic-source\\",rel:\\"nofollow\\",children:\\"synthetic source\\"}),\\". By default, Elasticsearch typically duplicates data three times:\\"]}),`\\n`,(0,t.jsxs)(e.ul,{children:[`\\n`,(0,t.jsxs)(e.li,{children:[(0,t.jsx)(e.a,{href:\\"https://en.wikipedia.org/wiki/Column-oriented_DBMS#Row-oriented_systems\\",rel:\\"nofollow\\",children:\\"row-oriented storage\\"}),\\" (_source field)\\"]}),`\\n`,(0,t.jsxs)(e.li,{children:[(0,t.jsx)(e.a,{href:\\"https://en.wikipedia.org/wiki/Column-oriented_DBMS#Column-oriented_systems\\",rel:\\"nofollow\\",children:\\"column-oriented storage\\"}),\\" (doc_values: true for aggregations)\\"]}),`\\n`,(0,t.jsx)(e.li,{children:\\"indices (index: true for filtering and search)\\"}),`\\n`]}),`\\n`,(0,t.jsx)(e.p,{children:\\"With synthetic source, the _source field is not persisted; instead, it is reconstructed from the doc values. Especially in the metrics use case, there are little benefits to keeping the source.\\"}),`\\n`,(0,t.jsx)(e.p,{children:\\"Not storing it means a significant reduction in storage. Time series data streams sort the data based on the dimensions and the time stamp. This means data that is usually queried together is stored together, which speeds up query times. It also means that the data points for a single time series are stored alongside each other on disk. This enables further compression of the data as the rate at which a counter increases is often relatively constant.\\"}),`\\n`,(0,t.jsx)(e.h2,{id:\\"metric-types\\",children:\\"Metric types\\"}),`\\n`,(0,t.jsxs)(e.p,{children:[\\"But to benefit from all the advantages of TSDS, the field properties of the metrics fields must be extended with the \\",(0,t.jsx)(e.code,{children:\\"time_series_metric: {type}\\"}),\\". Several \\",(0,t.jsx)(e.a,{href:\\"https://www.elastic.co/guide/en/elasticsearch/reference/master/tsds.html#time-series-metric\\",rel:\\"nofollow\\",children:\\"types are supported\\"}),\\" \\\\u2014 as an example, gauge and counter were used above. Giving Elasticsearch knowledge about the metric type allows Elasticsearch to offer more optimized queries for the different types and reduce storage usage further.\\"]}),`\\n`,(0,t.jsxs)(e.p,{children:[\\"When you create your own templates for data streams under the \\",(0,t.jsx)(e.a,{href:\\"https://www.elastic.co/blog/an-introduction-to-the-elastic-data-stream-naming-scheme\\",rel:\\"nofollow\\",children:\\"data stream naming scheme\\"}),\', it is important that you set \\"priority\\": 200 or higher, as otherwise the built-in default template will apply.\']}),`\\n`,(0,t.jsx)(e.h2,{id:\\"ingest-a-document\\",children:\\"Ingest a document\\"}),`\\n`,(0,t.jsx)(e.p,{children:\\"Ingesting a document into a TSDS isn\'t in any way different from ingesting documents into Elasticsearch. 
You can use the following commands in Dev Tools to add a document, and then search for it and also check out the mappings. Note: You have to adjust the @timestamp field to be close to your current date and time.\\"}),`\\n`,(0,t.jsx)(e.pre,{children:(0,t.jsx)(e.code,{className:\\"language-bash\\",children:`# Add a document with \\\\`host.name\\\\` as the dimension\\nPOST metrics-laptop-default/_doc\\n{\\n # This timestamp neesd to be adjusted to be current\\n \\"@timestamp\\": \\"2023-03-30T12:26:23+00:00\\",\\n \\"host.name\\": \\"ruflin.com\\",\\n \\"packages.sent\\": 1000,\\n \\"memory.usage\\": 0.8\\n}\\n\\n# Search for the added doc, _source will show up but is reconstructed\\nGET metrics-laptop-default/_search\\n\\n# Check out the mappings\\nGET metrics-laptop-default\\n`})}),`\\n`,(0,t.jsx)(e.p,{children:\\"If you do search, it still shows _source but this is reconstructed from the doc values. The additional field added above is @timestamp. This is important as it is a required field for any data stream.\\"}),`\\n`,(0,t.jsx)(e.h2,{id:\\"why-is-this-all-important-for-observability\\",children:\\"Why is this all important for Observability?\\"}),`\\n`,(0,t.jsx)(e.p,{children:\\"One of the advantages of the Elastic Observability solution is that in a single storage engine, all signals are brought together in a single place. Users can query logs, metrics, and traces together without having to jump from one system to another. Because of this, having a great storage and query engine not only for logs but also metrics is key for us.\\"}),`\\n`,(0,t.jsx)(e.h2,{id:\\"usage-of-tsds-in-integrations\\",children:\\"Usage of TSDS in integrations\\"}),`\\n`,(0,t.jsxs)(e.p,{children:[\\"With \\",(0,t.jsx)(e.a,{href:\\"https://www.elastic.co/integrations/data-integrations\\",rel:\\"nofollow\\",children:\\"integrations\\"}),\\", we give our users an out of the box experience to integrate with their infrastructure and services. If you are using our integrations, eventually you will automatically get all the benefits of TSDS for your metrics assuming you are on version 8.7 or newer.\\"]}),`\\n`,(0,t.jsx)(e.p,{children:\\"Currently we are working through the list of our integration packages, add the dimensions, metric type fields and then turn on TSDS for the metrics data streams. What this means is as soon as the package has all properties enabled, the only thing you have to do is upgrade the integration and everything else will happen automatically in the background.\\"}),`\\n`,(0,t.jsxs)(e.p,{children:[\\"To visualize your time series in Kibana, use \\",(0,t.jsx)(e.a,{href:\\"https://www.elastic.co/guide/en/kibana/current/lens.html\\",rel:\\"nofollow\\",children:\\"Lens\\"}),\\", which has native support built in for TSDS.\\"]}),`\\n`,(0,t.jsx)(e.h2,{id:\\"learn-more\\",children:\\"Learn more\\"}),`\\n`,(0,t.jsxs)(e.p,{children:[\\"If you switch over to TSDS, you will automatically benefit from all the future improvements Elasticsearch is making for metrics time series, be it more efficient storage, query performance, or new aggregation capabilities. If you want to learn more about how TSDS works under the hood and all available config options, check out the \\",(0,t.jsx)(e.a,{href:\\"https://www.elastic.co/guide/en/elasticsearch/reference/master/tsds.html\\",rel:\\"nofollow\\",children:\\"TSDS documentation\\"}),\\". 
What Elasticsearch supports in 8.7 is only the first iteration of the metrics time series in Elasticsearch.\\"]}),`\\n`,(0,t.jsxs)(e.p,{children:[(0,t.jsx)(e.a,{href:\\"https://www.elastic.co/blog/whats-new-elasticsearch-8-7-0\\",rel:\\"nofollow\\",children:\\"TSDS can be used since 8.7\\"}),\\" and will be in more and more of our integrations automatically when integrations are upgraded. All you will notice is lower storage usage and faster queries. Enjoy!\\"]})]})}function d(i={}){let{wrapper:e}=i.components||{};return e?(0,t.jsx)(e,{...i,children:(0,t.jsx)(h,{...i})}):h(i)}return v(T);})();\\n;return Component;"},"_id":"articles/elasticsearch-time-series-data-streams-observability-metrics.mdx","_raw":{"sourceFilePath":"articles/elasticsearch-time-series-data-streams-observability-metrics.mdx","sourceFileName":"elasticsearch-time-series-data-streams-observability-metrics.mdx","sourceFileDir":"articles","contentType":"mdx","flattenedPath":"articles/elasticsearch-time-series-data-streams-observability-metrics"},"type":"Article","imageUrl":"/assets/images/time-series-data-streams-observability-metrics/ebpf-monitoring.jpeg","readingTime":"7 min read","url":"/time-series-data-streams-observability-metrics","headings":[{"level":2,"title":"A quick introduction to TSDS","href":"#a-quick-introduction-to-tsds"},{"level":3,"title":"Dimensions","href":"#dimensions"},{"level":2,"title":"Reduced storage and increased query speed","href":"#reduced-storage-and-increased-query-speed"},{"level":2,"title":"Metric types","href":"#metric-types"},{"level":2,"title":"Ingest a document","href":"#ingest-a-document"},{"level":2,"title":"Why is this all important for Observability?","href":"#why-is-this-all-important-for-observability"},{"level":2,"title":"Usage of TSDS in integrations","href":"#usage-of-tsds-in-integrations"},{"level":2,"title":"Learn more","href":"#learn-more"}]},{"title":"How to enable Kubernetes alerting with Elastic Observability","slug":"enable-kubernetes-alerting-observability","date":"2023-05-30","description":"In the Kubernetes world, different personas demand different kinds of insights. In this post, we’ll focus on alerting and provide an overview of how alerts in Elastic Observability can help users quickly identify Kubernetes problems.","image":"alert-management.jpg","author":[{"slug":"christos-markou","type":"Author","_raw":{}}],"tags":[{"slug":"kubernetes","type":"Tag","_raw":{}},{"slug":"metrics","type":"Tag","_raw":{}},{"slug":"slo","type":"Tag","_raw":{}}],"body":{"raw":"\\nIn the Kubernetes world, different personas demand different kinds of insights. Developers are interested in granular metrics and debugging information. [SREs](https://www.elastic.co/blog/elastic-observability-sre-incident-response) are interested in seeing everything at once to quickly get notified when a problem occurs and spot where the root cause is. In this post, we’ll focus on alerting and provide an overview of how alerts in Elastic Observability can help users quickly identify Kubernetes problems.\\n\\n## Why do we need alerts?\\n\\nLogs, metrics, and traces are just the base to build a complete [monitoring solution for Kubernetes clusters](https://www.elastic.co/blog/kubernetes-cluster-metrics-logs-monitoring). 
Their main goal is to provide debugging information and historical evidence for the infrastructure.\\n\\nWhile out-of-the-box dashboards, infrastructure topology, and logs exploration through Kibana are already quite handy for performing ad-hoc analyses, adding notifications and active monitoring of the infrastructure allows users to deal with detected problems as early as possible and even act proactively to prevent their Kubernetes environments from facing more serious issues.\\n\\n### How can this be achieved?\\n\\nBy building alerts on top of their infrastructure, users can leverage the data and effectively correlate it to a specific notification, creating a wide range of possibilities to dynamically monitor and observe their Kubernetes cluster.\\n\\nIn this blog post, we will explore how users can leverage Elasticsearch’s search powers to define alerting rules in order to be notified when a specific condition occurs.\\n\\n## SLIs, alerts, and SLOs: Why are they important for SREs?\\n\\nFor site reliability engineers (SREs), the [incident response time](https://www.elastic.co/blog/elastic-observability-sre-incident-response) is tightly coupled with the success of everyday work. Monitoring, alerting, and actions will help to discover, resolve, or prevent issues in their systems.\\n\\n> - _An SLA (Service Level Agreement) is an agreement you create with your users to specify the level of service they can expect._\\n> - _An SLO (Service Level Objective) is an agreement within an SLA about a specific metric like uptime or response time._\\n> - _An SLI (Service Level Indicator) measures compliance with an SLO._\\n\\nSREs’ day-to-day tasks and projects are driven by SLOs. By ensuring that SLOs are defended in the short term and can be maintained in the medium to long term, we lay the foundation of a stable, working infrastructure.\\n\\nThat said, identifying the high-level categories of SLOs is crucial for organizing the work of an SRE. Then, for each category of SLOs, SREs will need corresponding SLIs that cover the most important cases of the system under observation. Deciding which SLIs are needed therefore demands additional knowledge of the underlying system infrastructure.\\n\\nOne widely used approach to categorizing SLIs and SLOs is the [Four Golden Signals](https://landing.google.com/sre/sre-book/chapters/monitoring-distributed-systems/#xref_monitoring_golden-signals) method. The categories defined are Latency, Traffic, Errors, and Saturation.\\n\\nA more specific approach is the [RED method](https://thenewstack.io/monitoring-microservices-red-method/) developed by Tom Wilkie, who was an SRE at Google and used the Four Golden Signals. The RED method drops the saturation category because it is mainly used for more advanced cases — and people remember things that come in threes better.\\n\\nFocusing on Kubernetes infrastructure operators, we will consider the following groups of infrastructure SLIs/SLOs:\\n\\n- Group 1: Latency of the control plane (apiserver, etc.)\\n- Group 2: Resource utilization of the nodes/pods (how much CPU, memory, etc. is consumed)\\n- Group 3: Errors (errors in logs or events, or error counts from components, network, etc.)\\n\\n## Creating alerts for a Kubernetes cluster\\n\\nNow that we have a complete outline of our goal to define alerts based on SLIs/SLOs, we will dive into defining the proper alerting. 
Alerts can be built using [Kibana](https://www.elastic.co/guide/en/kibana/current/alerting-getting-started.html).\\n\\n![kubernetes create rule](/assets/images/enable-kubernetes-alerting-observability/blog-elastic-create-rule.png)\\n\\nSee the Elastic [documentation](https://www.elastic.co/guide/en/kibana/current/alerting-getting-started.html).\\n\\nIn this blog, we will define more complex alerts based on advanced Elasticsearch queries, using [Watcher](https://www.elastic.co/guide/en/elasticsearch/reference/current/watcher-getting-started.html)’s functionality. [Read more about Watcher](https://www.elastic.co/guide/en/kibana/8.8/watcher-ui.html) and how to use it properly, in addition to the examples in this blog.\\n\\n### Latency alerts\\n\\nFor this kind of alert, we want to define the basic SLOs for a Kubernetes control plane, which will ensure that the basic control plane components can service the end users without an issue. For instance, facing high latencies in queries against the Kubernetes API Server is enough of a signal that action needs to be taken.\\n\\n### Resource saturation\\n\\nThe next group of alerts covers resource utilization. A node’s CPU utilization or changes in a node’s condition are critical for a cluster to ensure the smooth servicing of the workloads provisioned to run the applications that end users interact with.\\n\\n### Error detection\\n\\nLast but not least, we will define alerts based on specific errors like the network error rate or Pod failures like the OOMKilled situation. These are very useful indicators for SRE teams to either detect issues at the infrastructure level or notify developer teams about problematic workloads. One example that we will examine later is an application running as a Pod and constantly getting restarted because it hits its memory limit. In that case, the owners of this application need to be notified so they can act.\\n\\n## From Kubernetes data to Elasticsearch queries\\n\\nHaving a solid plan for the alerts that we want to implement, it\'s time to explore the data we have collected from the Kubernetes cluster and stored in Elasticsearch. For this, we will consult the list of the available data fields that are ingested using the Elastic Agent Kubernetes [integration](https://docs.elastic.co/en/integrations/kubernetes) (the full list of fields can be found [here](https://www.elastic.co/guide/en/beats/metricbeat/current/exported-fields-kubernetes.html)). Using these fields, we can create various alerts, such as:\\n\\n- Node CPU utilization\\n- Node memory utilization\\n- Network bandwidth utilization\\n- Pod restarts\\n- Pod CPU/memory utilization\\n\\n### CPU utilization alert\\n\\nOur first example will use the CPU utilization fields to calculate the Node’s CPU utilization and create an alert. 
For this alert, we leverage the metrics:\\n\\n```yaml\\nkubernetes.node.cpu.usage.nanocores\\nkubernetes.node.cpu.capacity.cores\\n```\\n\\nThe following calculation, `(nodeUsage / 1000000000) / nodeCap`, grouped by node name, will give us the CPU utilization of our cluster’s nodes as a fraction between 0 and 1.\\n\\nThe Watcher definition that implements this query can be created with the following API call to Elasticsearch:\\n\\n```bash\\ncurl -X PUT \\"https://elastic:changeme@localhost:9200/_watcher/watch/Node-CPU-Usage?pretty\\" -k -H \'Content-Type: application/json\' -d\'\\n{\\n \\"trigger\\": {\\n \\"schedule\\": {\\n \\"interval\\": \\"10m\\"\\n }\\n },\\n \\"input\\": {\\n \\"search\\": {\\n \\"request\\": {\\n \\"body\\": {\\n \\"size\\": 0,\\n \\"query\\": {\\n \\"bool\\": {\\n \\"must\\": [\\n {\\n \\"range\\": {\\n \\"@timestamp\\": {\\n \\"gte\\": \\"now-10m\\",\\n \\"lte\\": \\"now\\",\\n \\"format\\": \\"strict_date_optional_time\\"\\n }\\n }\\n },\\n {\\n \\"bool\\": {\\n \\"must\\": [\\n {\\n \\"query_string\\": {\\n \\"query\\": \\"data_stream.dataset: kubernetes.node OR data_stream.dataset: kubernetes.state_node\\",\\n \\"analyze_wildcard\\": true\\n }\\n }\\n ],\\n \\"filter\\": [],\\n \\"should\\": [],\\n \\"must_not\\": []\\n }\\n }\\n ],\\n \\"filter\\": [],\\n \\"should\\": [],\\n \\"must_not\\": []\\n }\\n },\\n \\"aggs\\": {\\n \\"nodes\\": {\\n \\"terms\\": {\\n \\"field\\": \\"kubernetes.node.name\\",\\n \\"size\\": \\"10000\\",\\n \\"order\\": {\\n \\"_key\\": \\"asc\\"\\n }\\n },\\n \\"aggs\\": {\\n \\"nodeUsage\\": {\\n \\"max\\": {\\n \\"field\\": \\"kubernetes.node.cpu.usage.nanocores\\"\\n }\\n },\\n \\"nodeCap\\": {\\n \\"max\\": {\\n \\"field\\": \\"kubernetes.node.cpu.capacity.cores\\"\\n }\\n },\\n \\"nodeCPUUsagePCT\\": {\\n \\"bucket_script\\": {\\n \\"buckets_path\\": {\\n \\"nodeUsage\\": \\"nodeUsage\\",\\n \\"nodeCap\\": \\"nodeCap\\"\\n },\\n \\"script\\": {\\n \\"source\\": \\"( params.nodeUsage / 1000000000 ) / params.nodeCap\\",\\n \\"lang\\": \\"painless\\",\\n \\"params\\": {\\n \\"_interval\\": 10000\\n }\\n },\\n \\"gap_policy\\": \\"skip\\"\\n }\\n }\\n }\\n }\\n }\\n },\\n \\"indices\\": [\\n \\"metrics-kubernetes*\\"\\n ]\\n }\\n }\\n },\\n \\"condition\\": {\\n \\"array_compare\\": {\\n \\"ctx.payload.aggregations.nodes.buckets\\": {\\n \\"path\\": \\"nodeCPUUsagePCT.value\\",\\n \\"gte\\": {\\n \\"value\\": 0.8\\n }\\n }\\n }\\n },\\n \\"actions\\": {\\n \\"log_hits\\": {\\n \\"foreach\\": \\"ctx.payload.aggregations.nodes.buckets\\",\\n \\"max_iterations\\": 500,\\n \\"logging\\": {\\n \\"text\\": \\"Kubernetes node found with high CPU usage: {{ctx.payload.key}} -> {{ctx.payload.nodeCPUUsagePCT.value}}\\"\\n }\\n }\\n },\\n \\"metadata\\": {\\n \\"xpack\\": {\\n \\"type\\": \\"json\\"\\n },\\n \\"name\\": \\"Node CPU Usage\\"\\n }\\n}\'\\n```\\n\\n### OOMKilled Pods detection and alerting\\n\\nAnother Watcher that we will explore detects Pods that have been restarted due to an OOMKilled error. 
This error is quite common in Kubernetes workloads, and it is useful to detect it early on so the team that owns the workload can be informed and either investigate issues that could cause memory leaks or consider increasing the required resources for the workload itself.\\n\\nThis information can be retrieved with a query like the following:\\n\\n```yaml\\nkubernetes.container.status.last_terminated_reason: OOMKilled\\n```\\n\\nHere is how we can create the respective Watcher with an API call:\\n\\n```bash\\ncurl -X PUT \\"https://elastic:changeme@localhost:9200/_watcher/watch/Pod-Terminated-OOMKilled?pretty\\" -k -H \'Content-Type: application/json\' -d\'\\n{\\n \\"trigger\\": {\\n \\"schedule\\": {\\n \\"interval\\": \\"1m\\"\\n }\\n },\\n \\"input\\": {\\n \\"search\\": {\\n \\"request\\": {\\n \\"search_type\\": \\"query_then_fetch\\",\\n \\"indices\\": [\\n \\"*\\"\\n ],\\n \\"rest_total_hits_as_int\\": true,\\n \\"body\\": {\\n \\"size\\": 0,\\n \\"query\\": {\\n \\"bool\\": {\\n \\"must\\": [\\n {\\n \\"range\\": {\\n \\"@timestamp\\": {\\n \\"gte\\": \\"now-1m\\",\\n \\"lte\\": \\"now\\",\\n \\"format\\": \\"strict_date_optional_time\\"\\n }\\n }\\n },\\n {\\n \\"bool\\": {\\n \\"must\\": [\\n {\\n \\"query_string\\": {\\n \\"query\\": \\"data_stream.dataset: kubernetes.state_container\\",\\n \\"analyze_wildcard\\": true\\n }\\n },\\n {\\n \\"exists\\": {\\n \\"field\\": \\"kubernetes.container.status.last_terminated_reason\\"\\n }\\n },\\n {\\n \\"query_string\\": {\\n \\"query\\": \\"kubernetes.container.status.last_terminated_reason: OOMKilled\\",\\n \\"analyze_wildcard\\": true\\n }\\n }\\n ],\\n \\"filter\\": [],\\n \\"should\\": [],\\n \\"must_not\\": []\\n }\\n }\\n ],\\n \\"filter\\": [],\\n \\"should\\": [],\\n \\"must_not\\": []\\n }\\n },\\n \\"aggs\\": {\\n \\"pods\\": {\\n \\"terms\\": {\\n \\"field\\": \\"kubernetes.pod.name\\",\\n \\"order\\": {\\n \\"_key\\": \\"asc\\"\\n }\\n }\\n }\\n }\\n }\\n }\\n }\\n },\\n \\"condition\\": {\\n \\"array_compare\\": {\\n \\"ctx.payload.aggregations.pods.buckets\\": {\\n \\"path\\": \\"doc_count\\",\\n \\"gte\\": {\\n \\"value\\": 1,\\n \\"quantifier\\": \\"some\\"\\n }\\n }\\n }\\n },\\n \\"actions\\": {\\n \\"ping_slack\\": {\\n \\"foreach\\": \\"ctx.payload.aggregations.pods.buckets\\",\\n \\"max_iterations\\": 500,\\n \\"webhook\\": {\\n \\"method\\": \\"POST\\",\\n \\"url\\": \\"https://hooks.slack.com/services/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\\",\\n \\"body\\": \\"{\\\\\\"channel\\\\\\": \\\\\\"#k8s-alerts\\\\\\", \\\\\\"username\\\\\\": \\\\\\"k8s-cluster-alerting\\\\\\", \\\\\\"text\\\\\\": \\\\\\"Pod {{ctx.payload.key}} was terminated with status OOMKilled.\\\\\\"}\\"\\n }\\n }\\n },\\n \\"metadata\\": {\\n \\"xpack\\": {\\n \\"type\\": \\"json\\"\\n },\\n \\"name\\": \\"Pod Terminated OOMKilled\\"\\n }\\n}\'\\n```\\n\\n### From Kubernetes data to alerts summary\\n\\nSo far, we have seen how to start from plain Kubernetes fields, use them in ES queries, and build Watchers and alerts on top of them.\\n\\nOne can explore more possible data combinations and build queries and alerts following the examples we provided here. A [full list of alerts](https://github.com/elastic/integrations/tree/main/packages/kubernetes/docs) is available, as well as a [basic scripted way of installing them](https://github.com/elastic/k8s-integration-infra/tree/main/scripts/alerting).\\n\\nOf course, these examples come with simple actions defined that only log messages into the Elasticsearch logs.
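\\n\\nBefore wiring a Watcher to a noisy channel, it can be handy to dry-run it first. As a quick sketch, the [execute watch API](https://www.elastic.co/guide/en/elasticsearch/reference/current/watcher-api-execute-watch.html) runs a watch once, regardless of its trigger schedule, and returns the full execution result so you can inspect the search payload and the condition outcome:\\n\\n```bash\\n# Dry-run the Node CPU Watcher defined above and inspect the result\\ncurl -X POST \\"https://elastic:changeme@localhost:9200/_watcher/watch/Node-CPU-Usage/_execute?pretty\\" -k\\n```\\n\\n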
Instead of just logging, one can use more advanced and useful outputs like Slack’s webhooks:\\n\\n```json\\n\\"actions\\": {\\n \\"ping_slack\\": {\\n \\"foreach\\": \\"ctx.payload.aggregations.pods.buckets\\",\\n \\"max_iterations\\": 500,\\n \\"webhook\\": {\\n \\"method\\": \\"POST\\",\\n \\"url\\": \\"https://hooks.slack.com/services/T04SW3JHXasdfasdfasdfasdfasdf\\",\\n \\"body\\": \\"{\\\\\\"channel\\\\\\": \\\\\\"#k8s-alerts\\\\\\", \\\\\\"username\\\\\\": \\\\\\"k8s-cluster-alerting\\\\\\", \\\\\\"text\\\\\\": \\\\\\"Pod {{ctx.payload.key}} was terminated with status OOMKilled.\\\\\\"}\\"\\n }\\n }\\n }\\n```\\n\\nThe result would be a Slack message like the following:\\n\\n![](/assets/images/enable-kubernetes-alerting-observability/blog-elastic-k8s-cluster-alerting.png)\\n\\n## Next steps\\n\\nAs a next step, we would like to make these alerts part of our Kubernetes integration, which would mean that the predefined alerts are installed when users install or enable the Kubernetes integration. At the same time, we plan to implement some of these as Kibana’s native SLIs, providing our users the option to quickly define SLOs on top of the SLIs through a nice user interface. If you’re interested in learning more about these, follow the public GitHub issues and feel free to provide your feedback:\\n\\n- [https://github.com/elastic/package-spec/issues/484](https://github.com/elastic/package-spec/issues/484)\\n- [https://github.com/elastic/kibana/issues/150050](https://github.com/elastic/kibana/issues/150050)\\n\\nFor those who are eager to start using Kubernetes alerting today, here is what you need to do:\\n\\n1. Make sure that you have an Elastic cluster up and running. The fastest way to deploy your cluster is to spin up a [free trial of Elasticsearch Service](https://www.elastic.co/elasticsearch/service).\\n2. Install the latest Elastic Agent on your Kubernetes cluster following the respective [documentation](https://www.elastic.co/guide/en/fleet/master/running-on-kubernetes-managed-by-fleet.html).\\n3. 
Install our provided alerts that can be found at [https://github.com/elastic/integrations/tree/main/packages/kubernetes/docs](https://github.com/elastic/integrations/tree/main/packages/kubernetes/docs) or at [https://github.com/elastic/k8s-integration-infra/tree/main/scripts/alerting](https://github.com/elastic/k8s-integration-infra/tree/main/scripts/alerting).\\n\\nOf course, if you have any questions, remember that we are always happy to help on the Discuss [forums](https://discuss.elastic.co/).\\n","code":"var Component=(()=>{var u=Object.create;var a=Object.defineProperty;var p=Object.getOwnPropertyDescriptor;var g=Object.getOwnPropertyNames;var m=Object.getPrototypeOf,f=Object.prototype.hasOwnProperty;var w=(t,e)=>()=>(e||t((e={exports:{}}).exports,e),e.exports),b=(t,e)=>{for(var i in e)a(t,i,{get:e[i],enumerable:!0})},o=(t,e,i,s)=>{if(e&&typeof e==\\"object\\"||typeof e==\\"function\\")for(let r of g(e))!f.call(t,r)&&r!==i&&a(t,r,{get:()=>e[r],enumerable:!(s=p(e,r))||s.enumerable});return t};var y=(t,e,i)=>(i=t!=null?u(m(t)):{},o(e||!t||!t.__esModule?a(i,\\"default\\",{value:t,enumerable:!0}):i,t)),k=t=>o(a({},\\"__esModule\\",{value:!0}),t);var c=w((S,l)=>{l.exports=_jsx_runtime});var _={};b(_,{default:()=>h,frontmatter:()=>v});var n=y(c()),v={title:\\"How to enable Kubernetes alerting with Elastic Observability\\",slug:\\"enable-kubernetes-alerting-observability\\",date:\\"2023-05-30\\",description:\\"In the Kubernetes world, different personas demand different kinds of insights. In this post, we\\\\u2019ll focus on alerting and provide an overview of how alerts in Elastic Observability can help users quickly identify Kubernetes problems.\\",author:[{slug:\\"christos-markou\\"}],image:\\"alert-management.jpg\\",tags:[{slug:\\"kubernetes\\"},{slug:\\"metrics\\"},{slug:\\"slo\\"}]};function d(t){let e={a:\\"a\\",blockquote:\\"blockquote\\",code:\\"code\\",em:\\"em\\",h2:\\"h2\\",h3:\\"h3\\",img:\\"img\\",li:\\"li\\",ol:\\"ol\\",p:\\"p\\",pre:\\"pre\\",ul:\\"ul\\",...t.components};return(0,n.jsxs)(n.Fragment,{children:[(0,n.jsxs)(e.p,{children:[\\"In the Kubernetes world, different personas demand different kinds of insights. Developers are interested in granular metrics and debugging information. \\",(0,n.jsx)(e.a,{href:\\"https://www.elastic.co/blog/elastic-observability-sre-incident-response\\",rel:\\"nofollow\\",children:\\"SREs\\"}),\\" are interested in seeing everything at once to quickly get notified when a problem occurs and spot where the root cause is. In this post, we\\\\u2019ll focus on alerting and provide an overview of how alerts in Elastic Observability can help users quickly identify Kubernetes problems.\\"]}),`\\n`,(0,n.jsx)(e.h2,{id:\\"why-do-we-need-alerts\\",children:\\"Why do we need alerts?\\"}),`\\n`,(0,n.jsxs)(e.p,{children:[\\"Logs, metrics, and traces are just the base to build a complete \\",(0,n.jsx)(e.a,{href:\\"https://www.elastic.co/blog/kubernetes-cluster-metrics-logs-monitoring\\",rel:\\"nofollow\\",children:\\"monitoring solution for Kubernetes clusters\\"}),\\". 
Their main goal is to provide debugging information and historical evidence for the infrastructure.\\"]}),`\\n`,(0,n.jsx)(e.p,{children:\\"While out-of-the-box dashboards, infrastructure topology, and logs exploration through Kibana are already quite handy to perform ad-hoc analyses, adding notifications and active monitoring of infrastructure allows users to deal with problems detected as early as possible and even proactively take actions to prevent their Kubernetes environments from facing even more serious issues.\\"}),`\\n`,(0,n.jsx)(e.h3,{id:\\"how-can-this-be-achieved\\",children:\\"How can this be achieved?\\"}),`\\n`,(0,n.jsx)(e.p,{children:\\"By building alerts on top of their infrastructure, users can leverage the data and effectively correlate it to a specific notification, creating a wide range of possibilities to dynamically monitor and observe their Kubernetes cluster.\\"}),`\\n`,(0,n.jsx)(e.p,{children:\\"In this blog post, we will explore how users can leverage Elasticsearch\\\\u2019s search powers to define alerting rules in order to be notified when a specific condition occurs.\\"}),`\\n`,(0,n.jsx)(e.h2,{id:\\"slis-alerts-and-slos-why-are-they-important-for-sres\\",children:\\"SLIs, alerts, and SLOs: Why are they important for SREs?\\"}),`\\n`,(0,n.jsxs)(e.p,{children:[\\"For site reliability engineers (SREs), the \\",(0,n.jsx)(e.a,{href:\\"https://www.elastic.co/blog/elastic-observability-sre-incident-response\\",rel:\\"nofollow\\",children:\\"incident response time\\"}),\\" is tightly coupled with the success of everyday work. Monitoring, alerting, and actions will help to discover, resolve, or prevent issues in their systems.\\"]}),`\\n`,(0,n.jsxs)(e.blockquote,{children:[`\\n`,(0,n.jsxs)(e.ul,{children:[`\\n`,(0,n.jsx)(e.li,{children:(0,n.jsx)(e.em,{children:\\"An SLA (Service Level Agreement) is an agreement you create with your users to specify the level of service they can expect.\\"})}),`\\n`,(0,n.jsx)(e.li,{children:(0,n.jsx)(e.em,{children:\\"An SLO (Service Level Objective) is an agreement within an SLA about a specific metric like uptime or response time.\\"})}),`\\n`,(0,n.jsx)(e.li,{children:(0,n.jsx)(e.em,{children:\\"An SLI (Service Level Indicator) measures compliance with an SLO.\\"})}),`\\n`]}),`\\n`]}),`\\n`,(0,n.jsx)(e.p,{children:\\"SREs\\\\u2019 day-to-day tasks and projects are driven by SLOs. By ensuring that SLOs are defended in the short term and that they can be maintained in the medium to long term, we lay the basis of a stable working infrastructure.\\"}),`\\n`,(0,n.jsx)(e.p,{children:\\"Having said this, identifying the high-level categories of SLOs is crucial in order to organize the work of an SRE. Then in each category of SLOs, SREs will need the corresponding SLIs that can cover the most important cases of their system under observation. Therefore, the decision of which SLIs we will need demands additional knowledge of the underlying system infrastructure.\\"}),`\\n`,(0,n.jsxs)(e.p,{children:[\\"One widely used approach to categorize SLIs and SLOs is the \\",(0,n.jsx)(e.a,{href:\\"https://landing.google.com/sre/sre-book/chapters/monitoring-distributed-systems/#xref_monitoring_golden-signals\\",rel:\\"nofollow\\",children:\\"Four Golden Signals\\"}),\\" method. 
The categories defined are Latency, Traffic, Errors, and Saturation.\\"]}),`\\n`,(0,n.jsxs)(e.p,{children:[\\"A more specific approach is the \\",(0,n.jsx)(e.a,{href:\\"https://thenewstack.io/monitoring-microservices-red-method/\\",rel:\\"nofollow\\",children:\\"The RED method\\"}),\\" developed by Tom Wilkie, who was an SRE at Google and used the Four Golden Signals. The RED method drops the saturation category because this one is mainly used for more advanced cases \\\\u2014 and people remember better things that come in threes.\\"]}),`\\n`,(0,n.jsx)(e.p,{children:\\"Focusing on Kubernetes infrastructure operators, we will consider the following groups of infrastructure SLIs/SLOs:\\"}),`\\n`,(0,n.jsxs)(e.ul,{children:[`\\n`,(0,n.jsx)(e.li,{children:\\"Group 1: Latency of control plane (apiserver,\\"}),`\\n`,(0,n.jsx)(e.li,{children:\\"Group 2: Resource utilization of the nodes/pods (how much cpu, memory, etc. is consumed)\\"}),`\\n`,(0,n.jsx)(e.li,{children:\\"Group 3: Errors (errors on logs or events or error count from components, network, etc.)\\"}),`\\n`]}),`\\n`,(0,n.jsx)(e.h2,{id:\\"creating-alerts-for-a-kubernetes-cluster\\",children:\\"Creating alerts for a Kubernetes cluster\\"}),`\\n`,(0,n.jsxs)(e.p,{children:[\\"Now that we have a complete outline of our goal to define alerts based on SLIs/SLOs, we will dive into defining the proper alerting. Alerts can be built using \\",(0,n.jsx)(e.a,{href:\\"https://www.elastic.co/guide/en/kibana/current/alerting-getting-started.html\\",rel:\\"nofollow\\",children:\\"Kibana\\"}),\\".\\"]}),`\\n`,(0,n.jsx)(e.p,{children:(0,n.jsx)(e.img,{src:\\"/assets/images/enable-kubernetes-alerting-observability/blog-elastic-create-rule.png\\",alt:\\"kubernetes create rule\\",width:\\"1999\\",height:\\"1097\\"})}),`\\n`,(0,n.jsxs)(e.p,{children:[\\"See Elastic \\",(0,n.jsx)(e.a,{href:\\"https://www.elastic.co/guide/en/kibana/current/alerting-getting-started.html\\",rel:\\"nofollow\\",children:\\"documentation\\"}),\\".\\"]}),`\\n`,(0,n.jsxs)(e.p,{children:[\\"In this blog, we will define more complex alerts based on complex Elasticsearch queries provided by \\",(0,n.jsx)(e.a,{href:\\"https://www.elastic.co/guide/en/elasticsearch/reference/current/watcher-getting-started.html\\",rel:\\"nofollow\\",children:\\"Watcher\\"}),\\"\\\\u2019s functionality. \\",(0,n.jsx)(e.a,{href:\\"https://www.elastic.co/guide/en/kibana/8.8/watcher-ui.html\\",rel:\\"nofollow\\",children:\\"Read more about Watcher\\"}),\\" and how to properly use it in addition to the examples in this blog.\\"]}),`\\n`,(0,n.jsx)(e.h3,{id:\\"latency-alerts\\",children:\\"Latency alerts\\"}),`\\n`,(0,n.jsx)(e.p,{children:\\"For this kind of alert, we want to define the basic SLOs for a Kubernetes control plane, which will ensure that the basic control plane components can service the end users without an issue. For instance, facing high latencies in queries against the Kubernetes API Server is enough of a signal that action needs to be taken.\\"}),`\\n`,(0,n.jsx)(e.h3,{id:\\"resource-saturation\\",children:\\"Resource saturation\\"}),`\\n`,(0,n.jsx)(e.p,{children:\\"The next group of alerting will be resource utilization. 
Node\\\\u2019s CPU utilization or changes in Node\\\\u2019s condition is something critical for a cluster to ensure the smooth servicing of the workloads provisioned to run the applications that end users will interact with.\\"}),`\\n`,(0,n.jsx)(e.h3,{id:\\"error-detection\\",children:\\"Error detection\\"}),`\\n`,(0,n.jsx)(e.p,{children:\\"Last but not least, we will define alerts based on specific errors like the network error rate or Pods\\\\u2019 failures like the OOMKilled situation. It\\\\u2019s a very useful indicator for SRE teams to either detect issues on the infrastructure level or just be able to notify developer teams about problematic workloads. One example that we will examine later is having an application running as a Pod and constantly getting restarted because it hits its memory limit. In that case, the owners of this application will need to get notified to act properly.\\"}),`\\n`,(0,n.jsx)(e.h2,{id:\\"from-kubernetes-data-to-elasticsearch-queries\\",children:\\"From Kubernetes data to Elasticsearch queries\\"}),`\\n`,(0,n.jsxs)(e.p,{children:[\\"Having a solid plan about the alerts that we want to implement, it\'s time to explore the data we have collected from the Kubernetes cluster and stored in Elasticsearch. For this we will consult the list of the available data fields that are ingested using the Elastic Agent Kubernetes \\",(0,n.jsx)(e.a,{href:\\"https://docs.elastic.co/en/integrations/kubernetes\\",rel:\\"nofollow\\",children:\\"integration\\"}),\\" (the full list of fields can be found \\",(0,n.jsx)(e.a,{href:\\"https://www.elastic.co/guide/en/beats/metricbeat/current/exported-fields-kubernetes.html\\",rel:\\"nofollow\\",children:\\"here\\"}),\\"). Using these fields we can create various alerts like:\\"]}),`\\n`,(0,n.jsxs)(e.ul,{children:[`\\n`,(0,n.jsx)(e.li,{children:\\"Node CPU utilization\\"}),`\\n`,(0,n.jsx)(e.li,{children:\\"Node Memory utilization\\"}),`\\n`,(0,n.jsx)(e.li,{children:\\"BW utilization\\"}),`\\n`,(0,n.jsx)(e.li,{children:\\"Pod restarts\\"}),`\\n`,(0,n.jsx)(e.li,{children:\\"Pod CPU/memory utilization\\"}),`\\n`]}),`\\n`,(0,n.jsx)(e.h3,{id:\\"cpu-utilization-alert\\",children:\\"CPU utilization alert\\"}),`\\n`,(0,n.jsx)(e.p,{children:\\"Our first example will use the CPU utilization fields to calculate the Node\\\\u2019s CPU utilization and create an alert. 
For this alert, we leverage the metrics:\\"}),`\\n`,(0,n.jsx)(e.pre,{children:(0,n.jsx)(e.code,{className:\\"language-yaml\\",children:`kubernetes.node.cpu.usage.nanocores\\nkubernetes.node.cpu.capacity.cores.\\n`})}),`\\n`,(0,n.jsx)(e.p,{children:\\"The following calculation (nodeUsage / 1000000000 ) /nodeCap grouped by node name will give us the CPU utilization of our cluster\\\\u2019s nodes.\\"}),`\\n`,(0,n.jsx)(e.p,{children:\\"The Watcher definition that implements this query can be created with the following API call to Elasticsearch:\\"}),`\\n`,(0,n.jsx)(e.pre,{children:(0,n.jsx)(e.code,{className:\\"language-bash\\",children:`curl -X PUT \\"https://elastic:changeme@localhost:9200/_watcher/watch/Node-CPU-Usage?pretty\\" -k -H \'Content-Type: application/json\' -d\'\\n{\\n \\"trigger\\": {\\n \\"schedule\\": {\\n \\"interval\\": \\"10m\\"\\n }\\n },\\n \\"input\\": {\\n \\"search\\": {\\n \\"request\\": {\\n \\"body\\": {\\n \\"size\\": 0,\\n \\"query\\": {\\n \\"bool\\": {\\n \\"must\\": [\\n {\\n \\"range\\": {\\n \\"@timestamp\\": {\\n \\"gte\\": \\"now-10m\\",\\n \\"lte\\": \\"now\\",\\n \\"format\\": \\"strict_date_optional_time\\"\\n }\\n }\\n },\\n {\\n \\"bool\\": {\\n \\"must\\": [\\n {\\n \\"query_string\\": {\\n \\"query\\": \\"data_stream.dataset: kubernetes.node OR data_stream.dataset: kubernetes.state_node\\",\\n \\"analyze_wildcard\\": true\\n }\\n }\\n ],\\n \\"filter\\": [],\\n \\"should\\": [],\\n \\"must_not\\": []\\n }\\n }\\n ],\\n \\"filter\\": [],\\n \\"should\\": [],\\n \\"must_not\\": []\\n }\\n },\\n \\"aggs\\": {\\n \\"nodes\\": {\\n \\"terms\\": {\\n \\"field\\": \\"kubernetes.node.name\\",\\n \\"size\\": \\"10000\\",\\n \\"order\\": {\\n \\"_key\\": \\"asc\\"\\n }\\n },\\n \\"aggs\\": {\\n \\"nodeUsage\\": {\\n \\"max\\": {\\n \\"field\\": \\"kubernetes.node.cpu.usage.nanocores\\"\\n }\\n },\\n \\"nodeCap\\": {\\n \\"max\\": {\\n \\"field\\": \\"kubernetes.node.cpu.capacity.cores\\"\\n }\\n },\\n \\"nodeCPUUsagePCT\\": {\\n \\"bucket_script\\": {\\n \\"buckets_path\\": {\\n \\"nodeUsage\\": \\"nodeUsage\\",\\n \\"nodeCap\\": \\"nodeCap\\"\\n },\\n \\"script\\": {\\n \\"source\\": \\"( params.nodeUsage / 1000000000 ) / params.nodeCap\\",\\n \\"lang\\": \\"painless\\",\\n \\"params\\": {\\n \\"_interval\\": 10000\\n }\\n },\\n \\"gap_policy\\": \\"skip\\"\\n }\\n }\\n }\\n }\\n }\\n },\\n \\"indices\\": [\\n \\"metrics-kubernetes*\\"\\n ]\\n }\\n }\\n },\\n \\"condition\\": {\\n \\"array_compare\\": {\\n \\"ctx.payload.aggregations.nodes.buckets\\": {\\n \\"path\\": \\"nodeCPUUsagePCT.value\\",\\n \\"gte\\": {\\n \\"value\\": 80\\n }\\n }\\n }\\n },\\n \\"actions\\": {\\n \\"log_hits\\": {\\n \\"foreach\\": \\"ctx.payload.aggregations.nodes.buckets\\",\\n \\"max_iterations\\": 500,\\n \\"logging\\": {\\n \\"text\\": \\"Kubernetes node found with high CPU usage: {{ctx.payload.key}} -> {{ctx.payload.nodeCPUUsagePCT.value}}\\"\\n }\\n }\\n },\\n \\"metadata\\": {\\n \\"xpack\\": {\\n \\"type\\": \\"json\\"\\n },\\n \\"name\\": \\"Node CPU Usage\\"\\n }\\n}\\n`})}),`\\n`,(0,n.jsx)(e.h3,{id:\\"oomkilled-pods-detection-and-alerting\\",children:\\"OOMKilled Pods detection and alerting\\"}),`\\n`,(0,n.jsx)(e.p,{children:\\"Another Watcher that we will explore is the one that detects Pods that have been restarted due to an OOMKilled error. 
This error is quite common in Kubernetes workloads and is useful to detect this early on to inform the team that owns this workload, so they can either investigate issues that could cause memory leaks or just consider increasing the required resources for the workload itself.\\"}),`\\n`,(0,n.jsx)(e.p,{children:\\"This information can be retrieved from a query like the following:\\"}),`\\n`,(0,n.jsx)(e.pre,{children:(0,n.jsx)(e.code,{className:\\"language-yaml\\",children:`kubernetes.container.status.last_terminated_reason: OOMKilled\\n`})}),`\\n`,(0,n.jsx)(e.p,{children:\\"Here is how we can create the respective Watcher with an API call:\\"}),`\\n`,(0,n.jsx)(e.pre,{children:(0,n.jsx)(e.code,{className:\\"language-bash\\",children:`curl -X PUT \\"https://elastic:changeme@localhost:9200/_watcher/watch/Pod-Terminated-OOMKilled?pretty\\" -k -H \'Content-Type: application/json\' -d\'\\n{\\n \\"trigger\\": {\\n \\"schedule\\": {\\n \\"interval\\": \\"1m\\"\\n }\\n },\\n \\"input\\": {\\n \\"search\\": {\\n \\"request\\": {\\n \\"search_type\\": \\"query_then_fetch\\",\\n \\"indices\\": [\\n \\"*\\"\\n ],\\n \\"rest_total_hits_as_int\\": true,\\n \\"body\\": {\\n \\"size\\": 0,\\n \\"query\\": {\\n \\"bool\\": {\\n \\"must\\": [\\n {\\n \\"range\\": {\\n \\"@timestamp\\": {\\n \\"gte\\": \\"now-1m\\",\\n \\"lte\\": \\"now\\",\\n \\"format\\": \\"strict_date_optional_time\\"\\n }\\n }\\n },\\n {\\n \\"bool\\": {\\n \\"must\\": [\\n {\\n \\"query_string\\": {\\n \\"query\\": \\"data_stream.dataset: kubernetes.state_container\\",\\n \\"analyze_wildcard\\": true\\n }\\n },\\n {\\n \\"exists\\": {\\n \\"field\\": \\"kubernetes.container.status.last_terminated_reason\\"\\n }\\n },\\n {\\n \\"query_string\\": {\\n \\"query\\": \\"kubernetes.container.status.last_terminated_reason: OOMKilled\\",\\n \\"analyze_wildcard\\": true\\n }\\n }\\n ],\\n \\"filter\\": [],\\n \\"should\\": [],\\n \\"must_not\\": []\\n }\\n }\\n ],\\n \\"filter\\": [],\\n \\"should\\": [],\\n \\"must_not\\": []\\n }\\n },\\n \\"aggs\\": {\\n \\"pods\\": {\\n \\"terms\\": {\\n \\"field\\": \\"kubernetes.pod.name\\",\\n \\"order\\": {\\n \\"_key\\": \\"asc\\"\\n }\\n }\\n }\\n }\\n }\\n }\\n }\\n },\\n \\"condition\\": {\\n \\"array_compare\\": {\\n \\"ctx.payload.aggregations.pods.buckets\\": {\\n \\"path\\": \\"doc_count\\",\\n \\"gte\\": {\\n \\"value\\": 1,\\n \\"quantifier\\": \\"some\\"\\n }\\n }\\n }\\n },\\n \\"actions\\": {\\n \\"ping_slack\\": {\\n \\"foreach\\": \\"ctx.payload.aggregations.pods.buckets\\",\\n \\"max_iterations\\": 500,\\n \\"webhook\\": {\\n \\"method\\": \\"POST\\",\\n \\"url\\": \\"https://hooks.slack.com/services/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\\",\\n \\"body\\": \\"{\\\\\\\\\\"channel\\\\\\\\\\": \\\\\\\\\\"#k8s-alerts\\\\\\\\\\", \\\\\\\\\\"username\\\\\\\\\\": \\\\\\\\\\"k8s-cluster-alerting\\\\\\\\\\", \\\\\\\\\\"text\\\\\\\\\\": \\\\\\\\\\"Pod {{ctx.payload.key}} was terminated with status OOMKilled.\\\\\\\\\\"}\\"\\n }\\n }\\n },\\n \\"metadata\\": {\\n \\"xpack\\": {\\n \\"type\\": \\"json\\"\\n },\\n \\"name\\": \\"Pod Terminated OOMKilled\\"\\n }\\n}\\n`})}),`\\n`,(0,n.jsx)(e.h3,{id:\\"from-kubernetes-data-to-alerts-summary\\",children:\\"From Kubernetes data to alerts summary\\"}),`\\n`,(0,n.jsx)(e.p,{children:\\"So far we saw how we can start from plain Kubernetes fields, use them in ES queries, and build Watchers and alerts on top of them.\\"}),`\\n`,(0,n.jsxs)(e.p,{children:[\\"One can explore more possible data combinations and build queries and alerts following the 
examples we provided here. A \\",(0,n.jsx)(e.a,{href:\\"https://github.com/elastic/integrations/tree/main/packages/kubernetes/docs\\",rel:\\"nofollow\\",children:\\"full list of alerts\\"}),\\" is available, as well as a \\",(0,n.jsx)(e.a,{href:\\"https://github.com/elastic/k8s-integration-infra/tree/main/scripts/alerting\\",rel:\\"nofollow\\",children:\\"basic scripted way of installing them\\"}),\\".\\"]}),`\\n`,(0,n.jsx)(e.p,{children:\\"Of course, these examples come with simple actions defined that only log messages into the Elasticsearch logs. However, one can use more advanced and useful outputs like Slack\\\\u2019s webhooks:\\"}),`\\n`,(0,n.jsx)(e.pre,{children:(0,n.jsx)(e.code,{className:\\"language-json\\",children:`\\"actions\\": {\\n \\"ping_slack\\": {\\n \\"foreach\\": \\"ctx.payload.aggregations.pods.buckets\\",\\n \\"max_iterations\\": 500,\\n \\"webhook\\": {\\n \\"method\\": \\"POST\\",\\n \\"url\\": \\"https://hooks.slack.com/services/T04SW3JHXasdfasdfasdfasdfasdf\\",\\n \\"body\\": \\"{\\\\\\\\\\"channel\\\\\\\\\\": \\\\\\\\\\"#k8s-alerts\\\\\\\\\\", \\\\\\\\\\"username\\\\\\\\\\": \\\\\\\\\\"k8s-cluster-alerting\\\\\\\\\\", \\\\\\\\\\"text\\\\\\\\\\": \\\\\\\\\\"Pod {{ctx.payload.key}} was terminated with status OOMKilled.\\\\\\\\\\"}\\"\\n }\\n }\\n }\\n`})}),`\\n`,(0,n.jsx)(e.p,{children:\\"The result would be a Slack message like the following:\\"}),`\\n`,(0,n.jsx)(e.p,{children:(0,n.jsx)(e.img,{src:\\"/assets/images/enable-kubernetes-alerting-observability/blog-elastic-k8s-cluster-alerting.png\\",alt:\\"\\",width:\\"453\\",height:\\"52\\"})}),`\\n`,(0,n.jsx)(e.h2,{id:\\"next-steps\\",children:\\"Next steps\\"}),`\\n`,(0,n.jsx)(e.p,{children:\\"In our next steps, we would like to make these alerts part of our Kubernetes integration, which would mean that the predefined alerts would be installed when users install or enable the Kubernetes integration. At the same time, we plan to implement some of these as Kibana\\\\u2019s native SLIs, providing the option to our users to quickly define SLOs on top of the SLIs through a nice user interface. If you\\\\u2019re interested to learn more about these, follow the public GitHub issues for more information and feel free to provide your feedback:\\"}),`\\n`,(0,n.jsxs)(e.ul,{children:[`\\n`,(0,n.jsx)(e.li,{children:(0,n.jsx)(e.a,{href:\\"https://github.com/elastic/package-spec/issues/484\\",rel:\\"nofollow\\",children:\\"https://github.com/elastic/package-spec/issues/484\\"})}),`\\n`,(0,n.jsx)(e.li,{children:(0,n.jsx)(e.a,{href:\\"https://github.com/elastic/kibana/issues/150050\\",rel:\\"nofollow\\",children:\\"https://github.com/elastic/kibana/issues/150050\\"})}),`\\n`]}),`\\n`,(0,n.jsx)(e.p,{children:\\"For those who are eager to start using Kubernetes alerting today, here is what you need to do:\\"}),`\\n`,(0,n.jsxs)(e.ol,{children:[`\\n`,(0,n.jsxs)(e.li,{children:[\\"Make sure that you have an Elastic cluster up and running. 
The fastest way to deploy your cluster is to spin up a \\",(0,n.jsx)(e.a,{href:\\"https://www.elastic.co/elasticsearch/service\\",rel:\\"nofollow\\",children:\\"free trial of Elasticsearch Service\\"}),\\".\\"]}),`\\n`,(0,n.jsxs)(e.li,{children:[\\"Install the latest Elastic Agent on your Kubernetes cluster following the respective \\",(0,n.jsx)(e.a,{href:\\"https://www.elastic.co/guide/en/fleet/master/running-on-kubernetes-managed-by-fleet.html\\",rel:\\"nofollow\\",children:\\"documentation\\"}),\\".\\"]}),`\\n`,(0,n.jsxs)(e.li,{children:[\\"Install our provided alerts that can be found at \\",(0,n.jsx)(e.a,{href:\\"https://github.com/elastic/integrations/tree/main/packages/kubernetes/docs\\",rel:\\"nofollow\\",children:\\"https://github.com/elastic/integrations/tree/main/packages/kubernetes/docs\\"}),\\" or at \\",(0,n.jsx)(e.a,{href:\\"https://github.com/elastic/k8s-integration-infra/tree/main/scripts/alerting\\",rel:\\"nofollow\\",children:\\"https://github.com/elastic/k8s-integration-infra/tree/main/scripts/alerting\\"}),\\".\\"]}),`\\n`]}),`\\n`,(0,n.jsxs)(e.p,{children:[\\"Of course, if you have any questions, remember that we are always happy to help on the Discuss \\",(0,n.jsx)(e.a,{href:\\"https://discuss.elastic.co/\\",rel:\\"nofollow\\",children:\\"forums\\"}),\\".\\"]})]})}function h(t={}){let{wrapper:e}=t.components||{};return e?(0,n.jsx)(e,{...t,children:(0,n.jsx)(d,{...t})}):d(t)}return k(_);})();\\n;return Component;"},"_id":"articles/enable-kubernetes-alerting-elastic-observability.mdx","_raw":{"sourceFilePath":"articles/enable-kubernetes-alerting-elastic-observability.mdx","sourceFileName":"enable-kubernetes-alerting-elastic-observability.mdx","sourceFileDir":"articles","contentType":"mdx","flattenedPath":"articles/enable-kubernetes-alerting-elastic-observability"},"type":"Article","imageUrl":"/assets/images/enable-kubernetes-alerting-observability/alert-management.jpg","readingTime":"24 min read","url":"/enable-kubernetes-alerting-observability","headings":[{"level":2,"title":"Why do we need alerts?","href":"#why-do-we-need-alerts"},{"level":3,"title":"How can this be achieved?","href":"#how-can-this-be-achieved"},{"level":2,"title":"SLIs, alerts, and SLOs: Why are they important for SREs?","href":"#slis-alerts-and-slos-why-are-they-important-for-sres"},{"level":2,"title":"Creating alerts for a Kubernetes cluster","href":"#creating-alerts-for-a-kubernetes-cluster"},{"level":3,"title":"Latency alerts","href":"#latency-alerts"},{"level":3,"title":"Resource saturation","href":"#resource-saturation"},{"level":3,"title":"Error detection","href":"#error-detection"},{"level":2,"title":"From Kubernetes data to Elasticsearch queries","href":"#from-kubernetes-data-to-elasticsearch-queries"},{"level":3,"title":"CPU utilization alert","href":"#cpu-utilization-alert"},{"level":3,"title":"OOMKilled Pods detection and alerting","href":"#oomkilled-pods-detection-and-alerting"},{"level":3,"title":"From Kubernetes data to alerts summary","href":"#from-kubernetes-data-to-alerts-summary"},{"level":2,"title":"Next steps","href":"#next-steps"}]},{"title":"OpenTelemetry Demo with the Elastic Distributions of OpenTelemetry","slug":"opentelemetry-demo-with-the-elastic-distributions-of-opentelemetry","date":"2024-10-07","description":"Discover how Elastic is dedicated to supporting users in their journey with OpenTelemetry. 
Explore our public deployment of the OpenTelemetry Demo and see how Elastic\'s solutions enhance your observability experience.","image":"elastic-oteldemo.jpg","author":[{"slug":"roger-coll","type":"Author","_raw":{}}],"tags":[{"slug":"opentelemetry","type":"Tag","_raw":{}}],"body":{"raw":"\\n\\nRecently, Elastic [introduced the Elastic Distributions\\n(EDOT)](https://www.elastic.co/observability-labs/blog/elastic-distributions-opentelemetry)\\nfor various OpenTelemetry components. We are proud to announce that these EDOT\\ncomponents are now available in [Elastic\'s fork of the OpenTelemetry\\nDemo](https://github.com/elastic/opentelemetry-demo). We\'ve also made a\\n[Kibana endpoint](https://ela.st/demo-otel) public, allowing you to dive into the\\ndemo’s live data and explore its capabilities firsthand. In this blog post,\\nwe\'ll elaborate on the reasons behind the fork and explore the powerful new\\nfeatures it introduces. We\'ll also provide a comprehensive overview of how\\nthese enhancements can be leveraged with the Elastic Distributions of\\nOpenTelemetry (EDOT) for advanced error detection, as well as the EDOT\\nCollector—a cutting-edge evolution of the Elastic Agent—for seamless data\\ncollection and analysis.\\n\\n## What is the OpenTelemetry Demo?\\n\\nThe [OpenTelemetry Demo](https://github.com/open-telemetry/opentelemetry-demo)\\nis a microservices-based application created by the OpenTelemetry community to\\nshowcase its capabilities in a realistic, distributed system environment.\\nThis demo application, known as the OpenTelemetry Astronomy Shop, simulates an\\ne-commerce website composed of over 10 interconnected microservices (written in\\nmultiple languages: Go, Java, .NET, Node.js, etc.), communicating via HTTP and\\ngRPC. Each service is fully instrumented with OpenTelemetry, generating\\ncomprehensive traces, metrics, and logs. The demo serves as an invaluable\\nresource for understanding how to implement and use OpenTelemetry in real-world\\napplications.\\n\\n![1 - Service Map for the OpenTelemetry Demo Elastic\\nfork](/assets/images/opentelemetry-demo-with-the-elastic-distributions-of-opentelemetry/opentelemetry_demo_service_map.png)\\n\\nOne of the microservices, called `loadgenerator`, automatically starts\\ngenerating requests to the various endpoints of the demo, simulating a\\nreal-world environment where multiple clients are interacting with the system.\\nThis helps replicate the behavior of a busy, live application with concurrent\\nuser activity.\\n\\n### Elastic\'s fork\\n\\nElastic recognized an opportunity to enhance the OpenTelemetry Demo by forking\\nit and integrating advanced Elastic features for deeper observability and\\nsimpler monitoring. While forking is the [recommended OpenTelemetry\\napproach](https://github.com/open-telemetry/opentelemetry-demo?tab=readme-ov-file#demos-featuring-the-astronomy-shop),\\nwe aim to leverage the robust foundation and latest updates from the upstream\\nversion as much as possible. To achieve this, Elastic’s fork of the\\nOpenTelemetry Demo performs daily pulls from upstream, seamlessly integrating\\nthem with Elastic-specific changes. To avoid conflicts, we continuously\\ncontribute upstream, ensuring Elastic\'s modifications are always additive or\\nconfigurable through environment variables. 
One such contribution is the\\n[.env.override\\nfile](https://github.com/elastic/opentelemetry-demo/blob/main/.env.override),\\ndesigned exclusively for vendor forks to override the microservices images and\\nconfiguration files used in the demo.\\n\\n## Deeper Insights with Elastic Distributions\\n\\nIn our current update of Elastic\'s OpenTelemetry Demo fork, we have replaced\\nsome of the microservices\' OTel SDKs used for instrumentation with Elastic\'s\\nspecialized distributions. These changes ensure deeper integration with\\nElastic\'s observability tools, offering richer insights and more robust\\nmonitoring capabilities. These are some of the fork\'s changes:\\n\\n**Java services:** The Ad, Fraud Detection, and Kafka services now utilize the\\nElastic distribution of the OpenTelemetry Java Agent. One of the features\\nincluded in the distribution is span stack traces, which provide precise\\ninformation about where in the code path a span originated. Learn more about\\nthe Elastic Java Agent\\n[here](https://www.elastic.co/observability-labs/blog/elastic-distribution-opentelemetry-java-agent).\\n\\n![2 - Ad Service span stack trace\\nexample](/assets/images/opentelemetry-demo-with-the-elastic-distributions-of-opentelemetry/adservice_span_stacktrace.png)\\n\\nThe **Cart service** has been upgraded to use the Elastic distribution of the\\nOpenTelemetry .NET Agent. This replacement shows how the Elastic Distribution\\nof OpenTelemetry .NET (EDOT .NET) can be used to get started with\\nOpenTelemetry in your .NET applications with zero code changes. Discover more\\nabout the Elastic .NET Agent in [this blog\\npost](https://www.elastic.co/observability-labs/blog/elastic-opentelemetry-distribution-dotnet-applications).\\n\\nIn the **Payment service**, we\'ve configured the Elastic distribution of the\\nOpenTelemetry Node.js Agent. The distribution ships with the host-metrics\\nextension, and Kibana provides a curated service metrics UI. Read more about\\nthe Elastic Node.js Agent\\n[here](https://www.elastic.co/observability-labs/blog/elastic-opentelemetry-distribution-node-js).\\n\\n![3 - Payment service host\\nmetrics](/assets/images/opentelemetry-demo-with-the-elastic-distributions-of-opentelemetry/payment_service_host_metrics.png)\\n\\nThe **Recommendation service** now leverages EDOT Python, replacing the\\nstandard OpenTelemetry Python agent. The Python distribution is another example\\nof zero-code (or automatic) instrumentation, meaning that the distribution\\nwill set up the OpenTelemetry SDK and enable all the recommended\\ninstrumentations for you. Find out more about the Elastic Python Agent in [this\\nblog\\npost](https://www.elastic.co/observability-labs/blog/elastic-opentelemetry-distribution-python).\\n\\nIt\'s important to highlight that the Elastic Distributions of OpenTelemetry don\'t\\nbundle proprietary software; they have been built on top of the vanilla OTel\\nSDKs, but they offer some advantages, such as a single package for installation,\\neasy auto-instrumentation with reasonable default configuration, automatic logs\\ntelemetry sending, and many more.
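\\n\\nTo make the zero-code claim concrete, here is a rough sketch of what launching an\\nauto-instrumented Python service can look like. The endpoint, token, and service\\nname below are illustrative placeholders, and the exact package and bootstrap\\ncommands may differ between EDOT versions:\\n\\n```bash\\n# Install the EDOT Python distribution and the instrumentations it recommends\\n# (package and bootstrap command names are illustrative)\\npip install elastic-opentelemetry\\nedot-bootstrap --action=install\\n\\n# Standard OTel environment variables: where to send OTLP data and how to name the service\\nexport OTEL_EXPORTER_OTLP_ENDPOINT=\\"https://my-apm-server:8200\\"\\nexport OTEL_EXPORTER_OTLP_HEADERS=\\"Authorization=Bearer my-secret-token\\"\\nexport OTEL_RESOURCE_ATTRIBUTES=\\"service.name=recommendation\\"\\n\\n# Run the application unmodified; instrumentation is injected at startup\\nopentelemetry-instrument python app.py\\n```\\n\\n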
Along these lines, the ultimate goal is to\\ncontribute as many EDOT features as possible back to the upstream OpenTelemetry\\nagents; the distributions are designed in such a way that the additional\\nfeatures, realized as extensions, work directly with the OTel SDKs.\\n\\n\\n## Collecting Data with the Elastic Collector Distribution\\n\\nThe OpenTelemetry Demo applications generate and send their signals to an\\nOpenTelemetry Collector OTLP endpoint. In the Demo\'s fork, the EDOT collector\\nis set up to forward all OTLP signals from the microservices to an [APM\\nserver](https://www.elastic.co/guide/en/observability/current/apm.html) OTLP\\nendpoint. Additionally, it sends all other metrics and logs collected by the\\ncollector to an Elasticsearch endpoint.\\n\\nIf the fork is deployed in a Kubernetes environment, the collector will\\nautomatically start collecting the system\'s metrics. The collector is\\nconfigured to use the [hostmetrics\\nreceiver](https://github.com/open-telemetry/opentelemetry-collector-contrib/tree/main/receiver/hostmetricsreceiver)\\nto monitor the K8s nodes\' metrics, the [kubeletstats\\nreceiver](https://github.com/open-telemetry/opentelemetry-collector-contrib/tree/main/receiver/kubeletstatsreceiver)\\nto retrieve the Kubelet\'s metrics, and the [filelog\\nreceiver](https://github.com/open-telemetry/opentelemetry-collector-contrib/tree/main/receiver/filelogreceiver),\\nwhich collects all the cluster\'s logs.\\n\\n![4 - Host\\nmetrics](/assets/images/opentelemetry-demo-with-the-elastic-distributions-of-opentelemetry/node_host_metrics.png)\\n\\nBoth the signals generated by the microservices and those collected by the EDOT\\ncollector are enriched with Kubernetes metadata, allowing users to correlate\\nthem seamlessly. This makes it easy to track and observe which Kubernetes nodes\\nand pods each service is running on, providing deep insights into both\\napplication performance and infrastructure health.\\n\\nLearn more about Elastic\'s OpenTelemetry Collector distribution:\\nhttps://www.elastic.co/observability-labs/blog/elastic-distribution-opentelemetry-collector\\n\\n\\n## Error detection with Elastic\\n\\nThe OpenTelemetry Demo incorporates [flagd](https://flagd.dev/), a feature flag\\nevaluation engine used to simulate error scenarios. For example, the\\n`paymentServiceFailure` flag will force an error for every request to the\\npayment service `charge` endpoint. Since the service is instrumented with\\nOpenTelemetry, the error will be captured in the generated traces. We can then\\nuse Kibana\'s powerful visualization and search tools to trace the error back to\\nits root cause.\\n\\n![5 - Payment service\\nerror](/assets/images/opentelemetry-demo-with-the-elastic-distributions-of-opentelemetry/payment_error.png)\\n![6 - Payment service trace\\nerror](/assets/images/opentelemetry-demo-with-the-elastic-distributions-of-opentelemetry/payment_trace_error.png)\\n\\nAnother available flag is named `adServiceHighCpu`, which causes a high CPU\\nload in the ad service.
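\\n\\nFor reference, flagd evaluates flags from a JSON configuration. The sketch below follows flagd\'s flag-definition schema, but the variants shown are illustrative rather than copied from the demo\'s actual flag file:\\n\\n```json\\n{\\n  \\"flags\\": {\\n    \\"adServiceHighCpu\\": {\\n      \\"state\\": \\"ENABLED\\",\\n      \\"variants\\": { \\"on\\": true, \\"off\\": false },\\n      \\"defaultVariant\\": \\"on\\"\\n    }\\n  }\\n}\\n```\\n\\nWith the flag enabled, the ad service starts consuming extra CPU.\\n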
This increased CPU usage can be monitored either\\nthrough the service\'s metrics or the related metrics of its Kubernetes pod:\\n\\n![7 - AdService High CPU\\nerror](/assets/images/opentelemetry-demo-with-the-elastic-distributions-of-opentelemetry/adservice_high_cpu_error.png)\\n\\nThe full list of simulated scenarios can be found at [this\\nlink](https://opentelemetry.io/docs/demo/feature-flags/).\\n\\n## Start your own exploration\\n\\nReady to explore the OpenTelemetry Demo with Elastic and its enhanced\\nobservability capabilities? Follow the link to Kibana and begin your own\\nexploration of how Elastic and OpenTelemetry can transform your approach to\\nobservability.\\n\\nLive demo: https://ela.st/demo-otel\\n\\nBut that\'s not all—if you want to take it a step further, you can deploy the\\nOpenTelemetry Demo directly with your own Elasticsearch stack. Follow the steps\\nprovided [here](https://github.com/elastic/opentelemetry-demo) to set it up and\\nstart gaining valuable insights from your own environment.\\n","code":"var Component=(()=>{var m=Object.create;var r=Object.defineProperty;var p=Object.getOwnPropertyDescriptor;var u=Object.getOwnPropertyNames;var g=Object.getPrototypeOf,y=Object.prototype.hasOwnProperty;var f=(n,e)=>()=>(e||n((e={exports:{}}).exports,e),e.exports),b=(n,e)=>{for(var i in e)r(n,i,{get:e[i],enumerable:!0})},a=(n,e,i,s)=>{if(e&&typeof e==\\"object\\"||typeof e==\\"function\\")for(let o of u(e))!y.call(n,o)&&o!==i&&r(n,o,{get:()=>e[o],enumerable:!(s=p(e,o))||s.enumerable});return n};var w=(n,e,i)=>(i=n!=null?m(g(n)):{},a(e||!n||!n.__esModule?r(i,\\"default\\",{value:n,enumerable:!0}):i,n)),v=n=>a(r({},\\"__esModule\\",{value:!0}),n);var c=f((D,l)=>{l.exports=_jsx_runtime});var E={};b(E,{default:()=>d,frontmatter:()=>T});var t=w(c()),T={title:\\"OpenTelemetry Demo with the Elastic Distributions of OpenTelemetry\\",slug:\\"opentelemetry-demo-with-the-elastic-distributions-of-opentelemetry\\",date:\\"2024-10-07\\",description:\\"Discover how Elastic is dedicated to supporting users in their journey with OpenTelemetry. Explore our public deployment of the OpenTelemetry Demo and see how Elastic\'s solutions enhance your observability experience.\\",author:[{slug:\\"roger-coll\\"}],image:\\"elastic-oteldemo.jpg\\",tags:[{slug:\\"opentelemetry\\"}]};function h(n){let e={a:\\"a\\",code:\\"code\\",h2:\\"h2\\",h3:\\"h3\\",img:\\"img\\",p:\\"p\\",strong:\\"strong\\",...n.components};return(0,t.jsxs)(t.Fragment,{children:[(0,t.jsxs)(e.p,{children:[\\"Recently, Elastic \\",(0,t.jsx)(e.a,{href:\\"https://www.elastic.co/observability-labs/blog/elastic-distributions-opentelemetry\\",rel:\\"nofollow\\",children:`introduced the Elastic Distributions\\n(EDOT)`}),`\\nfor various OpenTelemetry components, we are proud to announce that these EDOT\\ncomponents are now available in the `,(0,t.jsx)(e.a,{href:\\"https://github.com/elastic/opentelemetry-demo\\",rel:\\"nofollow\\",children:`Elastic\'s fork of the OpenTelemetry\\nDemo`}),`. We\'ve also made public a\\n`,(0,t.jsx)(e.a,{href:\\"https://ela.st/demo-otel\\",rel:\\"nofollow\\",children:\\"Kibana endpoint\\"}),`, allowing you to dive into the\\ndemo\\\\u2019s live data and explore its capabilities firsthand. In this blog post,\\nwe\'ll elaborate on the reasons behind the fork and explore the powerful new\\nfeatures it introduces. 
We\'ll also provide a comprehensive overview of how\\nthese enhancements can be leveraged with the Elastic Distributions of\\nOpenTelemetry (EDOT) for advanced error detection, as well as the EDOT\\nCollector\\\\u2014a cutting-edge evolution of the Elastic Agent\\\\u2014for seamless data\\ncollection and analysis.`]}),`\\n`,(0,t.jsx)(e.h2,{id:\\"what-is-the-opentelemetry-demo\\",children:\\"What is the OpenTelemetry Demo?\\"}),`\\n`,(0,t.jsxs)(e.p,{children:[\\"The \\",(0,t.jsx)(e.a,{href:\\"https://github.com/open-telemetry/opentelemetry-demo\\",rel:\\"nofollow\\",children:\\"OpenTelemetry Demo\\"}),`\\nis a microservices-based application created by OpenTelemetry\'s community to\\nshowcase its capabilities in a realistic, and distributed system environment.\\nThis demo application, known as the OpenTelemetry Astronomy Shop, simulates an\\ne-commerce website composed of over 10 interconnected microservices (written in\\nmultiple languages: Go, Java, .NET, Node.js, etc.), communicating via HTTP and\\ngRPC. Each service is fully instrumented with OpenTelemetry, generating\\ncomprehensive traces, metrics, and logs. The demo serves as an invaluable\\nresource for understanding how to implement and use OpenTelemetry in real-world\\napplications.`]}),`\\n`,(0,t.jsx)(e.p,{children:(0,t.jsx)(e.img,{src:\\"/assets/images/opentelemetry-demo-with-the-elastic-distributions-of-opentelemetry/opentelemetry_demo_service_map.png\\",alt:`1 - Service Map for the OpenTelemetry Demo Elastic\\nfork`,width:\\"1213\\",height:\\"1152\\"})}),`\\n`,(0,t.jsxs)(e.p,{children:[\\"One of the microservices, called \\",(0,t.jsx)(e.code,{children:\\"loadgenerator\\"}),`, automatically starts\\ngenerating requests to the various endpoints of the demo, simulating a\\nreal-world environment where multiple clients are interacting with the system.\\nThis helps replicate the behavior of a busy, live application with concurrent\\nuser activity.`]}),`\\n`,(0,t.jsx)(e.h3,{id:\\"elastics-fork\\",children:\\"Elastic\'s fork\\"}),`\\n`,(0,t.jsxs)(e.p,{children:[`Elastic recognized an opportunity to enhance the OpenTelemetry Demo by forking\\nit and integrating advanced Elastic features for deeper observability and\\nsimpler monitoring. While forking is the `,(0,t.jsx)(e.a,{href:\\"https://github.com/open-telemetry/opentelemetry-demo?tab=readme-ov-file#demos-featuring-the-astronomy-shop\\",rel:\\"nofollow\\",children:`recommended OpenTelemetry\\napproach`}),`,\\nwe aim to leverage the robust foundation and latest updates from the upstream\\nversion as much as possible. To achieve this, Elastic\\\\u2019s fork of the\\nOpenTelemetry Demo performs daily pulls from upstream, seamlessly integrating\\nthem with Elastic-specific changes. To avoid conflicts, we continuously\\ncontribute upstream, ensuring Elastic\'s modifications are always additive or\\nconfigurable through environment variables. 
One such contribution is the\\n`,(0,t.jsx)(e.a,{href:\\"https://github.com/elastic/opentelemetry-demo/blob/main/.env.override\\",rel:\\"nofollow\\",children:`.env.override\\nfile`}),`,\\ndesigned exclusively for vendor forks to override the microservices images and\\nconfiguration files used in the demo.`]}),`\\n`,(0,t.jsx)(e.h2,{id:\\"deeper-insights-with-elastic-distributions\\",children:\\"Deeper Insights with Elastic Distributions\\"}),`\\n`,(0,t.jsx)(e.p,{children:`In our current update of Elastic\'s OpenTelemetry Demo fork, we have replaced\\nsome of the microservices OTel SDKs used for instrumentation with Elastic\'s\\nspecialized distributions. These changes ensure deeper integration with\\nElastic\'s observability tools, offering richer insights and more robust\\nmonitoring capabilities. These are some of the fork\'s changes:`}),`\\n`,(0,t.jsxs)(e.p,{children:[(0,t.jsx)(e.strong,{children:\\"Java services:\\"}),` The Ad, Fraud Detection, and Kafka services now utilize the\\nElastic distribution of the OpenTelemetry Java Agent. One of the included\\nfeatures in the distribution are stack traces, which provides precise\\ninformation of where in the code path a span was originated. Learn more about\\nthe Elastic Java Agent\\n`,(0,t.jsx)(e.a,{href:\\"https://www.elastic.co/observability-labs/blog/elastic-distribution-opentelemetry-java-agent\\",rel:\\"nofollow\\",children:\\"here\\"}),\\".\\"]}),`\\n`,(0,t.jsx)(e.p,{children:(0,t.jsx)(e.img,{src:\\"/assets/images/opentelemetry-demo-with-the-elastic-distributions-of-opentelemetry/adservice_span_stacktrace.png\\",alt:`2 - Ad Service span stack trace\\nexample`,width:\\"1500\\",height:\\"700\\"})}),`\\n`,(0,t.jsxs)(e.p,{children:[\\"The \\",(0,t.jsx)(e.strong,{children:\\"Cart service\\"}),` has been upgraded to use the Elastic distribution of the\\nOpenTelemetry .NET Agent. This replacement gives visibility on how the Elastic\\nDistribution of OpenTelemetry .NET (EDOT .NET) can be used to get started using\\nOpenTelemetry in your .NET applications with zero code changes. Discover more\\nabout the Elastic .NET Agent in `,(0,t.jsx)(e.a,{href:\\"https://www.elastic.co/observability-labs/blog/elastic-opentelemetry-distribution-dotnet-applications\\",rel:\\"nofollow\\",children:`this blog\\npost`}),\\".\\"]}),`\\n`,(0,t.jsxs)(e.p,{children:[\\"In the \\",(0,t.jsx)(e.strong,{children:\\"Payment service\\"}),`, we\'ve configured the Elastic distribution of the\\nOpenTelemetry Node.js Agent. The distribution ships with the host-metrics\\nextension, and Kibana provides a curated service metrics UI. Read more about\\nthe Elastic Node.js Agent\\n`,(0,t.jsx)(e.a,{href:\\"https://www.elastic.co/observability-labs/blog/elastic-opentelemetry-distribution-node-js\\",rel:\\"nofollow\\",children:\\"here\\"}),\\".\\"]}),`\\n`,(0,t.jsx)(e.p,{children:(0,t.jsx)(e.img,{src:\\"/assets/images/opentelemetry-demo-with-the-elastic-distributions-of-opentelemetry/payment_service_host_metrics.png\\",alt:`3 - Payment service host\\nmetrics`,width:\\"1600\\",height:\\"412\\"})}),`\\n`,(0,t.jsxs)(e.p,{children:[\\"The \\",(0,t.jsx)(e.strong,{children:\\"Recommendation service\\"}),` now leverages the EDOT Python, replacing the\\nstandard OpenTelemetry Python agent. The Python distribution is another example\\nof a Zero-code (or Automatic) instrumentation, meaning that the distribution\\nwill set up the OpenTelemetry SDK and enable all the recommended\\ninstrumentations for you. 
Find out more about the Elastic Python Agent in `,(0,t.jsx)(e.a,{href:\\"https://www.elastic.co/observability-labs/blog/elastic-opentelemetry-distribution-python\\",rel:\\"nofollow\\",children:`this\\nblog\\npost`}),\\".\\"]}),`\\n`,(0,t.jsx)(e.p,{children:`It\'s important to highlight that Elastic Distributions of OpenTelemetry don\'t\\nbundle proprietary software, they have been build on top of the vanilla OTel\\nSDKs but they offer some advantages, such as single package for installation,\\neasy auto-instrumentation with reasonable default configuration, automatic logs\\ntelemetry sending, and many more. Along these lines, the ultimate goal is to\\ncontribute as many features from EDOT\'s back to the upstream OpenTelemetry\\nagents; they are designed in such a way that the additional features, realized\\nas extensions, work directly with the OTel SDKs.`}),`\\n`,(0,t.jsx)(e.h2,{id:\\"collecting-data-with-the-elastic-collector-distribution\\",children:\\"Collecting Data with the Elastic Collector Distribution\\"}),`\\n`,(0,t.jsxs)(e.p,{children:[`The OpenTelemetry Demo applications generate and send their signals to an\\nOpenTelemetry Collector OTLP endpoint. In the Demo\'s fork, the EDOT collector\\nis set up to forward all OTLP signals from the microservices to an `,(0,t.jsx)(e.a,{href:\\"https://www.elastic.co/guide/en/observability/current/apm.html\\",rel:\\"nofollow\\",children:`APM\\nserver`}),` OTLP\\nendpoint. Additionally, it sends all other metrics and logs collected by the\\ncollector to an Elasticsearch endpoint.`]}),`\\n`,(0,t.jsxs)(e.p,{children:[`If the fork is deployed in a Kubernetes environment, the collector will\\nautomatically start collecting the system\'s metrics. The collector will be\\nconfigured to use the `,(0,t.jsx)(e.a,{href:\\"https://github.com/open-telemetry/opentelemetry-collector-contrib/tree/main/receiver/hostmetricsreceiver\\",rel:\\"nofollow\\",children:`hostmetrics\\nreceivers`}),`\\nto monitor all the K8s node\'s metrics, the `,(0,t.jsx)(e.a,{href:\\"https://github.com/open-telemetry/opentelemetry-collector-contrib/tree/main/receiver/kubeletstatsreceiver\\",rel:\\"nofollow\\",children:`kuebeletstats\\nreceiver`}),`\\nto retrieve Kubelet\'s metrics and the `,(0,t.jsx)(e.a,{href:\\"https://github.com/open-telemetry/opentelemetry-collector-contrib/tree/main/receiver/filelogreceiver\\",rel:\\"nofollow\\",children:`filelog\\nreceiver`}),`,\\nthat will collect all cluster\'s.`]}),`\\n`,(0,t.jsx)(e.p,{children:(0,t.jsx)(e.img,{src:\\"/assets/images/opentelemetry-demo-with-the-elastic-distributions-of-opentelemetry/node_host_metrics.png\\",alt:`4 - Host\\nmetrics`,width:\\"1178\\",height:\\"913\\"})}),`\\n`,(0,t.jsx)(e.p,{children:`Both the signals generated by the microservices and those collected by the EDOT\\ncollector are enriched with Kubernetes metadata, allowing users to correlate\\nthem seamlessly. 
This makes it easy to track and observe which Kubernetes nodes\\nand pods each service is running on, providing deep insights into both\\napplication performance and infrastructure health.`}),`\\n`,(0,t.jsxs)(e.p,{children:[`Learn more about the Elastic\'s OpenTelemetry Collector distribution:\\n`,(0,t.jsx)(e.a,{href:\\"https://www.elastic.co/observability-labs/blog/elastic-distribution-opentelemetry-collector\\",rel:\\"nofollow\\",children:\\"https://www.elastic.co/observability-labs/blog/elastic-distribution-opentelemetry-collector\\"})]}),`\\n`,(0,t.jsx)(e.h2,{id:\\"error-detection-with-elastic\\",children:\\"Error detection with Elastic\\"}),`\\n`,(0,t.jsxs)(e.p,{children:[\\"The OpenTelemetry Demo incorporates \\",(0,t.jsx)(e.a,{href:\\"https://flagd.dev/\\",rel:\\"nofollow\\",children:\\"flagd\\"}),`, a feature flag\\nevaluation engine used to simulate error scenarios. For example, the\\n`,(0,t.jsx)(e.code,{children:\\"paymentServiceFailure\\"}),` flag will force an error for every request to the\\npayment service `,(0,t.jsx)(e.code,{children:\\"charge\\"}),` endpoint. Since the service is instrumented with\\nOpenTelemetry, the error will be captured in the generated traces. We can then\\nuse Kibana\'s powerful visualization and search tools to trace the error back to\\nits root cause.`]}),`\\n`,(0,t.jsxs)(e.p,{children:[(0,t.jsx)(e.img,{src:\\"/assets/images/opentelemetry-demo-with-the-elastic-distributions-of-opentelemetry/payment_error.png\\",alt:`5 - Payment service\\nerror`,width:\\"1600\\",height:\\"750\\"}),`\\n`,(0,t.jsx)(e.img,{src:\\"/assets/images/opentelemetry-demo-with-the-elastic-distributions-of-opentelemetry/payment_trace_error.png\\",alt:`6 - Payment service trace\\nerror`,width:\\"1600\\",height:\\"805\\"})]}),`\\n`,(0,t.jsxs)(e.p,{children:[\\"Another available flag is named \\",(0,t.jsx)(e.code,{children:\\"adServiceHighCpu\\"}),`, which causes a high CPU\\nload in the ad service. This increased CPU usage can be monitored either\\nthrough the service\'s metrics or the related metrics of its Kubernetes pod:`]}),`\\n`,(0,t.jsx)(e.p,{children:(0,t.jsx)(e.img,{src:\\"/assets/images/opentelemetry-demo-with-the-elastic-distributions-of-opentelemetry/adservice_high_cpu_error.png\\",alt:`7 - AdService High CPU\\nerror`,width:\\"1600\\",height:\\"598\\"})}),`\\n`,(0,t.jsxs)(e.p,{children:[\\"The full list of simulated scenarios can be found at \\",(0,t.jsx)(e.a,{href:\\"https://opentelemetry.io/docs/demo/feature-flags/\\",rel:\\"nofollow\\",children:`this\\nlink`}),\\".\\"]}),`\\n`,(0,t.jsx)(e.h2,{id:\\"start-your-own-exploration\\",children:\\"Start your own exploration\\"}),`\\n`,(0,t.jsx)(e.p,{children:`Ready to explore the OpenTelemetry Demo with Elastic and its enhanced\\nobservability capabilities? Follow the link to Kibana and begin your own\\nexploration of how Elastic and OpenTelemetry can transform your approach to\\nobservability.`}),`\\n`,(0,t.jsxs)(e.p,{children:[\\"Live demo: \\",(0,t.jsx)(e.a,{href:\\"https://ela.st/demo-otel\\",rel:\\"nofollow\\",children:\\"https://ela.st/demo-otel\\"})]}),`\\n`,(0,t.jsxs)(e.p,{children:[`But that\'s not all\\\\u2014if you want to take it a step further, you can deploy the\\nOpenTelemetry Demo directly with your own Elasticsearch stack. 
Follow the steps\\nprovided `,(0,t.jsx)(e.a,{href:\\"https://github.com/elastic/opentelemetry-demo\\",rel:\\"nofollow\\",children:\\"here\\"}),` to set it up and\\nstart gaining valuable insights from your own environment.`]})]})}function d(n={}){let{wrapper:e}=n.components||{};return e?(0,t.jsx)(e,{...n,children:(0,t.jsx)(h,{...n})}):h(n)}return v(E);})();\\n;return Component;"},"_id":"articles/exploring-the-opentelemetry-demo-with-elastic.mdx","_raw":{"sourceFilePath":"articles/exploring-the-opentelemetry-demo-with-elastic.mdx","sourceFileName":"exploring-the-opentelemetry-demo-with-elastic.mdx","sourceFileDir":"articles","contentType":"mdx","flattenedPath":"articles/exploring-the-opentelemetry-demo-with-elastic"},"type":"Article","imageUrl":"/assets/images/opentelemetry-demo-with-the-elastic-distributions-of-opentelemetry/elastic-oteldemo.jpg","readingTime":"6 min read","url":"/opentelemetry-demo-with-the-elastic-distributions-of-opentelemetry","headings":[{"level":2,"title":"What is the OpenTelemetry Demo?","href":"#what-is-the-opentelemetry-demo"},{"level":3,"title":"Elastic\'s fork","href":"#elastics-fork"},{"level":2,"title":"Deeper Insights with Elastic Distributions","href":"#deeper-insights-with-elastic-distributions"},{"level":2,"title":"Collecting Data with the Elastic Collector Distribution","href":"#collecting-data-with-the-elastic-collector-distribution"},{"level":2,"title":"Error detection with Elastic","href":"#error-detection-with-elastic"},{"level":2,"title":"Start your own exploration","href":"#start-your-own-exploration"}]},{"title":"Future-proof your logs with ecs@mappings template","slug":"future-proof-your-logs-with-ecs-mappings-template","date":"2024-09-23","description":"Explore how the ecs@mappings component template in Elasticsearch simplifies data management by providing a centralized, official definition of Elastic Common Schema (ECS) mappings. Learn about its benefits, including reduced configuration hassles, improved data integrity, and enhanced performance for both integration developers and community users. Discover how this feature streamlines ECS field support across Elastic Agent integrations and future-proofs your data streams.","image":"article.jpg","author":[{"slug":"maurizio-branca","type":"Author","_raw":{}}],"tags":[{"slug":"log-analytics","type":"Tag","_raw":{}},{"slug":"ecs","type":"Tag","_raw":{}}],"body":{"raw":"\\nAs the Elasticsearch ecosystem evolves, so do the tools and methodologies designed to streamline data management. One advancement that will significantly benefit our community is the [ecs@mappings](https://github.com/elastic/elasticsearch/blob/v8.15.1/x-pack/plugin/core/template-resources/src/main/resources/ecs%40mappings.json) component template.\\n\\n[ECS (Elastic Common Schema)](https://www.elastic.co/guide/en/ecs/current/ecs-reference.html) is a standardized data model for logs and metrics. It defines a set of [common field names and data types](https://www.elastic.co/guide/en/ecs/current/ecs-field-reference.html) that help ensure consistency and compatibility.\\n\\n`ecs@mappings` is a [component template](https://www.elastic.co/guide/en/elasticsearch/reference/current/indices-component-template.html) that offers an [Elastic-maintained](https://github.com/elastic/elasticsearch/blob/v8.15.1/x-pack/plugin/core/template-resources/src/main/resources/ecs%40mappings.json) definition of ECS mappings. Each Elasticsearch release contains an always up-to-date definition of all ECS fields. 
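\\n\\nIf you want to look at the template on your own cluster before reading on, you can fetch it directly with the component template API (on stack versions that ship it; see the requirements section below):\\n\\n```json\\nGET _component_template/ecs@mappings\\n```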
\\n\\n### Elastic Common Schema and Open Telemetry\\n\\nElastic will preserve our users\' investment in Elastic Common Schema by [donating](https://www.elastic.co/blog/ecs-elastic-common-schema-otel-opentelemetry-faq) ECS to OpenTelemetry. Elastic participates and collaborates with the OTel community to merge ECS and OpenTelemetry\'s Semantic Conventions over time.\\n\\n## The Evolution of ECS Mappings\\n\\nHistorically, users and integration developers have defined ECS (Elastic Common Schema) mappings manually within individual index templates and packages, each meticulously listing its fields. Although straightforward, this approach proved time-consuming and challenging to maintain.\\n\\nTo tackle this challenge, integration developers moved towards two primary methodologies:\\n\\n1. Referencing ECS mappings\\n2. Importing ECS mappings directly\\n\\nThese methods were steps in the right direction but introduced their own challenges, such as the maintenance cost of keeping the ECS mappings up-to-date with Elasticsearch changes.\\n\\n## Enter ecs@mappings\\n\\nThe [ecs@mappings](https://github.com/elastic/elasticsearch/blob/v8.15.1/x-pack/plugin/core/template-resources/src/main/resources/ecs%40mappings.json) component template supports all the field definitions in ECS, leveraging naming conventions and a set of dynamic templates.\\n\\nElastic started shipping the `ecs@mappings` component template with Elasticsearch v8.9.0, including it in the [logs-*-* index template](https://github.com/elastic/elasticsearch/blob/v8.14.2/x-pack/plugin/core/template-resources/src/main/resources/logs%40template.json).\\n\\nWith Elasticsearch v8.13.0, Elastic now includes `ecs@mappings` in the index templates of all the Elastic Agent integrations.\\n\\nThis move was a breakthrough because:\\n\\n- **Centralized and official**: With `ecs@mappings`, we now have an official definition of ECS mappings.\\n- **Out-of-the-box functionality**: ECS mappings are readily available, reducing the need for additional imports or references.\\n- **Simplified maintenance**: The need to manually keep up with ECS changes has diminished since the template from Elasticsearch itself remains up-to-date.\\n\\n### Enhanced Consistency and Reliability\\n\\nWith `ecs@mappings`, ECS mappings become the single source of truth. This unified approach means fewer discrepancies and higher consistency in data streams across integrations.\\n\\n## How Community Users Benefit\\n\\nCommunity users stand to gain significantly from the adoption of `ecs@mappings`. Here are the key advantages:\\n\\n1. **Reduced configuration hassles**: Whether you are an advanced user or just getting started, the simplified setup means fewer configuration steps and fewer opportunities for errors.\\n2. **Improved data integrity**: Since `ecs@mappings` ensures that field definitions are accurate and up-to-date, data integrity is maintained effortlessly.\\n3. **Better performance**: With less overhead in maintaining and referencing ECS fields, your Elasticsearch operations run more smoothly.\\n
4. **Enhanced documentation and discoverability**: As we standardize ECS mappings, the documentation can be centralized, making it easier for users to discover and understand ECS fields.\\n\\nLet\'s explore how the `ecs@mappings` component template helps users achieve these benefits.\\n\\n### Reduced configuration hassles\\n\\nModern Elasticsearch versions come with full ECS field support out of the box (see the “requirements” section later for specific versions).\\n\\nFor example, the [Custom AWS Logs integration](https://docs.elastic.co/integrations/aws_logs) installed on a supported Elasticsearch cluster already includes the `ecs@mappings` component template in its index template:\\n\\n```json\\nGET _index_template/logs-aws_logs.generic\\n{\\n \\"index_templates\\": [\\n {\\n \\"name\\": \\"logs-aws_logs.generic\\",\\n ...,\\n \\"composed_of\\": [\\n \\"logs@settings\\",\\n \\"logs-aws_logs.generic@package\\",\\n \\"logs-aws_logs.generic@custom\\",\\n \\"ecs@mappings\\",\\n \\".fleet_globals-1\\",\\n \\".fleet_agent_id_verification-1\\"\\n ],\\n ...\\n```\\n\\nThere is no need to import or define any ECS field.\\n\\n### Improved data integrity\\n\\nThe `ecs@mappings` component template supports all the existing ECS fields. If you use any ECS field in your document, it will be mapped with the expected type.\\n\\nTo ensure that `ecs@mappings` is always up to date with the [ECS repository](https://github.com/elastic/ecs/), we run a daily [automated test](https://github.com/elastic/elasticsearch/blob/6ae9dbfda7d71ae3f1bd2bddf9334d37b3294632/x-pack/plugin/stack/src/javaRestTest/java/org/elasticsearch/xpack/stack/EcsDynamicTemplatesIT.java#L49) that verifies the component template supports all fields.\\n\\n### Better Performance\\n\\n#### Compact definitions\\n\\nThe ECS field definition is exceptionally compact; at the time of this writing, it is 228 lines long and supports all ECS fields. To learn more, see the `ecs@mappings` component template [source code](https://github.com/elastic/elasticsearch/blob/v8.15.1/x-pack/plugin/core/template-resources/src/main/resources/ecs%40mappings.json).\\n\\nIt relies on naming conventions and uses [dynamic templates](https://www.elastic.co/guide/en/elasticsearch/reference/8.14/dynamic-templates.html) to achieve this compactness.\\n\\n#### Lazy mapping\\n\\nThanks to dynamic templates, Elasticsearch adds only the fields that actually appear in your documents to the mapping. This lazy mapping keeps memory overhead at a minimum, improving cluster performance and making field suggestions more relevant.\\n\\n### Enhanced documentation and discoverability\\n\\nAll Elastic Agent integrations are migrating to the `ecs@mappings` component template.
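\\n\\nTo picture the naming-convention mechanism described under “Compact definitions,” here is a simplified dynamic template in the same spirit; it illustrates the technique and is not the actual contents of `ecs@mappings`:\\n\\n```json\\n{\\n  \\"dynamic_templates\\": [\\n    {\\n      \\"ecs_ip_fields\\": {\\n        \\"path_match\\": \\"*.ip\\",\\n        \\"match_mapping_type\\": \\"string\\",\\n        \\"mapping\\": { \\"type\\": \\"ip\\" }\\n      }\\n    }\\n  ]\\n}\\n```\\n\\nAny incoming string field whose path ends in `.ip` gets mapped as the `ip` type without ever being listed explicitly.\\n\\n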
These integrations no longer need to add and maintain ECS field mappings and can reference the official [ECS Field Reference](https://www.elastic.co/guide/en/ecs/current/ecs-field-reference.html) or the ECS source code in the Git repository: https://github.com/elastic/ecs/.\\n\\n## Getting started\\n\\n### Requirements\\n\\nTo leverage the `ecs@mappings` component template, ensure at least the following stack version:\\n\\n- **8.9.0**: if your data stream uses the logs index template or you define your own index template.\\n- **8.13.0**: if your data stream uses the index template of an Elastic Agent integration.\\n\\n### Example\\n\\nWe will use the [Custom AWS Logs integration](https://docs.elastic.co/integrations/aws_logs) to show you how `ecs@mappings` can handle mapping for any out-of-the-box ECS field.\\n\\nImagine you want to ingest the following log event using the Custom AWS Logs integration:\\n\\n```json\\n{\\n \\"@timestamp\\": \\"2024-06-11T13:16:00+02:00\\", \\n \\"command_line\\": \\"ls -ltr\\",\\n \\"custom_score\\": 42\\n}\\n```\\n\\n#### Dev Tools\\n\\nKibana offers an excellent tool for experimenting with the Elasticsearch API, the [Dev Tools console](https://www.elastic.co/guide/en/kibana/current/console-kibana.html). With the Dev Tools, users can run all API requests quickly and without much friction.\\n\\nTo open the Dev Tools:\\n\\n- Open **Kibana**\\n- Select **Management > Dev Tools > Console**\\n\\n#### Elasticsearch version < 8.13\\n\\nOn Elasticsearch versions before 8.13, the Custom AWS Logs integration has the following index template:\\n\\n```json\\nGET _index_template/logs-aws_logs.generic\\n{\\n \\"index_templates\\": [\\n {\\n \\"name\\": \\"logs-aws_logs.generic\\",\\n \\"index_template\\": {\\n \\"index_patterns\\": [\\n \\"logs-aws_logs.generic-*\\"\\n ],\\n \\"template\\": {\\n \\"settings\\": {},\\n \\"mappings\\": {\\n \\"_meta\\": {\\n \\"package\\": {\\n \\"name\\": \\"aws_logs\\"\\n },\\n \\"managed_by\\": \\"fleet\\",\\n \\"managed\\": true\\n }\\n }\\n },\\n \\"composed_of\\": [\\n \\"logs-aws_logs.generic@package\\",\\n \\"logs-aws_logs.generic@custom\\",\\n \\".fleet_globals-1\\",\\n \\".fleet_agent_id_verification-1\\"\\n ],\\n \\"priority\\": 200,\\n \\"_meta\\": {\\n \\"package\\": {\\n \\"name\\": \\"aws_logs\\"\\n },\\n \\"managed_by\\": \\"fleet\\",\\n \\"managed\\": true\\n },\\n \\"data_stream\\": {\\n \\"hidden\\": false,\\n \\"allow_custom_routing\\": false\\n }\\n }\\n }\\n ]\\n}\\n```\\n\\nAs you can see, it does not include the `ecs@mappings` component template.\\n\\nIf we try to index the test document:\\n\\n```json\\nPOST logs-aws_logs.generic-default/_doc\\n{\\n \\"@timestamp\\": \\"2024-06-11T13:16:00+02:00\\", \\n \\"command_line\\": \\"ls -ltr\\",\\n \\"custom_score\\": 42\\n}\\n```\\n\\nThe data stream will have the following mappings:\\n\\n```json\\nGET logs-aws_logs.generic-default/_mapping/field/command_line\\n{\\n \\".ds-logs-aws_logs.generic-default-2024.06.11-000001\\": {\\n \\"mappings\\": {\\n \\"command_line\\": {\\n \\"full_name\\": \\"command_line\\",\\n \\"mapping\\": {\\n \\"command_line\\": {\\n \\"type\\": \\"keyword\\",\\n \\"ignore_above\\": 1024\\n }\\n }\\n }\\n }\\n }\\n}\\n\\nGET logs-aws_logs.generic-default/_mapping/field/custom_score\\n{\\n \\".ds-logs-aws_logs.generic-default-2024.06.11-000001\\": {\\n \\"mappings\\": {\\n \\"custom_score\\": {\\n \\"full_name\\": \\"custom_score\\",\\n \\"mapping\\": {\\n \\"custom_score\\": {\\n \\"type\\": \\"long\\"\\n }\\n }\\n }\\n }\\n }\\n}\\n```\\n\\nThese mappings do not align with ECS, so users and developers had to maintain them.\\n\\n
#### Elasticsearch version >= 8.13\\n\\nOn Elasticsearch versions 8.13 and newer, the Custom AWS Logs integration has the following index template:\\n\\n```json\\nGET _index_template/logs-aws_logs.generic\\n{\\n \\"index_templates\\": [\\n {\\n \\"name\\": \\"logs-aws_logs.generic\\",\\n \\"index_template\\": {\\n \\"index_patterns\\": [\\n \\"logs-aws_logs.generic-*\\"\\n ],\\n \\"template\\": {\\n \\"settings\\": {},\\n \\"mappings\\": {\\n \\"_meta\\": {\\n \\"package\\": {\\n \\"name\\": \\"aws_logs\\"\\n },\\n \\"managed_by\\": \\"fleet\\",\\n \\"managed\\": true\\n }\\n }\\n },\\n \\"composed_of\\": [\\n \\"logs@settings\\",\\n \\"logs-aws_logs.generic@package\\",\\n \\"logs-aws_logs.generic@custom\\",\\n \\"ecs@mappings\\",\\n \\".fleet_globals-1\\",\\n \\".fleet_agent_id_verification-1\\"\\n ],\\n \\"priority\\": 200,\\n \\"_meta\\": {\\n \\"package\\": {\\n \\"name\\": \\"aws_logs\\"\\n },\\n \\"managed_by\\": \\"fleet\\",\\n \\"managed\\": true\\n },\\n \\"data_stream\\": {\\n \\"hidden\\": false,\\n \\"allow_custom_routing\\": false\\n },\\n \\"ignore_missing_component_templates\\": [\\n \\"logs-aws_logs.generic@custom\\"\\n ]\\n }\\n }\\n ]\\n}\\n```\\n\\nThe index template for `logs-aws_logs.generic` now includes the `ecs@mappings` component template.\\n\\nIf we try to index the test document:\\n\\n```json\\nPOST logs-aws_logs.generic-default/_doc\\n{\\n \\"@timestamp\\": \\"2024-06-11T13:16:00+02:00\\", \\n \\"command_line\\": \\"ls -ltr\\",\\n \\"custom_score\\": 42\\n}\\n```\\n\\nThe data stream will have the following mappings:\\n\\n```json\\nGET logs-aws_logs.generic-default/_mapping/field/command_line\\n{\\n \\".ds-logs-aws_logs.generic-default-2024.06.11-000001\\": {\\n \\"mappings\\": {\\n \\"command_line\\": {\\n \\"full_name\\": \\"command_line\\",\\n \\"mapping\\": {\\n \\"command_line\\": {\\n \\"type\\": \\"wildcard\\",\\n \\"fields\\": {\\n \\"text\\": {\\n \\"type\\": \\"match_only_text\\"\\n }\\n }\\n }\\n }\\n }\\n }\\n }\\n}\\n\\nGET logs-aws_logs.generic-default/_mapping/field/custom_score\\n{\\n \\".ds-logs-aws_logs.generic-default-2024.06.11-000001\\": {\\n \\"mappings\\": {\\n \\"custom_score\\": {\\n \\"full_name\\": \\"custom_score\\",\\n \\"mapping\\": {\\n \\"custom_score\\": {\\n \\"type\\": \\"float\\"\\n }\\n }\\n }\\n }\\n }\\n}\\n```\\n\\nIn Elasticsearch 8.13, fields like `command_line` and `custom_score` get their definition from ECS out-of-the-box.\\n\\nThese mappings align with ECS, so users and developers do not have to maintain them. The same applies to all the hundreds of field definitions in the Elastic Common Schema. You can achieve all of this by including a single, roughly 200-line component template in your data stream.\\n\\n## Caveats\\n\\nSome aspects of how the `ecs@mappings` component template deals with data types are worth mentioning.\\n\\n### ECS types are not enforced\\n\\nThe `ecs@mappings` component template does not contain mappings for ECS fields where dynamic mapping already uses the correct field type. Therefore, if you send a field value with a compatible but wrong type, Elasticsearch will not coerce the value.\\n\\nFor example, if you send the following document with a `faas.coldstart` field (defined as boolean in ECS):\\n\\n```json\\n{\\n \\"faas.coldstart\\": \\"true\\"\\n}\\n```\\n\\nElasticsearch will map `faas.coldstart` as a `keyword` and not a `boolean`.
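\\n\\nSending the native JSON boolean instead keeps the field aligned with ECS, since dynamic mapping maps JSON booleans to the `boolean` type:\\n\\n```json\\n{\\n \\"faas.coldstart\\": true\\n}\\n```\\n\\n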
Therefore, you need to make sure that the values you ingest into Elasticsearch use the right JSON field types, according to how they’re defined in ECS.\\n\\nThis is the tradeoff for having a compact and efficient `ecs@mappings` component template. It also allows for better compatibility when dealing with a mix of ECS and custom fields because documents won’t be rejected if the types are not consistent with the ones defined in ECS.\\n\\n## Conclusion\\n\\nThe introduction of `ecs@mappings` marks a significant improvement in managing ECS mappings within Elasticsearch. By centralizing and streamlining these definitions, we can ensure higher consistency, reduced maintenance, and better overall performance.\\n\\nWhether you\'re an integration developer or a community user, moving to `ecs@mappings` represents a step towards more efficient and reliable Elasticsearch operations. As we continue incorporating feedback and evolving our tools, your journey with Elasticsearch will only get smoother and more rewarding.\\n\\n**Join the Conversation**\\n\\nDo you have questions or feedback about `ecs@mappings`? Reach our helpful community of users on our [discussion forum](https://discuss.elastic.co/) or [Slack instance](https://ela.st/slack) and share your experiences. Your input is invaluable in helping us fine-tune these advancements for the entire community.\\n\\nHappy mapping!\\n","code":"var Component=(()=>{var p=Object.create;var s=Object.defineProperty;var m=Object.getOwnPropertyDescriptor;var g=Object.getOwnPropertyNames;var u=Object.getPrototypeOf,f=Object.prototype.hasOwnProperty;var w=(i,e)=>()=>(e||i((e={exports:{}}).exports,e),e.exports),y=(i,e)=>{for(var t in e)s(i,t,{get:e[t],enumerable:!0})},l=(i,e,t,o)=>{if(e&&typeof e==\\"object\\"||typeof e==\\"function\\")for(let a of g(e))!f.call(i,a)&&a!==t&&s(i,a,{get:()=>e[a],enumerable:!(o=m(e,a))||o.enumerable});return i};var _=(i,e,t)=>(t=i!=null?p(u(i)):{},l(e||!i||!i.__esModule?s(t,\\"default\\",{value:i,enumerable:!0}):t,i)),E=i=>l(s({},\\"__esModule\\",{value:!0}),i);var r=w((S,c)=>{c.exports=_jsx_runtime});var b={};y(b,{default:()=>h,frontmatter:()=>v});var n=_(r()),v={title:\\"Future-proof your logs with ecs@mappings template\\",slug:\\"future-proof-your-logs-with-ecs-mappings-template\\",date:\\"2024-09-23\\",description:\\"Explore how the ecs@mappings component template in Elasticsearch simplifies data management by providing a centralized, official definition of Elastic Common Schema (ECS) mappings. Learn about its benefits, including reduced configuration hassles, improved data integrity, and enhanced performance for both integration developers and community users. Discover how this feature streamlines ECS field support across Elastic Agent integrations and future-proofs your data streams.\\",author:[{slug:\\"maurizio-branca\\"}],image:\\"article.jpg\\",tags:[{slug:\\"log-analytics\\"},{slug:\\"ecs\\"}]};function d(i){let e={a:\\"a\\",code:\\"code\\",em:\\"em\\",h2:\\"h2\\",h3:\\"h3\\",h4:\\"h4\\",li:\\"li\\",ol:\\"ol\\",p:\\"p\\",pre:\\"pre\\",strong:\\"strong\\",ul:\\"ul\\",...i.components};return(0,n.jsxs)(n.Fragment,{children:[(0,n.jsxs)(e.p,{children:[\\"As the Elasticsearch ecosystem evolves, so do the tools and methodologies designed to streamline data management. 
One advancement that will significantly benefit our community is the \\",(0,n.jsx)(e.a,{href:\\"https://github.com/elastic/elasticsearch/blob/v8.15.1/x-pack/plugin/core/template-resources/src/main/resources/ecs%40mappings.json\\",rel:\\"nofollow\\",children:\\"ecs@mappings\\"}),\\" component template.\\"]}),`\\n`,(0,n.jsxs)(e.p,{children:[(0,n.jsx)(e.a,{href:\\"https://www.elastic.co/guide/en/ecs/current/ecs-reference.html\\",rel:\\"nofollow\\",children:\\"ECS (Elastic Common Schema)\\"}),\\" is a standardized data model for logs and metrics. It defines a set of \\",(0,n.jsx)(e.a,{href:\\"https://www.elastic.co/guide/en/ecs/current/ecs-field-reference.html\\",rel:\\"nofollow\\",children:\\"common field names and data types\\"}),\\" that help ensure consistency and compatibility.\\"]}),`\\n`,(0,n.jsxs)(e.p,{children:[(0,n.jsx)(e.code,{children:\\"ecs@mappings\\"}),\\" is a \\",(0,n.jsx)(e.a,{href:\\"https://www.elastic.co/guide/en/elasticsearch/reference/current/indices-component-template.html\\",rel:\\"nofollow\\",children:\\"component template\\"}),\\" that offers an \\",(0,n.jsx)(e.a,{href:\\"https://github.com/elastic/elasticsearch/blob/v8.15.1/x-pack/plugin/core/template-resources/src/main/resources/ecs%40mappings.json\\",rel:\\"nofollow\\",children:\\"Elastic-maintained\\"}),\\" definition of ECS mappings. Each Elasticsearch release contains an always up-to-date definition of all ECS fields.\\"]}),`\\n`,(0,n.jsx)(e.h3,{id:\\"elastic-common-schema-and-open-telemetry\\",children:\\"Elastic Common Schema and Open Telemetry\\"}),`\\n`,(0,n.jsxs)(e.p,{children:[\\"Elastic will preserve our user\'s investment in Elastic Common Schema by \\",(0,n.jsx)(e.a,{href:\\"https://www.elastic.co/blog/ecs-elastic-common-schema-otel-opentelemetry-faq\\",rel:\\"nofollow\\",children:\\"donating\\"}),\\" ECS to Open Telemetry. Elastic participates and collaborates with the OTel community to merge ECS and Open Telemetry\'s Semantic Conventions over time.\\"]}),`\\n`,(0,n.jsx)(e.h2,{id:\\"the-evolution-of-ecs-mappings\\",children:\\"The Evolution of ECS Mappings\\"}),`\\n`,(0,n.jsx)(e.p,{children:\\"Historically, users and integration developers have defined ECS (Elastic Common Schema) mappings manually within individual index templates and packages, each meticulously listing its fields. 
Although straightforward, this approach proved time-consuming and challenging to maintain.\\"}),`\\n`,(0,n.jsx)(e.p,{children:\\"To tackle this challenge, integration developers moved towards two primary methodologies:\\"}),`\\n`,(0,n.jsxs)(e.ol,{children:[`\\n`,(0,n.jsx)(e.li,{children:\\"Referencing ECS mappings\\"}),`\\n`,(0,n.jsx)(e.li,{children:\\"Importing ECS mappings directly\\"}),`\\n`]}),`\\n`,(0,n.jsx)(e.p,{children:\\"These methods were steps in the right direction but introduced their challenges, such as the maintenance cost of keeping the ECS mappings up-to-date with Elasticsearch changes.\\"}),`\\n`,(0,n.jsx)(e.h2,{id:\\"enter-ecsmappings\\",children:\\"Enter ecs@mappings\\"}),`\\n`,(0,n.jsxs)(e.p,{children:[\\"The \\",(0,n.jsx)(e.a,{href:\\"https://github.com/elastic/elasticsearch/blob/v8.15.1/x-pack/plugin/core/template-resources/src/main/resources/ecs%40mappings.json\\",rel:\\"nofollow\\",children:\\"ecs@mappings\\"}),\\" component template supports all the field definitions in ECS, leveraging naming conventions and a set of dynamic templates.\\"]}),`\\n`,(0,n.jsxs)(e.p,{children:[\\"Elastic started shipping the \\",(0,n.jsx)(e.code,{children:\\"ecs@mappings\\"}),\\" component template with Elasticsearch v8.9.0, including it in the \\",(0,n.jsxs)(e.a,{href:\\"https://github.com/elastic/elasticsearch/blob/v8.14.2/x-pack/plugin/core/template-resources/src/main/resources/logs%40template.json\\",rel:\\"nofollow\\",children:[\\"logs-\\",(0,n.jsx)(e.em,{children:\\"-\\"}),\\" index template\\"]}),\\".\\"]}),`\\n`,(0,n.jsxs)(e.p,{children:[\\"With Elasticsearch v8.13.0, Elastic now includes \\",(0,n.jsx)(e.code,{children:\\"ecs@mappings\\"}),\\" in the index templates of all the Elastic Agent integrations.\\"]}),`\\n`,(0,n.jsx)(e.p,{children:\\"This move was a breakthrough because:\\"}),`\\n`,(0,n.jsxs)(e.ul,{children:[`\\n`,(0,n.jsxs)(e.li,{children:[(0,n.jsx)(e.strong,{children:\\"Centralized\\"}),\\" and official: With ecs@mappings, we now have an official definition of ECS mappings.\\"]}),`\\n`,(0,n.jsxs)(e.li,{children:[(0,n.jsx)(e.strong,{children:\\"Out-of-the-box functionality\\"}),\\": ECS mappings are readily available, reducing the need for additional imports or references.\\"]}),`\\n`,(0,n.jsxs)(e.li,{children:[(0,n.jsx)(e.strong,{children:\\"Simplified maintenance\\"}),\\": The need to manually keep up with ECS changes has diminished since the template from Elasticsearch itself remains up-to-date.\\"]}),`\\n`]}),`\\n`,(0,n.jsx)(e.h3,{id:\\"enhanced-consistency-and-reliability\\",children:\\"Enhanced Consistency and Reliability\\"}),`\\n`,(0,n.jsxs)(e.p,{children:[\\"With \\",(0,n.jsx)(e.code,{children:\\"ecs@mappings\\"}),\\", ECS mappings become the single source of truth. This unified approach means fewer discrepancies and higher consistency in data streams across integrations.\\"]}),`\\n`,(0,n.jsx)(e.h2,{id:\\"how-community-users-benefit\\",children:\\"How Community Users Benefit\\"}),`\\n`,(0,n.jsxs)(e.p,{children:[\\"Community users stand to gain manifold from the adoption of \\",(0,n.jsx)(e.code,{children:\\"ecs@mappings\\"}),\\". 
Here are the key advantages:\\"]}),`\\n`,(0,n.jsxs)(e.ol,{children:[`\\n`,(0,n.jsxs)(e.li,{children:[(0,n.jsx)(e.strong,{children:\\"Reduced configuration hassles\\"}),\\": Whether you are an advanced user or just getting started, the simplified setup means fewer configuration steps and fewer opportunities for errors.\\"]}),`\\n`,(0,n.jsxs)(e.li,{children:[(0,n.jsx)(e.strong,{children:\\"Improved data integrity\\"}),\\": Since ecs@mappings ensures that field definitions are accurate and up-to-date, data integrity is maintained effortlessly.\\"]}),`\\n`,(0,n.jsxs)(e.li,{children:[(0,n.jsx)(e.strong,{children:\\"Better performance\\"}),\\": With less overhead in maintaining and referencing ECS fields, your Elasticsearch operations run more smoothly.\\"]}),`\\n`,(0,n.jsxs)(e.li,{children:[(0,n.jsx)(e.strong,{children:\\"Enhanced documentation and discoverability\\"}),\\": As we standardize ECS mappings, the documentation can be centralized, making it easier for users to discover and understand ECS fields.\\"]}),`\\n`]}),`\\n`,(0,n.jsxs)(e.p,{children:[\\"Let\'s explore how the \\",(0,n.jsx)(e.code,{children:\\"ecs@mappings\\"}),\\" component template helps users achieve these benefits.\\"]}),`\\n`,(0,n.jsx)(e.h3,{id:\\"reduced-configuration-hassles\\",children:\\"Reduced configuration hassles\\"}),`\\n`,(0,n.jsx)(e.p,{children:\\"Modern Elasticsearch versions come with out-of-the-box full ECS field support (see the \\\\u201Crequirements\\\\u201D section later for specific versions).\\"}),`\\n`,(0,n.jsxs)(e.p,{children:[\\"For example, the \\",(0,n.jsx)(e.a,{href:\\"https://docs.elastic.co/integrations/aws_logs\\",rel:\\"nofollow\\",children:\\"Custom AWS Logs integration\\"}),\\" installed on a supported Elasticsearch cluster already includes the \\",(0,n.jsx)(e.code,{children:\\"ecs@mappings\\"}),\\" component template in its index template:\\"]}),`\\n`,(0,n.jsx)(e.pre,{children:(0,n.jsx)(e.code,{className:\\"language-json\\",children:`GET _index_template/logs-aws_logs.generic\\n{\\n \\"index_templates\\": [\\n {\\n \\"name\\": \\"logs-aws_logs.generic\\",\\n ...,\\n \\"composed_of\\": [\\n \\"logs@settings\\",\\n \\"logs-aws_logs.generic@package\\",\\n \\"logs-aws_logs.generic@custom\\",\\n \\"ecs@mappings\\",\\n \\".fleet_globals-1\\",\\n \\".fleet_agent_id_verification-1\\"\\n ],\\n ...\\n`})}),`\\n`,(0,n.jsx)(e.p,{children:\\"There is no need to import or define any ECS field.\\"}),`\\n`,(0,n.jsx)(e.h3,{id:\\"improved-data-integrity\\",children:\\"Improved data integrity\\"}),`\\n`,(0,n.jsxs)(e.p,{children:[\\"The \\",(0,n.jsx)(e.code,{children:\\"ecs@mappings\\"}),\\" component template supports all the existing ECS fields. 
If you use any ECS field in your document, it will accurately have the expected type.\\"]}),`\\n`,(0,n.jsxs)(e.p,{children:[\\"To ensure that \\",(0,n.jsx)(e.code,{children:\\"ecs@mappings\\"}),\\" is always up to date with the \\",(0,n.jsx)(e.a,{href:\\"https://github.com/elastic/ecs/\\",rel:\\"nofollow\\",children:\\"ECS repository\\"}),\\", we set up a daily \\",(0,n.jsx)(e.a,{href:\\"https://github.com/elastic/elasticsearch/blob/6ae9dbfda7d71ae3f1bd2bddf9334d37b3294632/x-pack/plugin/stack/src/javaRestTest/java/org/elasticsearch/xpack/stack/EcsDynamicTemplatesIT.java#L49\\",rel:\\"nofollow\\",children:\\"automated test\\"}),\\" to ensure that the component template supports all fields.\\"]}),`\\n`,(0,n.jsx)(e.h3,{id:\\"better-performance\\",children:\\"Better Performance\\"}),`\\n`,(0,n.jsx)(e.h4,{id:\\"compact-definitions\\",children:\\"Compact definitions\\"}),`\\n`,(0,n.jsxs)(e.p,{children:[\\"The ECS field definition is exceptionally compact; at the time of this writing, it is 228 lines long and supports all ECS fields. To learn more, see the \\",(0,n.jsx)(e.code,{children:\\"ecs@mappings\\"}),\\" component template \\",(0,n.jsx)(e.a,{href:\\"https://github.com/elastic/elasticsearch/blob/v8.15.1/x-pack/plugin/core/template-resources/src/main/resources/ecs%40mappings.json\\",rel:\\"nofollow\\",children:\\"source code\\"}),\\".\\"]}),`\\n`,(0,n.jsxs)(e.p,{children:[\\"It relies on naming conventions and uses \\",(0,n.jsx)(e.a,{href:\\"https://www.elastic.co/guide/en/elasticsearch/reference/8.14/dynamic-templates.html\\",rel:\\"nofollow\\",children:\\"dynamic templates\\"}),\\" to achieve this compactness.\\"]}),`\\n`,(0,n.jsx)(e.h4,{id:\\"lazy-mapping\\",children:\\"Lazy mapping\\"}),`\\n`,(0,n.jsx)(e.p,{children:\\"Elasticsearch only adds existing document fields to the mapping, thanks to dynamic templates. The lazy mapping keeps memory overhead at a minimum, improving cluster performance and making field suggestions more relevant.\\"}),`\\n`,(0,n.jsx)(e.h3,{id:\\"enhanced-documentation-and-discoverability\\",children:\\"Enhanced documentation and discoverability\\"}),`\\n`,(0,n.jsxs)(e.p,{children:[\\"All Elastic Agent integrations are migrating to the \\",(0,n.jsx)(e.code,{children:\\"ecs@mappings\\"}),\\" component template. 
These integrations no longer need to add and maintain ECS field mappings and can reference the official \\",(0,n.jsx)(e.a,{href:\\"https://www.elastic.co/guide/en/ecs/current/ecs-field-reference.html\\",rel:\\"nofollow\\",children:\\"ECS Field Reference\\"}),\\" or the ECS source code in the Git repository: \\",(0,n.jsx)(e.a,{href:\\"https://github.com/elastic/ecs/\\",rel:\\"nofollow\\",children:\\"https://github.com/elastic/ecs/\\"}),\\".\\"]}),`\\n`,(0,n.jsx)(e.h2,{id:\\"getting-started\\",children:\\"Getting started\\"}),`\\n`,(0,n.jsx)(e.h3,{id:\\"requirements\\",children:\\"Requirements\\"}),`\\n`,(0,n.jsxs)(e.p,{children:[\\"To leverage the \\",(0,n.jsx)(e.code,{children:\\"ecs@mappings\\"}),\\" component template, ensure the following stack version:\\"]}),`\\n`,(0,n.jsxs)(e.ul,{children:[`\\n`,(0,n.jsxs)(e.li,{children:[(0,n.jsx)(e.strong,{children:\\"8.9.0\\"}),\\": if your data stream uses the logs index template or you define your index template.\\"]}),`\\n`,(0,n.jsxs)(e.li,{children:[(0,n.jsx)(e.strong,{children:\\"8.13.0\\"}),\\": if your data stream uses the index template of an Elastic Agent integration.\\"]}),`\\n`]}),`\\n`,(0,n.jsx)(e.h3,{id:\\"example\\",children:\\"Example\\"}),`\\n`,(0,n.jsxs)(e.p,{children:[\\"We will use the \\",(0,n.jsx)(e.a,{href:\\"https://docs.elastic.co/integrations/aws_logs\\",rel:\\"nofollow\\",children:\\"Custom AWS Logs integration\\"}),\\" to show you how \\",(0,n.jsx)(e.code,{children:\\"ecs@mapping\\"}),\\" can handle mapping for any out-of-the-box ECS field.\\"]}),`\\n`,(0,n.jsx)(e.p,{children:\\"Imagine you want to ingest the following log event using the Custom AWS Logs integration:\\"}),`\\n`,(0,n.jsx)(e.pre,{children:(0,n.jsx)(e.code,{className:\\"language-json\\",children:`{\\n \\"@timestamp\\": \\"2024-06-11T13:16:00+02:00\\", \\n \\"command_line\\": \\"ls -ltr\\",\\n \\"custom_score\\": 42\\n}\\n`})}),`\\n`,(0,n.jsx)(e.h4,{id:\\"dev-tools\\",children:\\"Dev Tools\\"}),`\\n`,(0,n.jsxs)(e.p,{children:[\\"Kibana offers an excellent tool for experimenting with Elasticseatch API, the \\",(0,n.jsx)(e.a,{href:\\"https://www.elastic.co/guide/en/kibana/current/console-kibana.html\\",rel:\\"nofollow\\",children:\\"Dev Tools console\\"}),\\". 
With the Dev Tools, users can run all API requests quickly and without much friction.\\"]}),`\\n`,(0,n.jsx)(e.p,{children:\\"To open the Dev Tools:\\"}),`\\n`,(0,n.jsxs)(e.ul,{children:[`\\n`,(0,n.jsxs)(e.li,{children:[\\"Open \\",(0,n.jsx)(e.strong,{children:\\"Kibana\\"})]}),`\\n`,(0,n.jsxs)(e.li,{children:[\\"Select \\",(0,n.jsx)(e.strong,{children:\\"Management > Dev Tools > Console\\"})]}),`\\n`]}),`\\n`,(0,n.jsx)(e.h4,{id:\\"elasticsearch-version--813\\",children:\\"Elasticsearch version < 8.13\\"}),`\\n`,(0,n.jsx)(e.p,{children:\\"On Elasticsearch versions before 8.13, the Custom AWS Logs integration has the following index template:\\"}),`\\n`,(0,n.jsx)(e.pre,{children:(0,n.jsx)(e.code,{className:\\"language-json\\",children:`GET _index_template/logs-aws_logs.generic\\n{\\n \\"index_templates\\": [\\n {\\n \\"name\\": \\"logs-aws_logs.generic\\",\\n \\"index_template\\": {\\n \\"index_patterns\\": [\\n \\"logs-aws_logs.generic-*\\"\\n ],\\n \\"template\\": {\\n \\"settings\\": {},\\n \\"mappings\\": {\\n \\"_meta\\": {\\n \\"package\\": {\\n \\"name\\": \\"aws_logs\\"\\n },\\n \\"managed_by\\": \\"fleet\\",\\n \\"managed\\": true\\n }\\n }\\n },\\n \\"composed_of\\": [\\n \\"logs-aws_logs.generic@package\\",\\n \\"logs-aws_logs.generic@custom\\",\\n \\".fleet_globals-1\\",\\n \\".fleet_agent_id_verification-1\\"\\n ],\\n \\"priority\\": 200,\\n \\"_meta\\": {\\n \\"package\\": {\\n \\"name\\": \\"aws_logs\\"\\n },\\n \\"managed_by\\": \\"fleet\\",\\n \\"managed\\": true\\n },\\n \\"data_stream\\": {\\n \\"hidden\\": false,\\n \\"allow_custom_routing\\": false\\n }\\n }\\n }\\n ]\\n}\\n`})}),`\\n`,(0,n.jsx)(e.p,{children:\\"As you can see, it does not include the ecs@mappings component template.\\"}),`\\n`,(0,n.jsx)(e.p,{children:\\"If we try to index the test document:\\"}),`\\n`,(0,n.jsx)(e.pre,{children:(0,n.jsx)(e.code,{className:\\"language-json\\",children:`POST logs-aws_logs.generic-default/_doc\\n{\\n \\"@timestamp\\": \\"2024-06-11T13:16:00+02:00\\", \\n \\"command_line\\": \\"ls -ltr\\",\\n \\"custom_score\\": 42\\n}\\n`})}),`\\n`,(0,n.jsx)(e.p,{children:\\"The data stream will have the following mappings:\\"}),`\\n`,(0,n.jsx)(e.pre,{children:(0,n.jsx)(e.code,{children:`GET logs-aws_logs.generic-default/_mapping/field/command_line\\n{\\n \\".ds-logs-aws_logs.generic-default-2024.06.11-000001\\": {\\n \\"mappings\\": {\\n \\"command_line\\": {\\n \\"full_name\\": \\"command_line\\",\\n \\"mapping\\": {\\n \\"command_line\\": {\\n \\"type\\": \\"keyword\\",\\n \\"ignore_above\\": 1024\\n }\\n }\\n }\\n }\\n }\\n}\\n\\nGET logs-aws_logs.generic-default/_mapping/field/custom_score\\n{\\n \\".ds-logs-aws_logs.generic-default-2024.06.11-000001\\": {\\n \\"mappings\\": {\\n \\"custom_score\\": {\\n \\"full_name\\": \\"custom_score\\",\\n \\"mapping\\": {\\n \\"custom_score\\": {\\n \\"type\\": \\"long\\"\\n }\\n }\\n }\\n }\\n }\\n}\\n`})}),`\\n`,(0,n.jsx)(e.p,{children:\\"These mappings do not align with ECS, so users and developers had to maintain them.\\"}),`\\n`,(0,n.jsx)(e.h4,{id:\\"elasticsearch-version--813-1\\",children:\\"Elasticsearch version >= 8.13\\"}),`\\n`,(0,n.jsx)(e.p,{children:\\"On Elasticsearch versions equal to or newer to 8.13, the Custom AWS Logs integration has the following index template:\\"}),`\\n`,(0,n.jsx)(e.pre,{children:(0,n.jsx)(e.code,{className:\\"language-json\\",children:`GET _index_template/logs-aws_logs.generic\\n{\\n \\"index_templates\\": [\\n {\\n \\"name\\": \\"logs-aws_logs.generic\\",\\n \\"index_template\\": {\\n \\"index_patterns\\": 
[\\n \\"logs-aws_logs.generic-*\\"\\n ],\\n \\"template\\": {\\n \\"settings\\": {},\\n \\"mappings\\": {\\n \\"_meta\\": {\\n \\"package\\": {\\n \\"name\\": \\"aws_logs\\"\\n },\\n \\"managed_by\\": \\"fleet\\",\\n \\"managed\\": true\\n }\\n }\\n },\\n \\"composed_of\\": [\\n \\"logs@settings\\",\\n \\"logs-aws_logs.generic@package\\",\\n \\"logs-aws_logs.generic@custom\\",\\n \\"ecs@mappings\\",\\n \\".fleet_globals-1\\",\\n \\".fleet_agent_id_verification-1\\"\\n ],\\n \\"priority\\": 200,\\n \\"_meta\\": {\\n \\"package\\": {\\n \\"name\\": \\"aws_logs\\"\\n },\\n \\"managed_by\\": \\"fleet\\",\\n \\"managed\\": true\\n },\\n \\"data_stream\\": {\\n \\"hidden\\": false,\\n \\"allow_custom_routing\\": false\\n },\\n \\"ignore_missing_component_templates\\": [\\n \\"logs-aws_logs.generic@custom\\"\\n ]\\n }\\n }\\n ]\\n}\\n`})}),`\\n`,(0,n.jsxs)(e.p,{children:[\\"The index template for \\",(0,n.jsx)(e.code,{children:\\"logs-aws_logs.generic\\"}),\\" now includes the \\",(0,n.jsx)(e.code,{children:\\"ecs@mappings\\"}),\\" component template.\\"]}),`\\n`,(0,n.jsx)(e.p,{children:\\"If we try to index the test document:\\"}),`\\n`,(0,n.jsx)(e.pre,{children:(0,n.jsx)(e.code,{className:\\"language-json\\",children:`POST logs-aws_logs.generic-default/_doc\\n{\\n \\"@timestamp\\": \\"2024-06-11T13:16:00+02:00\\", \\n \\"command_line\\": \\"ls -ltr\\",\\n \\"custom_score\\": 42\\n}\\n`})}),`\\n`,(0,n.jsx)(e.p,{children:\\"The data stream will have the following mappings:\\"}),`\\n`,(0,n.jsx)(e.pre,{children:(0,n.jsx)(e.code,{className:\\"language-json\\",children:`GET logs-aws_logs.generic-default/_mapping/field/command_line\\n{\\n \\".ds-logs-aws_logs.generic-default-2024.06.11-000001\\": {\\n \\"mappings\\": {\\n \\"command_line\\": {\\n \\"full_name\\": \\"command_line\\",\\n \\"mapping\\": {\\n \\"command_line\\": {\\n \\"type\\": \\"wildcard\\",\\n \\"fields\\": {\\n \\"text\\": {\\n \\"type\\": \\"match_only_text\\"\\n }\\n }\\n }\\n }\\n }\\n }\\n }\\n}\\n\\nGET logs-aws_logs.generic-default/_mapping/field/custom_score\\n{\\n \\".ds-logs-aws_logs.generic-default-2024.06.11-000001\\": {\\n \\"mappings\\": {\\n \\"custom_score\\": {\\n \\"full_name\\": \\"custom_score\\",\\n \\"mapping\\": {\\n \\"custom_score\\": {\\n \\"type\\": \\"float\\"\\n }\\n }\\n }\\n }\\n }\\n}\\n`})}),`\\n`,(0,n.jsxs)(e.p,{children:[\\"In Elasticsearch 8.13, fields like \\",(0,n.jsx)(e.code,{children:\\"command_line\\"}),\\" and \\",(0,n.jsx)(e.code,{children:\\"custom_score\\"}),\\" get their definition from ECS out-of-the-box.\\"]}),`\\n`,(0,n.jsx)(e.p,{children:\\"These mappings align with ECS, so users and developers do not have to maintain them. The same applies to all the hundreds of field definitions in the Elastic Common Schema. You can achieve this by including a 200-liner component template in your data stream.\\"}),`\\n`,(0,n.jsx)(e.h2,{id:\\"caveats\\",children:\\"Caveats\\"}),`\\n`,(0,n.jsx)(e.p,{children:\\"Some aspects of how the ecs@mappings component template deals with data types are worth mentioning.\\"}),`\\n`,(0,n.jsx)(e.h3,{id:\\"ecs-types-are-not-enforced\\",children:\\"ECS types are not enforced\\"}),`\\n`,(0,n.jsxs)(e.p,{children:[\\"The \\",(0,n.jsx)(e.code,{children:\\"ecs@mappings\\"}),\\" component template does not contain mappings for ECS fields where dynamic mapping already uses the correct field type. 
Therefore, if you send a field value with a compatible but wrong type, Elasticsearch will not coerce the value.\\"]}),`\\n`,(0,n.jsx)(e.p,{children:\\"For example, if you send the following document with a faas.coldstart field (defined as boolean in ECS):\\"}),`\\n`,(0,n.jsx)(e.pre,{children:(0,n.jsx)(e.code,{className:\\"language-json\\",children:`{\\n \\"faas.coldstart\\": \\"true\\"\\n}\\n`})}),`\\n`,(0,n.jsxs)(e.p,{children:[\\"Elasticsearch will map \\",(0,n.jsx)(e.code,{children:\\"faas.coldstart\\"}),\\" as a \\",(0,n.jsx)(e.code,{children:\\"keyword\\"}),\\" and not a \\",(0,n.jsx)(e.code,{children:\\"boolean\\"}),\\". Therefore, you need to make sure that the values you ingest into Elasticsearch use the right JSON field types, according to how they\\\\u2019re defined in ECS.\\"]}),`\\n`,(0,n.jsx)(e.p,{children:\\"This is the tradeoff for having a compact and efficient ecs@mappings component template. It also allows for better compatibility when dealing with a mix of ECS and custom fields, because documents won\\\\u2019t be rejected if the types are not consistent with the ones defined in ECS.\\"}),`\\n`,(0,n.jsx)(e.h2,{id:\\"conclusion\\",children:\\"Conclusion\\"}),`\\n`,(0,n.jsxs)(e.p,{children:[\\"The introduction of \\",(0,n.jsx)(e.code,{children:\\"ecs@mappings\\"}),\\" marks a significant improvement in managing ECS mappings within Elasticsearch. By centralizing and streamlining these definitions, we can ensure higher consistency, reduced maintenance, and better overall performance.\\"]}),`\\n`,(0,n.jsxs)(e.p,{children:[\\"Whether you\'re an integration developer or a community user, moving to \\",(0,n.jsx)(e.code,{children:\\"ecs@mappings\\"}),\\" represents a step towards more efficient and reliable Elasticsearch operations. As we continue incorporating feedback and evolving our tools, your journey with Elasticsearch will only get smoother and more rewarding.\\"]}),`\\n`,(0,n.jsx)(e.p,{children:(0,n.jsx)(e.strong,{children:\\"Join the Conversation\\"})}),`\\n`,(0,n.jsxs)(e.p,{children:[\\"Do you have questions or feedback about \\",(0,n.jsx)(e.code,{children:\\"ecs@mappings\\"}),\\"? Post them on our community \\",(0,n.jsx)(e.a,{href:\\"https://discuss.elastic.co/\\",rel:\\"nofollow\\",children:\\"discussion forum\\"}),\\" or \\",(0,n.jsx)(e.a,{href:\\"https://ela.st/slack\\",rel:\\"nofollow\\",children:\\"Slack instance\\"}),\\" and share your experiences. 
Your input is invaluable in helping us fine-tune these advancements for the entire community.\\"]}),`\\n`,(0,n.jsx)(e.p,{children:\\"Happy mapping!\\"})]})}function h(i={}){let{wrapper:e}=i.components||{};return e?(0,n.jsx)(e,{...i,children:(0,n.jsx)(d,{...i})}):d(i)}return E(b);})();\\n;return Component;"},"_id":"articles/future-proof-your-logs-with-ecs-mappings-template.mdx","_raw":{"sourceFilePath":"articles/future-proof-your-logs-with-ecs-mappings-template.mdx","sourceFileName":"future-proof-your-logs-with-ecs-mappings-template.mdx","sourceFileDir":"articles","contentType":"mdx","flattenedPath":"articles/future-proof-your-logs-with-ecs-mappings-template"},"type":"Article","imageUrl":"/assets/images/future-proof-your-logs-with-ecs-mappings-template/article.jpg","readingTime":"16 min read","url":"/future-proof-your-logs-with-ecs-mappings-template","headings":[{"level":3,"title":"Elastic Common Schema and Open Telemetry","href":"#elastic-common-schema-and-open-telemetry"},{"level":2,"title":"The Evolution of ECS Mappings","href":"#the-evolution-of-ecs-mappings"},{"level":2,"title":"Enter ecs@mappings","href":"#enter-ecsmappings"},{"level":3,"title":"Enhanced Consistency and Reliability","href":"#enhanced-consistency-and-reliability"},{"level":2,"title":"How Community Users Benefit","href":"#how-community-users-benefit"},{"level":3,"title":"Reduced configuration hassles","href":"#reduced-configuration-hassles"},{"level":3,"title":"Improved data integrity","href":"#improved-data-integrity"},{"level":3,"title":"Better Performance","href":"#better-performance"},{"level":4,"title":"Compact definitions","href":"#compact-definitions"},{"level":4,"title":"Lazy mapping","href":"#lazy-mapping"},{"level":3,"title":"Enhanced documentation and discoverability","href":"#enhanced-documentation-and-discoverability"},{"level":2,"title":"Getting started","href":"#getting-started"},{"level":3,"title":"Requirements","href":"#requirements"},{"level":3,"title":"Example","href":"#example"},{"level":4,"title":"Dev Tools","href":"#dev-tools"},{"level":4,"title":"Elasticsearch version < 8.13","href":"#elasticsearch-version--813"},{"level":4,"title":"Elasticsearch version >= 8.13","href":"#elasticsearch-version--813-1"},{"level":2,"title":"Caveats","href":"#caveats"},{"level":3,"title":"ECS types are not enforced","href":"#ecs-types-are-not-enforced"},{"level":2,"title":"Conclusion","href":"#conclusion"}]},{"title":"Getting started with OpenTelemetry instrumentation with a sample application","slug":"getting-started-opentelemetry-instrumentation-sample-app","date":"2023-09-12","description":"In this article, we’ll introduce you to a simple sample application: a UI for movie search, instrumented in different ways using Python, Go, Java, Node, and .NET. Additionally, we will show how to view your OpenTelemetry data in Elastic APM.","image":"email-thumbnail-generic-release-cloud_(1).png","author":[{"slug":"luca-wintergerst","type":"Author","_raw":{}}],"tags":[{"slug":"opentelemetry","type":"Tag","_raw":{}},{"slug":"apm","type":"Tag","_raw":{}}],"body":{"raw":"\\nApplication performance management (APM) has moved beyond traditional monitoring to become an essential tool for developers, offering deep insights into applications at the code level. With APM, teams can not only detect issues but also understand their root causes, optimizing software performance and end-user experiences. The modern landscape presents a wide range of APM tools and companies offering different solutions. 
Additionally, OpenTelemetry is becoming the open ingestion standard for APM. With OpenTelemetry, DevOps teams have a consistent approach to collecting and ingesting telemetry data.\\n\\nElastic\xae offers its own [APM Agents](https://www.elastic.co/guide/en/apm/agent/index.html), which can be used for instrumenting your code. In addition, Elastic also [supports OpenTelemtry](https://www.elastic.co/observability/opentelemetry) natively.\\n\\nNavigating the differences and understanding how to instrument applications using these tools can be challenging. That\'s where [our sample application, Elastiflix — a UI for movie search](https://github.com/elastic/observability-examples/tree/main/Elastiflix) — comes into play. We\'ve crafted it to demonstrate the nuances of both OTEL and Elastic APM, guiding you through the process of the APM instrumentation and showcasing how you can use one or the other, depending on your preference.\\n\\n## The sample application\\n\\nWe deliberately kept the [movie search UI really simple](https://github.com/elastic/observability-examples/tree/main/Elastiflix). It displays some movies, has a search bar, and, at the time of writing, only one real functionality: you can add a movie to your list of favorites.\\n\\n![luca](/assets/images/getting-started-opentelemetry-instrumentation-sample-app/elastic-blog-1-luca.png)\\n\\n![services](/assets/images/getting-started-opentelemetry-instrumentation-sample-app/elastic-blog-2-services.png)\\n\\n## Services, languages, and instrumentation\\n\\nOur application has a few different services:\\n\\n- **javascript-frontend:** A React frontend, talking to the node service and Elasticsearch\xae\\n- **node-server:** Node backend, talking to other backend services\\n- **dotnet-login:** A login service that returns a random username\\n\\nWe reimplemented the “favorite” service in a few different languages, as we did not want to introduce additional complexity to the architecture of the application.\\n\\n- **Go-favorite:** A Go service that stores a list of favorites movies in Redis\\n- **Java-favorite:** A Java service that stores a list of favorites movies in Redis\\n- **Python-favorite:** A Python service that stores a list of favorites movies in Redis\\n\\nIn addition, there’s also some other supporting containers:\\n\\n- **Movie-data-loader:** Loads the movie database into your Elasticsearch cluster\\n- **Redis:** Used as a datastore for keeping track of the user’s favorites\\n- **Locust:** A load generator that talks to the node service to introduce artificial load\\n\\n![flowchart](/assets/images/getting-started-opentelemetry-instrumentation-sample-app/elastic-blog-3-flowchart.png)\\n\\nThe main difference compared to some other sample application repositories is that we’ve coded it in several languages, with each language version showcasing almost all possible types of instrumentation:\\n\\n![types of instrumentation](/assets/images/getting-started-opentelemetry-instrumentation-sample-app/elastic-blog-4-types_of_instrumentation.png)\\n\\n### Why this approach?\\n\\nWhile sample applications provide good insight into how tools work, they often showcase only one version, leaving developers to find all of the necessary modifications themselves. We\'ve taken a different approach. 
By offering multiple versions, we intend to bridge the knowledge gap, making it straightforward for developers to see and comprehend the transition process from non-instrumented code to either Elastic or OTEL instrumented versions.\\n\\nInstead of simply starting the already instrumented version, you can instrument the base version yourself, by following some of our other blogs. This will teach you much more than just looking at an already built version.\\n\\n- Python: [Auto-instrumentation](https://www.elastic.co/blog/auto-instrumentation-of-python-applications-opentelemetry), [Manual-instrumentation](https://www.elastic.co/blog/manual-instrumentation-of-python-applications-opentelemetry)\\n- Java: [Auto-instrumentation](https://www.elastic.co/blog/auto-instrumentation-of-java-applications-opentelemetry), [Manual-instrumentation](https://www.elastic.co/blog/manual-instrumentation-of-java-applications-opentelemetry)\\n- Node.js: [Auto-instrumentation](https://www.elastic.co/blog/auto-instrument-nodejs-applications-opentelemetry), [Manual-instrumentation](https://www.elastic.co/blog/manual-instrumentation-of-nodejs-applications-opentelemetry)\\n- .NET: [Auto-instrumentation](https://www.elastic.co/blog/auto-instrumentation-of-net-applications-opentelemetry), [Manual-instrumentation](https://www.elastic.co/blog/manual-instrumentation-of-net-applications-opentelemetry)\\n\\n## Prerequisites\\n\\n- Docker and Compose\\n- Elastic Cloud Cluster ([start your free trial](https://ela.st/freetrial))\\n\\nBefore starting the sample application, ensure you\'ve set up your Elastic deployment details. Populate the .env file (located in the same directory as the compose files) with the necessary credentials. You can copy these from the Cloud UI and from within Kibana\xae under the path /app/home#/tutorial/apm.\\n\\n**Cloud UI**\\n\\n![my deployment](/assets/images/getting-started-opentelemetry-instrumentation-sample-app/elastic-blog-5-deployment.png)\\n\\n**Kibana APM Tutorial**\\n\\n![Kibana APM Tutorial](/assets/images/getting-started-opentelemetry-instrumentation-sample-app/elastic-blog-6-configure-agent.png)\\n\\n```bash\\nELASTIC_APM_SERVER_URL=\\"https://foobar.apm.us-central1.gcp.cloud.es.io\\"\\nELASTIC_APM_SECRET_TOKEN=\\"secret123\\"\\nELASTICSEARCH_USERNAME=\\"elastic\\"\\nELASTICSEARCH_PASSWORD=\\"changeme\\"\\nELASTICSEARCH_URL=\\"https://foobar.es.us-central1.gcp.cloud.es.io\\"\\n\\n```\\n\\n## Starting the application\\n\\nYou have the flexibility to initiate our sample app in three distinctive manners, each corresponding to a different instrumentation scenario.\\n\\nWe provide public Docker images that you can use when you supply the --no-build flag. Otherwise the images will be built from source on your machine, which will take around 5–10 minutes.\\n\\n**1. Non-instrumented version**\\n\\n```bash\\ncd Elastiflix\\ndocker-compose -f docker-compose.yml up -d --no-build\\n```\\n\\n**2. Elastic instrumented version**\\n\\n```bash\\ncd Elastiflix\\ndocker-compose -f docker-compose-elastic.yml up -d --no-build\\n```\\n\\n**3. OpenTelemetry instrumented version**\\n\\n```bash\\ncd Elastiflix\\ndocker-compose -f docker-compose-elastic-otel.yml up -d --no-build\\n```\\n\\nAfter launching the desired version, explore the application at localhost:9000. We also deploy a load generator on localhost:8089 where you can increase the number of concurrent users. Note that the load generator is talking directly to the node backend service. 
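As a quick sanity check that the containers came up, you can probe both endpoints from a terminal (a minimal sketch; the ports are the defaults from the compose files):

```bash
# The movie search UI should return HTTP 200 once the frontend is ready
curl -s -o /dev/null -w "%{http_code}\n" http://localhost:9000

# The Locust load generator UI
curl -s -o /dev/null -w "%{http_code}\n" http://localhost:8089
```
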
If you want to generate RUM data from the javascript frontend, then you have to manually browse to localhost:9000 and visit a few pages.\\n\\n## Simulation and failure scenarios\\n\\nIn the real world, applications are subject to varying conditions, random bugs, and misconfigurations. We\'ve incorporated some of these to mimic potential real-life situations. You can find a list of possible environment variables [here](https://github.com/elastic/observability-examples#scenario--feature-toggles).\\n\\n**Non-instrumented scenarios**\\n\\n```bash\\n# healthy\\ndocker-compose -f docker-compose.yml up -d\\n\\n# pause redis for 5 seconds, every 30 seconds\\nTOGGLE_CLIENT_PAUSE=true docker-compose -f docker-compose.yml up -d\\n\\n# add artificial delay to python service, 100ms, delay 50% of requests by 1000ms\\nTOGGLE_SERVICE_DELAY=100 TOGGLE_CANARY_DELAY=1000 docker-compose -f docker-compose.yml up -d\\n\\n# add artificial delay to python service, 100ms, delay 50% of requests by 1000ms, and fail 20% of them\\nTOGGLE_SERVICE_DELAY=100 TOGGLE_CANARY_DELAY=1000 TOGGLE_CANARY_FAILURE=0.2 docker-compose -f docker-compose.yml up -d\\n\\n# throw error in nodejs service, 50% of the time\\nTHROW_NOT_A_FUNCTION_ERROR=true docker-compose -f docker-compose.yml up -d\\n```\\n\\n**Elastic instrumented scenarios**\\n\\n```bash\\n# healthy\\ndocker-compose -f docker-compose-elastic.yml up -d\\n\\n# pause redis for 5 seconds, every 30 seconds\\nTOGGLE_CLIENT_PAUSE=true docker-compose -f docker-compose-elastic.yml up -d\\n\\n# add artificial delay to python service, 100ms, delay 50% of requests by 1000ms\\nTOGGLE_SERVICE_DELAY=100 TOGGLE_CANARY_DELAY=1000 docker-compose -f docker-compose-elastic.yml up -d\\n\\n# add artificial delay to python service, 100ms, delay 50% of requests by 1000ms, and fail 20% of them\\nTOGGLE_SERVICE_DELAY=100 TOGGLE_CANARY_DELAY=1000 TOGGLE_CANARY_FAILURE=0.2 docker-compose -f docker-compose-elastic.yml up -d\\n\\n# throw error in nodejs service, 50% of the time\\nTHROW_NOT_A_FUNCTION_ERROR=true docker-compose -f docker-compose-elastic.yml up -d\\n\\n```\\n\\n**OpenTelemetry instrumented scenarios**\\n\\n```bash\\n# healthy\\ndocker-compose -f docker-compose-elastic-otel.yml up -d\\n\\n# pause redis for 5 seconds, every 30 seconds\\nTOGGLE_CLIENT_PAUSE=true docker-compose -f docker-compose-elastic-otel.yml up -d\\n\\n# add artificial delay to python service, 100ms, delay 50% of requests by 1000ms\\nTOGGLE_SERVICE_DELAY=100 TOGGLE_CANARY_DELAY=1000 docker-compose -f docker-compose-elastic-otel.yml up -d\\n\\n# add artificial delay to python service, 100ms, delay 50% of requests by 1000ms, and fail 20% of them\\nTOGGLE_SERVICE_DELAY=100 TOGGLE_CANARY_DELAY=1000 TOGGLE_CANARY_FAILURE=0.2 docker-compose -f docker-compose-elastic-otel.yml up -d\\n\\n\\n# throw error in nodejs service, 50% of the time\\nTHROW_NOT_A_FUNCTION_ERROR=true docker-compose -f docker-compose-elastic-otel.yml up -d\\n```\\n\\n## Mix Elastic and OTel\\n\\nSince the application has the services in all possible permutations and the “favorite” service even written in multiple languages, you can also run them in a mixed mode.\\n\\nYou can also run some of them in parallel, like we do for the “favorite” service.\\n\\nElastic and OTel are fully compatible, so you could run some services instrumented with OTel while others are running with the Elastic APM Agent.\\n\\nTake a look at the existing compose file and simply copy one of the snippets for each service type.\\n\\n```yaml\\nfavorite-java-otel-auto:\\n build: 
java-favorite-otel-auto/.\\n image: docker.elastic.co/demos/workshop/observability/elastiflix-java-favorite-otel-auto:${ELASTIC_VERSION}-${BUILD_NUMBER}\\n depends_on:\\n - redis\\n networks:\\n - app-network\\n ports:\\n - \\"5004:5000\\"\\n environment:\\n - ELASTIC_APM_SECRET_TOKEN=${ELASTIC_APM_SECRET_TOKEN}\\n - OTEL_EXPORTER_OTLP_ENDPOINT=${ELASTIC_APM_SERVER_URL}\\n - OTEL_METRICS_EXPORTER=otlp\\n - OTEL_RESOURCE_ATTRIBUTES=service.version=1.0,deployment.environment=production\\n - OTEL_SERVICE_NAME=java-favorite-otel-auto\\n - OTEL_TRACES_EXPORTER=otlp\\n - REDIS_HOST=redis\\n - TOGGLE_SERVICE_DELAY=${TOGGLE_SERVICE_DELAY}\\n - TOGGLE_CANARY_DELAY=${TOGGLE_CANARY_DELAY}\\n - TOGGLE_CANARY_FAILURE=${TOGGLE_CANARY_FAILURE}\\n```\\n\\n## Working with the source code\\n\\nThe repository contains all possible permutations of the service.\\n\\n- Subdirectories are named in the format $language-$serviceName-(elastic|otel)-(auto|manual). As an example, [python-favorite-otel-auto](https://github.com/elastic/observability-examples/tree/main/Elastiflix/python-favorite-otel-auto) is a Python service. Its name is “favorite,” and it’s instrumented with OpenTelemetry, using auto-instrumentation.\\n- You can now compare this directory to the non-instrumented version of this service available under the directory [python-favorite](https://github.com/elastic/observability-examples/tree/main/Elastiflix/python-favorite).\\n\\n![code](/assets/images/getting-started-opentelemetry-instrumentation-sample-app/elastic-blog-7-code.png)\\n\\nThis allows you to easily understand the difference between the two. In addition, you can also start from scratch using the non-instrumented version and try to instrument it yourself.\\n\\n## Conclusion\\n\\nMonitoring is more than just observing; it\'s about understanding and optimizing. 
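For instance, a quick way to see exactly what instrumentation adds to a code base is to diff one of the instrumented services against its baseline (a sketch using the directory names mentioned above):

```bash
cd Elastiflix
# Compare the plain Python "favorite" service with its
# OpenTelemetry auto-instrumented variant
diff -r python-favorite python-favorite-otel-auto
```
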
Our sample application seeks to guide you on your journey with Elastic APM or OpenTelemetry, providing you with the tools to build resilient and high-performing applications.\\n\\n> Developer resources:\\n>\\n> - [Elastiflix application](https://www.elastic.co/blog/getting-started-opentelemetry-instrumentation-sample-app), a guide to instrument different languages with OpenTelemetry\\n> - Python: [Auto-instrumentation](https://www.elastic.co/blog/auto-instrumentation-of-python-applications-opentelemetry), [Manual-instrumentation](https://www.elastic.co/blog/manual-instrumentation-of-python-applications-opentelemetry)\\n> - Java: [Auto-instrumentation](https://www.elastic.co/blog/auto-instrumentation-of-java-applications-opentelemetry), [Manual-instrumentation](https://www.elastic.co/blog/manual-instrumentation-of-java-applications-opentelemetry)\\n> - Node.js: [Auto-instrumentation](https://www.elastic.co/blog/auto-instrument-nodejs-applications-opentelemetry), [Manual-instrumentation](https://www.elastic.co/blog/manual-instrumentation-of-nodejs-applications-opentelemetry)\\n> - .NET: [Auto-instrumentation](https://www.elastic.co/blog/auto-instrumentation-of-net-applications-opentelemetry), [Manual-instrumentation](https://www.elastic.co/blog/manual-instrumentation-of-net-applications-opentelemetry)\\n> - Go: [Manual-instrumentation](https://elastic.co/blog/manual-instrumentation-of-go-applications-opentelemetry)\\n> - [Best practices for instrumenting OpenTelemetry](https://www.elastic.co/blog/best-practices-instrumenting-opentelemetry)\\n>\\n> General configuration and use case resources:\\n>\\n> - [Independence with OpenTelemetry on Elastic](https://www.elastic.co/blog/opentelemetry-observability)\\n> - [Modern observability and security on Kubernetes with Elastic and OpenTelemetry](https://www.elastic.co/blog/implementing-kubernetes-observability-security-opentelemetry)\\n> - [3 models for logging with OpenTelemetry and Elastic](https://www.elastic.co/blog/3-models-logging-opentelemetry-elastic)\\n> - [Adding free and open Elastic APM as part of your Elastic Observability deployment](https://www.elastic.co/blog/adding-free-and-open-elastic-apm-as-part-of-your-elastic-observability-deployment)\\n> - [Capturing custom metrics through OpenTelemetry API in code with Elastic](https://www.elastic.co/blog/custom-metrics-app-code-java-agent-plugin)\\n> - [Future-proof your observability platform with OpenTelemetry and Elastic](https://www.elastic.co/virtual-events/future-proof-your-observability-platform-with-opentelemetry-and-elastic)\\n> - [Elastic Observability: Built for open technologies like Kubernetes, OpenTelemetry, Prometheus, Istio, and more](https://www.elastic.co/blog/kubernetes-k8s-observability-elasticsearch-cncf)\\n\\nDon’t have an Elastic Cloud account yet? [Sign up for Elastic Cloud](https://cloud.elastic.co/registration) and try out the auto-instrumentation capabilities that I discussed above. I would be interested in getting your feedback about your experience in gaining visibility into your application stack with Elastic.\\n\\n_The release and timing of any features or functionality described in this post remain at Elastic\'s sole discretion. 
Any features or functionality not currently available may not be delivered on time or at all._\\n","code":"var Component=(()=>{var p=Object.create;var a=Object.defineProperty;var m=Object.getOwnPropertyDescriptor;var u=Object.getOwnPropertyNames;var f=Object.getPrototypeOf,g=Object.prototype.hasOwnProperty;var y=(n,e)=>()=>(e||n((e={exports:{}}).exports,e),e.exports),w=(n,e)=>{for(var o in e)a(n,o,{get:e[o],enumerable:!0})},r=(n,e,o,l)=>{if(e&&typeof e==\\"object\\"||typeof e==\\"function\\")for(let i of u(e))!g.call(n,i)&&i!==o&&a(n,i,{get:()=>e[i],enumerable:!(l=m(e,i))||l.enumerable});return n};var E=(n,e,o)=>(o=n!=null?p(f(n)):{},r(e||!n||!n.__esModule?a(o,\\"default\\",{value:n,enumerable:!0}):o,n)),v=n=>r(a({},\\"__esModule\\",{value:!0}),n);var c=y((_,s)=>{s.exports=_jsx_runtime});var A={};w(A,{default:()=>h,frontmatter:()=>b});var t=E(c()),b={title:\\"Getting started with OpenTelemetry instrumentation with a sample application\\",slug:\\"getting-started-opentelemetry-instrumentation-sample-app\\",date:\\"2023-09-12\\",description:\\"In this article, we\\\\u2019ll introduce you to a simple sample application: a UI for movie search, instrumented in different ways using Python, Go, Java, Node, and .NET. Additionally, we will show how to view your OpenTelemetry data in Elastic APM.\\",author:[{slug:\\"luca-wintergerst\\"}],image:\\"email-thumbnail-generic-release-cloud_(1).png\\",tags:[{slug:\\"opentelemetry\\"},{slug:\\"apm\\"}]};function d(n){let e={a:\\"a\\",blockquote:\\"blockquote\\",code:\\"code\\",em:\\"em\\",h2:\\"h2\\",h3:\\"h3\\",img:\\"img\\",li:\\"li\\",p:\\"p\\",pre:\\"pre\\",strong:\\"strong\\",ul:\\"ul\\",...n.components};return(0,t.jsxs)(t.Fragment,{children:[(0,t.jsx)(e.p,{children:\\"Application performance management (APM) has moved beyond traditional monitoring to become an essential tool for developers, offering deep insights into applications at the code level. With APM, teams can not only detect issues but also understand their root causes, optimizing software performance and end-user experiences. The modern landscape presents a wide range of APM tools and companies offering different solutions. Additionally, OpenTelemetry is becoming the open ingestion standard for APM. With OpenTelemetry, DevOps teams have a consistent approach to collecting and ingesting telemetry data.\\"}),`\\n`,(0,t.jsxs)(e.p,{children:[\\"Elastic\\",(0,t.jsx)(\\"sup\\",{children:\\"\\\\xAE\\"}),\\" offers its own \\",(0,t.jsx)(e.a,{href:\\"https://www.elastic.co/guide/en/apm/agent/index.html\\",rel:\\"nofollow\\",children:\\"APM Agents\\"}),\\", which can be used for instrumenting your code. In addition, Elastic also \\",(0,t.jsx)(e.a,{href:\\"https://www.elastic.co/observability/opentelemetry\\",rel:\\"nofollow\\",children:\\"supports OpenTelemtry\\"}),\\" natively.\\"]}),`\\n`,(0,t.jsxs)(e.p,{children:[\\"Navigating the differences and understanding how to instrument applications using these tools can be challenging. That\'s where \\",(0,t.jsx)(e.a,{href:\\"https://github.com/elastic/observability-examples/tree/main/Elastiflix\\",rel:\\"nofollow\\",children:\\"our sample application, Elastiflix \\\\u2014 a UI for movie search\\"}),\\" \\\\u2014 comes into play. 
We\'ve crafted it to demonstrate the nuances of both OTEL and Elastic APM, guiding you through the process of the APM instrumentation and showcasing how you can use one or the other, depending on your preference.\\"]}),`\\n`,(0,t.jsx)(e.h2,{id:\\"the-sample-application\\",children:\\"The sample application\\"}),`\\n`,(0,t.jsxs)(e.p,{children:[\\"We deliberately kept the \\",(0,t.jsx)(e.a,{href:\\"https://github.com/elastic/observability-examples/tree/main/Elastiflix\\",rel:\\"nofollow\\",children:\\"movie search UI really simple\\"}),\\". It displays some movies, has a search bar, and, at the time of writing, only one real functionality: you can add a movie to your list of favorites.\\"]}),`\\n`,(0,t.jsx)(e.p,{children:(0,t.jsx)(e.img,{src:\\"/assets/images/getting-started-opentelemetry-instrumentation-sample-app/elastic-blog-1-luca.png\\",alt:\\"luca\\",width:\\"1999\\",height:\\"1124\\"})}),`\\n`,(0,t.jsx)(e.p,{children:(0,t.jsx)(e.img,{src:\\"/assets/images/getting-started-opentelemetry-instrumentation-sample-app/elastic-blog-2-services.png\\",alt:\\"services\\",width:\\"1999\\",height:\\"1124\\"})}),`\\n`,(0,t.jsx)(e.h2,{id:\\"services-languages-and-instrumentation\\",children:\\"Services, languages, and instrumentation\\"}),`\\n`,(0,t.jsx)(e.p,{children:\\"Our application has a few different services:\\"}),`\\n`,(0,t.jsxs)(e.ul,{children:[`\\n`,(0,t.jsxs)(e.li,{children:[(0,t.jsx)(e.strong,{children:\\"javascript-frontend:\\"}),\\" A React frontend, talking to the node service and Elasticsearch\\",(0,t.jsx)(\\"sup\\",{children:\\"\\\\xAE\\"})]}),`\\n`,(0,t.jsxs)(e.li,{children:[(0,t.jsx)(e.strong,{children:\\"node-server:\\"}),\\" Node backend, talking to other backend services\\"]}),`\\n`,(0,t.jsxs)(e.li,{children:[(0,t.jsx)(e.strong,{children:\\"dotnet-login:\\"}),\\" A login service that returns a random username\\"]}),`\\n`]}),`\\n`,(0,t.jsx)(e.p,{children:\\"We reimplemented the \\\\u201Cfavorite\\\\u201D service in a few different languages, as we did not want to introduce additional complexity to the architecture of the application.\\"}),`\\n`,(0,t.jsxs)(e.ul,{children:[`\\n`,(0,t.jsxs)(e.li,{children:[(0,t.jsx)(e.strong,{children:\\"Go-favorite:\\"}),\\" A Go service that stores a list of favorites movies in Redis\\"]}),`\\n`,(0,t.jsxs)(e.li,{children:[(0,t.jsx)(e.strong,{children:\\"Java-favorite:\\"}),\\" A Java service that stores a list of favorites movies in Redis\\"]}),`\\n`,(0,t.jsxs)(e.li,{children:[(0,t.jsx)(e.strong,{children:\\"Python-favorite:\\"}),\\" A Python service that stores a list of favorites movies in Redis\\"]}),`\\n`]}),`\\n`,(0,t.jsx)(e.p,{children:\\"In addition, there\\\\u2019s also some other supporting containers:\\"}),`\\n`,(0,t.jsxs)(e.ul,{children:[`\\n`,(0,t.jsxs)(e.li,{children:[(0,t.jsx)(e.strong,{children:\\"Movie-data-loader:\\"}),\\" Loads the movie database into your Elasticsearch cluster\\"]}),`\\n`,(0,t.jsxs)(e.li,{children:[(0,t.jsx)(e.strong,{children:\\"Redis:\\"}),\\" Used as a datastore for keeping track of the user\\\\u2019s favorites\\"]}),`\\n`,(0,t.jsxs)(e.li,{children:[(0,t.jsx)(e.strong,{children:\\"Locust:\\"}),\\" A load generator that talks to the node service to introduce artificial load\\"]}),`\\n`]}),`\\n`,(0,t.jsx)(e.p,{children:(0,t.jsx)(e.img,{src:\\"/assets/images/getting-started-opentelemetry-instrumentation-sample-app/elastic-blog-3-flowchart.png\\",alt:\\"flowchart\\",width:\\"1999\\",height:\\"1126\\"})}),`\\n`,(0,t.jsx)(e.p,{children:\\"The main difference compared to some other sample application repositories 
is that we\\\\u2019ve coded it in several languages, with each language version showcasing almost all possible types of instrumentation:\\"}),`\\n`,(0,t.jsx)(e.p,{children:(0,t.jsx)(e.img,{src:\\"/assets/images/getting-started-opentelemetry-instrumentation-sample-app/elastic-blog-4-types_of_instrumentation.png\\",alt:\\"types of instrumentation\\",width:\\"1586\\",height:\\"554\\"})}),`\\n`,(0,t.jsx)(e.h3,{id:\\"why-this-approach\\",children:\\"Why this approach?\\"}),`\\n`,(0,t.jsx)(e.p,{children:\\"While sample applications provide good insight into how tools work, they often showcase only one version, leaving developers to find all of the necessary modifications themselves. We\'ve taken a different approach. By offering multiple versions, we intend to bridge the knowledge gap, making it straightforward for developers to see and comprehend the transition process from non-instrumented code to either Elastic or OTEL instrumented versions.\\"}),`\\n`,(0,t.jsx)(e.p,{children:\\"Instead of simply starting the already instrumented version, you can instrument the base version yourself, by following some of our other blogs. This will teach you much more than just looking at an already built version.\\"}),`\\n`,(0,t.jsxs)(e.ul,{children:[`\\n`,(0,t.jsxs)(e.li,{children:[\\"Python: \\",(0,t.jsx)(e.a,{href:\\"https://www.elastic.co/blog/auto-instrumentation-of-python-applications-opentelemetry\\",rel:\\"nofollow\\",children:\\"Auto-instrumentation\\"}),\\", \\",(0,t.jsx)(e.a,{href:\\"https://www.elastic.co/blog/manual-instrumentation-of-python-applications-opentelemetry\\",rel:\\"nofollow\\",children:\\"Manual-instrumentation\\"})]}),`\\n`,(0,t.jsxs)(e.li,{children:[\\"Java: \\",(0,t.jsx)(e.a,{href:\\"https://www.elastic.co/blog/auto-instrumentation-of-java-applications-opentelemetry\\",rel:\\"nofollow\\",children:\\"Auto-instrumentation\\"}),\\", \\",(0,t.jsx)(e.a,{href:\\"https://www.elastic.co/blog/manual-instrumentation-of-java-applications-opentelemetry\\",rel:\\"nofollow\\",children:\\"Manual-instrumentation\\"})]}),`\\n`,(0,t.jsxs)(e.li,{children:[\\"Node.js: \\",(0,t.jsx)(e.a,{href:\\"https://www.elastic.co/blog/auto-instrument-nodejs-applications-opentelemetry\\",rel:\\"nofollow\\",children:\\"Auto-instrumentation\\"}),\\", \\",(0,t.jsx)(e.a,{href:\\"https://www.elastic.co/blog/manual-instrumentation-of-nodejs-applications-opentelemetry\\",rel:\\"nofollow\\",children:\\"Manual-instrumentation\\"})]}),`\\n`,(0,t.jsxs)(e.li,{children:[\\".NET: \\",(0,t.jsx)(e.a,{href:\\"https://www.elastic.co/blog/auto-instrumentation-of-net-applications-opentelemetry\\",rel:\\"nofollow\\",children:\\"Auto-instrumentation\\"}),\\", \\",(0,t.jsx)(e.a,{href:\\"https://www.elastic.co/blog/manual-instrumentation-of-net-applications-opentelemetry\\",rel:\\"nofollow\\",children:\\"Manual-instrumentation\\"})]}),`\\n`]}),`\\n`,(0,t.jsx)(e.h2,{id:\\"prerequisites\\",children:\\"Prerequisites\\"}),`\\n`,(0,t.jsxs)(e.ul,{children:[`\\n`,(0,t.jsx)(e.li,{children:\\"Docker and Compose\\"}),`\\n`,(0,t.jsxs)(e.li,{children:[\\"Elastic Cloud Cluster (\\",(0,t.jsx)(e.a,{href:\\"https://ela.st/freetrial\\",rel:\\"nofollow\\",children:\\"start your free trial\\"}),\\")\\"]}),`\\n`]}),`\\n`,(0,t.jsxs)(e.p,{children:[\\"Before starting the sample application, ensure you\'ve set up your Elastic deployment details. Populate the .env file (located in the same directory as the compose files) with the necessary credentials. 
You can copy these from the Cloud UI and from within Kibana\\",(0,t.jsx)(\\"sup\\",{children:\\"\\\\xAE\\"}),\\" under the path /app/home#/tutorial/apm.\\"]}),`\\n`,(0,t.jsx)(e.p,{children:(0,t.jsx)(e.strong,{children:\\"Cloud UI\\"})}),`\\n`,(0,t.jsx)(e.p,{children:(0,t.jsx)(e.img,{src:\\"/assets/images/getting-started-opentelemetry-instrumentation-sample-app/elastic-blog-5-deployment.png\\",alt:\\"my deployment\\",width:\\"1748\\",height:\\"860\\"})}),`\\n`,(0,t.jsx)(e.p,{children:(0,t.jsx)(e.strong,{children:\\"Kibana APM Tutorial\\"})}),`\\n`,(0,t.jsx)(e.p,{children:(0,t.jsx)(e.img,{src:\\"/assets/images/getting-started-opentelemetry-instrumentation-sample-app/elastic-blog-6-configure-agent.png\\",alt:\\"Kibana APM Tutorial\\",width:\\"1882\\",height:\\"908\\"})}),`\\n`,(0,t.jsx)(e.pre,{children:(0,t.jsx)(e.code,{className:\\"language-bash\\",children:`ELASTIC_APM_SERVER_URL=\\"https://foobar.apm.us-central1.gcp.cloud.es.io\\"\\nELASTIC_APM_SECRET_TOKEN=\\"secret123\\"\\nELASTICSEARCH_USERNAME=\\"elastic\\"\\nELASTICSEARCH_PASSWORD=\\"changeme\\"\\nELASTICSEARCH_URL=\\"https://foobar.es.us-central1.gcp.cloud.es.io\\"\\n\\n`})}),`\\n`,(0,t.jsx)(e.h2,{id:\\"starting-the-application\\",children:\\"Starting the application\\"}),`\\n`,(0,t.jsx)(e.p,{children:\\"You have the flexibility to initiate our sample app in three distinctive manners, each corresponding to a different instrumentation scenario.\\"}),`\\n`,(0,t.jsx)(e.p,{children:\\"We provide public Docker images that you can use when you supply the --no-build flag. Otherwise the images will be built from source on your machine, which will take around 5\\\\u201310 minutes.\\"}),`\\n`,(0,t.jsx)(e.p,{children:(0,t.jsx)(e.strong,{children:\\"1. Non-instrumented version\\"})}),`\\n`,(0,t.jsx)(e.pre,{children:(0,t.jsx)(e.code,{className:\\"language-bash\\",children:`cd Elastiflix\\ndocker-compose -f docker-compose.yml up -d --no-build\\n`})}),`\\n`,(0,t.jsx)(e.p,{children:(0,t.jsx)(e.strong,{children:\\"2. Elastic instrumented version\\"})}),`\\n`,(0,t.jsx)(e.pre,{children:(0,t.jsx)(e.code,{className:\\"language-bash\\",children:`cd Elastiflix\\ndocker-compose -f docker-compose-elastic.yml up -d --no-build\\n`})}),`\\n`,(0,t.jsx)(e.p,{children:(0,t.jsx)(e.strong,{children:\\"3. OpenTelemetry instrumented version\\"})}),`\\n`,(0,t.jsx)(e.pre,{children:(0,t.jsx)(e.code,{className:\\"language-bash\\",children:`cd Elastiflix\\ndocker-compose -f docker-compose-elastic-otel.yml up -d --no-build\\n`})}),`\\n`,(0,t.jsx)(e.p,{children:\\"After launching the desired version, explore the application at localhost:9000. We also deploy a load generator on localhost:8089 where you can increase the number of concurrent users. Note that the load generator is talking directly to the node backend service. If you want to generate RUM data from the javascript frontend, then you have to manually browse to localhost:9000 and visit a few pages.\\"}),`\\n`,(0,t.jsx)(e.h2,{id:\\"simulation-and-failure-scenarios\\",children:\\"Simulation and failure scenarios\\"}),`\\n`,(0,t.jsxs)(e.p,{children:[\\"In the real world, applications are subject to varying conditions, random bugs, and misconfigurations. We\'ve incorporated some of these to mimic potential real-life situations. 
You can find a list of possible environment variables \\",(0,t.jsx)(e.a,{href:\\"https://github.com/elastic/observability-examples#scenario--feature-toggles\\",rel:\\"nofollow\\",children:\\"here\\"}),\\".\\"]}),`\\n`,(0,t.jsx)(e.p,{children:(0,t.jsx)(e.strong,{children:\\"Non-instrumented scenarios\\"})}),`\\n`,(0,t.jsx)(e.pre,{children:(0,t.jsx)(e.code,{className:\\"language-bash\\",children:`# healthy\\ndocker-compose -f docker-compose.yml up -d\\n\\n# pause redis for 5 seconds, every 30 seconds\\nTOGGLE_CLIENT_PAUSE=true docker-compose -f docker-compose.yml up -d\\n\\n# add artificial delay to python service, 100ms, delay 50% of requests by 1000ms\\nTOGGLE_SERVICE_DELAY=100 TOGGLE_CANARY_DELAY=1000 docker-compose -f docker-compose.yml up -d\\n\\n# add artificial delay to python service, 100ms, delay 50% of requests by 1000ms, and fail 20% of them\\nTOGGLE_SERVICE_DELAY=100 TOGGLE_CANARY_DELAY=1000 TOGGLE_CANARY_FAILURE=0.2 docker-compose -f docker-compose.yml up -d\\n\\n# throw error in nodejs service, 50% of the time\\nTHROW_NOT_A_FUNCTION_ERROR=true docker-compose -f docker-compose.yml up -d\\n`})}),`\\n`,(0,t.jsx)(e.p,{children:(0,t.jsx)(e.strong,{children:\\"Elastic instrumented scenarios\\"})}),`\\n`,(0,t.jsx)(e.pre,{children:(0,t.jsx)(e.code,{className:\\"language-bash\\",children:`# healthy\\ndocker-compose -f docker-compose-elastic.yml up -d\\n\\n# pause redis for 5 seconds, every 30 seconds\\nTOGGLE_CLIENT_PAUSE=true docker-compose -f docker-compose-elastic.yml up -d\\n\\n# add artificial delay to python service, 100ms, delay 50% of requests by 1000ms\\nTOGGLE_SERVICE_DELAY=100 TOGGLE_CANARY_DELAY=1000 docker-compose -f docker-compose-elastic.yml up -d\\n\\n# add artificial delay to python service, 100ms, delay 50% of requests by 1000ms, and fail 20% of them\\nTOGGLE_SERVICE_DELAY=100 TOGGLE_CANARY_DELAY=1000 TOGGLE_CANARY_FAILURE=0.2 docker-compose -f docker-compose-elastic.yml up -d\\n\\n# throw error in nodejs service, 50% of the time\\nTHROW_NOT_A_FUNCTION_ERROR=true docker-compose -f docker-compose-elastic.yml up -d\\n\\n`})}),`\\n`,(0,t.jsx)(e.p,{children:(0,t.jsx)(e.strong,{children:\\"OpenTelemetry instrumented scenarios\\"})}),`\\n`,(0,t.jsx)(e.pre,{children:(0,t.jsx)(e.code,{className:\\"language-bash\\",children:`# healthy\\ndocker-compose -f docker-compose-elastic-otel.yml up -d\\n\\n# pause redis for 5 seconds, every 30 seconds\\nTOGGLE_CLIENT_PAUSE=true docker-compose -f docker-compose-elastic-otel.yml up -d\\n\\n# add artificial delay to python service, 100ms, delay 50% of requests by 1000ms\\nTOGGLE_SERVICE_DELAY=100 TOGGLE_CANARY_DELAY=1000 docker-compose -f docker-compose-elastic-otel.yml up -d\\n\\n# add artificial delay to python service, 100ms, delay 50% of requests by 1000ms, and fail 20% of them\\nTOGGLE_SERVICE_DELAY=100 TOGGLE_CANARY_DELAY=1000 TOGGLE_CANARY_FAILURE=0.2 docker-compose -f docker-compose-elastic-otel.yml up -d\\n\\n\\n# throw error in nodejs service, 50% of the time\\nTHROW_NOT_A_FUNCTION_ERROR=true docker-compose -f docker-compose-elastic-otel.yml up -d\\n`})}),`\\n`,(0,t.jsx)(e.h2,{id:\\"mix-elastic-and-otel\\",children:\\"Mix Elastic and OTel\\"}),`\\n`,(0,t.jsx)(e.p,{children:\\"Since the application has the services in all possible permutations and the \\\\u201Cfavorite\\\\u201D service even written in multiple languages, you can also run them in a mixed mode.\\"}),`\\n`,(0,t.jsx)(e.p,{children:\\"You can also run some of them in parallel, like we do for the \\\\u201Cfavorite\\\\u201D 
service.\\"}),`\\n`,(0,t.jsx)(e.p,{children:\\"Elastic and OTel are fully compatible, so you could run some services instrumented with OTel while others are running with the Elastic APM Agent.\\"}),`\\n`,(0,t.jsx)(e.p,{children:\\"Take a look at the existing compose file and simply copy one of the snippets for each service type.\\"}),`\\n`,(0,t.jsx)(e.pre,{children:(0,t.jsx)(e.code,{className:\\"language-yaml\\",children:`favorite-java-otel-auto:\\n build: java-favorite-otel-auto/.\\n image: docker.elastic.co/demos/workshop/observability/elastiflix-java-favorite-otel-auto:\\\\${ELASTIC_VERSION}-\\\\${BUILD_NUMBER}\\n depends_on:\\n - redis\\n networks:\\n - app-network\\n ports:\\n - \\"5004:5000\\"\\n environment:\\n - ELASTIC_APM_SECRET_TOKEN=\\\\${ELASTIC_APM_SECRET_TOKEN}\\n - OTEL_EXPORTER_OTLP_ENDPOINT=\\\\${ELASTIC_APM_SERVER_URL}\\n - OTEL_METRICS_EXPORTER=otlp\\n - OTEL_RESOURCE_ATTRIBUTES=service.version=1.0,deployment.environment=production\\n - OTEL_SERVICE_NAME=java-favorite-otel-auto\\n - OTEL_TRACES_EXPORTER=otlp\\n - REDIS_HOST=redis\\n - TOGGLE_SERVICE_DELAY=\\\\${TOGGLE_SERVICE_DELAY}\\n - TOGGLE_CANARY_DELAY=\\\\${TOGGLE_CANARY_DELAY}\\n - TOGGLE_CANARY_FAILURE=\\\\${TOGGLE_CANARY_FAILURE}\\n`})}),`\\n`,(0,t.jsx)(e.h2,{id:\\"working-with-the-source-code\\",children:\\"Working with the source code\\"}),`\\n`,(0,t.jsx)(e.p,{children:\\"The repository contains all possible permutations of the service.\\"}),`\\n`,(0,t.jsxs)(e.ul,{children:[`\\n`,(0,t.jsxs)(e.li,{children:[\\"Subdirectories are named in the format $langauge-$serviceName-(elastic|otel)-(auto|manual). As an example, \\",(0,t.jsx)(e.a,{href:\\"https://github.com/elastic/observability-examples/tree/main/Elastiflix/python-favorite-otel-auto\\",rel:\\"nofollow\\",children:\\"python-favorite-otel-auto\\"}),\\" is a Python service. The name of it is \\\\u201Cfavorite,\\\\u201D and it\\\\u2019s instrumented with OpenTelemetry, using auto-instrumentation.\\"]}),`\\n`,(0,t.jsxs)(e.li,{children:[\\"You can now compare this directory to the non-instrumented version of this service available under the directory \\",(0,t.jsx)(e.a,{href:\\"https://github.com/elastic/observability-examples/tree/main/Elastiflix/python-favorite\\",rel:\\"nofollow\\",children:\\"python-favorite\\"}),\\".\\"]}),`\\n`]}),`\\n`,(0,t.jsx)(e.p,{children:(0,t.jsx)(e.img,{src:\\"/assets/images/getting-started-opentelemetry-instrumentation-sample-app/elastic-blog-7-code.png\\",alt:\\"code\\",width:\\"1999\\",height:\\"1028\\"})}),`\\n`,(0,t.jsx)(e.p,{children:\\"This allows you to easily understand the difference between the two. In addition, you can also start from scratch using the non-instrumentation version and try to instrument it yourself.\\"}),`\\n`,(0,t.jsx)(e.h2,{id:\\"conclusion\\",children:\\"Conclusion\\"}),`\\n`,(0,t.jsx)(e.p,{children:\\"Monitoring is more than just observing; it\'s about understanding and optimizing. 
Our sample application seeks to guide you on your journey with Elastic APM or OpenTelemetry, providing you with the tools to build resilient and high-performing applications.\\"}),`\\n`,(0,t.jsxs)(e.blockquote,{children:[`\\n`,(0,t.jsx)(e.p,{children:\\"Developer resources:\\"}),`\\n`,(0,t.jsxs)(e.ul,{children:[`\\n`,(0,t.jsxs)(e.li,{children:[(0,t.jsx)(e.a,{href:\\"https://www.elastic.co/blog/getting-started-opentelemetry-instrumentation-sample-app\\",rel:\\"nofollow\\",children:\\"Elastiflix application\\"}),\\", a guide to instrument different languages with OpenTelemetry\\"]}),`\\n`,(0,t.jsxs)(e.li,{children:[\\"Python: \\",(0,t.jsx)(e.a,{href:\\"https://www.elastic.co/blog/auto-instrumentation-of-python-applications-opentelemetry\\",rel:\\"nofollow\\",children:\\"Auto-instrumentation\\"}),\\", \\",(0,t.jsx)(e.a,{href:\\"https://www.elastic.co/blog/manual-instrumentation-of-python-applications-opentelemetry\\",rel:\\"nofollow\\",children:\\"Manual-instrumentation\\"})]}),`\\n`,(0,t.jsxs)(e.li,{children:[\\"Java: \\",(0,t.jsx)(e.a,{href:\\"https://www.elastic.co/blog/auto-instrumentation-of-java-applications-opentelemetry\\",rel:\\"nofollow\\",children:\\"Auto-instrumentation\\"}),\\", \\",(0,t.jsx)(e.a,{href:\\"https://www.elastic.co/blog/manual-instrumentation-of-java-applications-opentelemetry\\",rel:\\"nofollow\\",children:\\"Manual-instrumentation\\"})]}),`\\n`,(0,t.jsxs)(e.li,{children:[\\"Node.js: \\",(0,t.jsx)(e.a,{href:\\"https://www.elastic.co/blog/auto-instrument-nodejs-applications-opentelemetry\\",rel:\\"nofollow\\",children:\\"Auto-instrumentation\\"}),\\", \\",(0,t.jsx)(e.a,{href:\\"https://www.elastic.co/blog/manual-instrumentation-of-nodejs-applications-opentelemetry\\",rel:\\"nofollow\\",children:\\"Manual-instrumentation\\"})]}),`\\n`,(0,t.jsxs)(e.li,{children:[\\".NET: \\",(0,t.jsx)(e.a,{href:\\"https://www.elastic.co/blog/auto-instrumentation-of-net-applications-opentelemetry\\",rel:\\"nofollow\\",children:\\"Auto-instrumentation\\"}),\\", \\",(0,t.jsx)(e.a,{href:\\"https://www.elastic.co/blog/manual-instrumentation-of-net-applications-opentelemetry\\",rel:\\"nofollow\\",children:\\"Manual-instrumentation\\"})]}),`\\n`,(0,t.jsxs)(e.li,{children:[\\"Go: \\",(0,t.jsx)(e.a,{href:\\"https://elastic.co/blog/manual-instrumentation-of-go-applications-opentelemetry\\",rel:\\"nofollow\\",children:\\"Manual-instrumentation\\"})]}),`\\n`,(0,t.jsx)(e.li,{children:(0,t.jsx)(e.a,{href:\\"https://www.elastic.co/blog/best-practices-instrumenting-opentelemetry\\",rel:\\"nofollow\\",children:\\"Best practices for instrumenting OpenTelemetry\\"})}),`\\n`]}),`\\n`,(0,t.jsx)(e.p,{children:\\"General configuration and use case resources:\\"}),`\\n`,(0,t.jsxs)(e.ul,{children:[`\\n`,(0,t.jsx)(e.li,{children:(0,t.jsx)(e.a,{href:\\"https://www.elastic.co/blog/opentelemetry-observability\\",rel:\\"nofollow\\",children:\\"Independence with OpenTelemetry on Elastic\\"})}),`\\n`,(0,t.jsx)(e.li,{children:(0,t.jsx)(e.a,{href:\\"https://www.elastic.co/blog/implementing-kubernetes-observability-security-opentelemetry\\",rel:\\"nofollow\\",children:\\"Modern observability and security on Kubernetes with Elastic and OpenTelemetry\\"})}),`\\n`,(0,t.jsx)(e.li,{children:(0,t.jsx)(e.a,{href:\\"https://www.elastic.co/blog/3-models-logging-opentelemetry-elastic\\",rel:\\"nofollow\\",children:\\"3 models for logging with OpenTelemetry and 
Elastic\\"})}),`\\n`,(0,t.jsx)(e.li,{children:(0,t.jsx)(e.a,{href:\\"https://www.elastic.co/blog/adding-free-and-open-elastic-apm-as-part-of-your-elastic-observability-deployment\\",rel:\\"nofollow\\",children:\\"Adding free and open Elastic APM as part of your Elastic Observability deployment\\"})}),`\\n`,(0,t.jsx)(e.li,{children:(0,t.jsx)(e.a,{href:\\"https://www.elastic.co/blog/custom-metrics-app-code-java-agent-plugin\\",rel:\\"nofollow\\",children:\\"Capturing custom metrics through OpenTelemetry API in code with Elastic\\"})}),`\\n`,(0,t.jsx)(e.li,{children:(0,t.jsx)(e.a,{href:\\"https://www.elastic.co/virtual-events/future-proof-your-observability-platform-with-opentelemetry-and-elastic\\",rel:\\"nofollow\\",children:\\"Future-proof your observability platform with OpenTelemetry and Elastic\\"})}),`\\n`,(0,t.jsx)(e.li,{children:(0,t.jsx)(e.a,{href:\\"https://www.elastic.co/blog/kubernetes-k8s-observability-elasticsearch-cncf\\",rel:\\"nofollow\\",children:\\"Elastic Observability: Built for open technologies like Kubernetes, OpenTelemetry, Prometheus, Istio, and more\\"})}),`\\n`]}),`\\n`]}),`\\n`,(0,t.jsxs)(e.p,{children:[\\"Don\\\\u2019t have an Elastic Cloud account yet? \\",(0,t.jsx)(e.a,{href:\\"https://cloud.elastic.co/registration\\",rel:\\"nofollow\\",children:\\"Sign up for Elastic Cloud\\"}),\\" and try out the auto-instrumentation capabilities that I discussed above. I would be interested in getting your feedback about your experience in gaining visibility into your application stack with Elastic.\\"]}),`\\n`,(0,t.jsx)(e.p,{children:(0,t.jsx)(e.em,{children:\\"The release and timing of any features or functionality described in this post remain at Elastic\'s sole discretion. Any features or functionality not currently available may not be delivered on time or at all.\\"})})]})}function h(n={}){let{wrapper:e}=n.components||{};return e?(0,t.jsx)(e,{...n,children:(0,t.jsx)(d,{...n})}):d(n)}return v(A);})();\\n;return Component;"},"_id":"articles/getting-started-opentelemetry-instrumentation-sample-app.mdx","_raw":{"sourceFilePath":"articles/getting-started-opentelemetry-instrumentation-sample-app.mdx","sourceFileName":"getting-started-opentelemetry-instrumentation-sample-app.mdx","sourceFileDir":"articles","contentType":"mdx","flattenedPath":"articles/getting-started-opentelemetry-instrumentation-sample-app"},"type":"Article","imageUrl":"/assets/images/getting-started-opentelemetry-instrumentation-sample-app/email-thumbnail-generic-release-cloud_(1).png","readingTime":"9 min read","url":"/getting-started-opentelemetry-instrumentation-sample-app","headings":[{"level":2,"title":"The sample application","href":"#the-sample-application"},{"level":2,"title":"Services, languages, and instrumentation","href":"#services-languages-and-instrumentation"},{"level":3,"title":"Why this approach?","href":"#why-this-approach"},{"level":2,"title":"Prerequisites","href":"#prerequisites"},{"level":2,"title":"Starting the application","href":"#starting-the-application"},{"level":2,"title":"Simulation and failure scenarios","href":"#simulation-and-failure-scenarios"},{"level":2,"title":"Mix Elastic and OTel","href":"#mix-elastic-and-otel"},{"level":2,"title":"Working with the source code","href":"#working-with-the-source-code"},{"level":2,"title":"Conclusion","href":"#conclusion"}]},{"title":"Understanding APM: How to add extensions to the OpenTelemetry Java Agent","slug":"extensions-opentelemetry-java-agent","date":"2023-07-24","description":"This blog post provides a comprehensive guide for 
Site Reliability Engineers (SREs) and IT Operations to gain visibility and traceability into applications, especially those written with non-standard frameworks or without access to the source code.","image":"flexible-implementation-1680X980.png","author":[{"slug":"david-hope","type":"Author","_raw":{}}],"tags":[{"slug":"opentelemetry","type":"Tag","_raw":{}},{"slug":"apm","type":"Tag","_raw":{}},{"slug":"java","type":"Tag","_raw":{}}],"body":{"raw":"\\n## Without code access, SREs and IT Operations cannot always get the visibility they need\\n\\nAs an SRE, have you ever had a situation where you were working on an application that was written with non-standard frameworks, or you wanted to get some interesting business data from an application (number of orders processed for example) but you didn’t have access to the source code?\\n\\nWe all know this can be a challenging scenario resulting in visibility gaps, inability to fully trace code end to end, and missing critical business monitoring data that is useful for understanding the true impact of issues.\\n\\nHow can we solve this? One way we discussed in the following three blogs:\\n\\n- [Create your own instrumentation with the Java Agent Plugin](https://www.elastic.co/blog/create-your-own-instrumentation-with-the-java-agent-plugin)\\n- [How to capture custom metrics without app code changes using the Java Agent Plugin](https://www.elastic.co/blog/custom-metrics-app-code-java-agent-plugin)\\n- [Regression testing your Java Agent Plugin](https://www.elastic.co/blog/regression-testing-your-java-agent-plugin)\\n\\nThis is where we develop a plugin for the Elastic\xae APM Agent to help get access to critical business data for monitoring and add tracing where none exists.\\n\\nWhat we will discuss in this blog is how you can do the same with the [OpenTelemetry Java Agent](https://opentelemetry.io/docs/instrumentation/java/automatic/) using the Extensions framework.\\n\\n## Basic concepts: How APM works\\n\\nBefore we continue, let\'s first understand a few basic concepts and terms.\\n\\n- **Java Agent:** This is a tool that can be used to instrument (or modify) the bytecode of class files in the Java Virtual Machine (JVM). Java agents are used for many purposes like performance monitoring, logging, security, and more.\\n- **Bytecode:** This is the intermediary code generated by the Java compiler from your Java source code. This code is interpreted or compiled on the fly by the JVM to produce machine code that can be executed.\\n- **Byte Buddy:** Byte Buddy is a code generation and manipulation library for Java. It is used to create, modify, or adapt Java classes at runtime. In the context of a Java Agent, Byte Buddy provides a powerful and flexible way to modify bytecode. **Both the Elastic APM Agent and the OpenTelemetry Agent use Byte Buddy under the covers.**\\n\\n**Now, let\'s talk about how automatic instrumentation works with Byte Buddy:**\\n\\nAutomatic instrumentation is the process by which an agent modifies the bytecode of your application\'s classes, often to insert monitoring code. The agent doesn\'t modify the source code directly, but rather the bytecode that is loaded into the JVM. This is done while the JVM is loading the classes, so the modifications are in effect during runtime.\\n\\nHere\'s a simplified explanation of the process:\\n\\n1. **Start the JVM with the agent:** When starting your Java application, you specify the Java agent with the -javaagent command line option. 
This instructs the JVM to load your agent before the main method of your application is invoked. At this point, the agent has the opportunity to set up class transformers.\\n\\n2. **Register a class file transformer with Byte Buddy:** Your agent will register a class file transformer with Byte Buddy. A transformer is a piece of code that is invoked every time a class is loaded into the JVM. This transformer receives the bytecode of the class and it can modify this bytecode before the class is actually used.\\n\\n3. **Transform the bytecode:** When your transformer is invoked, it will use Byte Buddy\'s API to modify the bytecode. Byte Buddy allows you to specify your transformations in a high-level, expressive way rather than manually writing complex bytecode. For example, you could specify a certain class and method within that class that you want to instrument and provide an \\"interceptor\\" that will add new behavior to that method.\\n\\n4. **Use the transformed classes:** Once the agent has set up its transformers, the JVM continues to load classes as usual. Each time a class is loaded, your transformers are invoked, allowing them to modify the bytecode. Your application then uses these transformed classes as if they were the original ones, but they now have the extra behavior that you\'ve injected through your interceptor.\\n\\n![flowchart process](/assets/images/extensions-opentelemetry-java-agent/elastic-blog-1-flowchart-process.png)\\n\\nIn essence, automatic instrumentation with Byte Buddy is about modifying the behavior of your Java classes at runtime, without needing to alter the source code directly. This is especially useful for cross-cutting concerns like logging, monitoring, or security, as it allows you to centralize this code in your Java Agent, rather than scattering it throughout your application.\\n\\n## Application, prerequisites, and config\\n\\nThere is a really simple application in [this GitHub repository](https://github.com/davidgeorgehope/custom-instrumentation-examples) that is used throughout this blog. 
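Before we get to it, here is roughly what the four steps above look like as code. This is a minimal, illustrative sketch, not code from the blog's repository: the TimingAgent and TimingAdvice names and the timing logic are assumptions, and the transformer lambda assumes a recent Byte Buddy version.

```java
import java.lang.instrument.Instrumentation;
import java.security.ProtectionDomain;

import net.bytebuddy.agent.builder.AgentBuilder;
import net.bytebuddy.asm.Advice;
import net.bytebuddy.description.type.TypeDescription;
import net.bytebuddy.dynamic.DynamicType;
import net.bytebuddy.matcher.ElementMatchers;
import net.bytebuddy.utility.JavaModule;

public class TimingAgent {

    // Step 1: the JVM invokes premain before main because we started it
    // with -javaagent:timing-agent.jar
    public static void premain(String args, Instrumentation inst) {
        new AgentBuilder.Default()
                // Step 2: register a transformer that fires when this class loads
                .type(ElementMatchers.named("org.davidgeorgehope.Main"))
                // Step 3: rewrite the bytecode of countWords with the advice below
                .transform((DynamicType.Builder<?> builder,
                            TypeDescription type,
                            ClassLoader loader,
                            JavaModule module,
                            ProtectionDomain domain) ->
                        builder.visit(Advice.to(TimingAdvice.class)
                                .on(ElementMatchers.named("countWords"))))
                .installOn(inst);
        // Step 4: from here on, the JVM loads the transformed class and the
        // application uses it as if it were the original
    }

    public static class TimingAdvice {

        @Advice.OnMethodEnter
        public static long enter() {
            return System.nanoTime();
        }

        @Advice.OnMethodExit
        public static void exit(@Advice.Enter long startNanos) {
            System.out.println("countWords took " + (System.nanoTime() - startNanos) + " ns");
        }
    }
}
```

The extension we build later in this post achieves the same effect through the OpenTelemetry extension API rather than through raw Byte Buddy calls. Now, back to the simple application.
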
What it does is it simply asks you to input some text and then it counts the number of words.\\n\\nIt’s also listed below:\\n\\n```java\\npackage org.davidgeorgehope;\\nimport java.util.Scanner;\\nimport java.util.logging.Logger;\\n\\npublic class Main {\\n private static Logger logger = Logger.getLogger(Main.class.getName());\\n\\n public static void main(String[] args) {\\n Scanner scanner = new Scanner(System.in);\\n while (true) {\\n System.out.println(\\"Please enter your sentence:\\");\\n String input = scanner.nextLine();\\n Main main = new Main();\\n int wordCount = main.countWords(input);\\n System.out.println(\\"The input contains \\" + wordCount + \\" word(s).\\");\\n }\\n }\\n public int countWords(String input) {\\n\\n try {\\n Thread.sleep(10000);\\n } catch (InterruptedException e) {\\n throw new RuntimeException(e);\\n }\\n\\n if (input == null || input.isEmpty()) {\\n return 0;\\n }\\n\\n String[] words = input.split(\\"\\\\s+\\");\\n return words.length;\\n }\\n}\\n```\\n\\nFor the purposes of this blog, we will be using Elastic Cloud to capture the data generated by OpenTelemetry — [follow the instructions here](https://www.elastic.co/getting-started/observability/collect-and-analyze-logs#create-an-elastic-cloud-account) to [get started on Elastic Cloud](https://cloud.elastic.co/registration?fromURI=/home).\\n\\nOnce you are started with Elastic Cloud, go grab the OpenTelemetry config from the APM pages:\\n\\n![apm agents](/assets/images/extensions-opentelemetry-java-agent/elastic-blog-2-apm-agents.png)\\n\\nYou will need this later.\\n\\nFinally, [download the OpenTelemetry Agent](https://github.com/open-telemetry/opentelemetry-java-instrumentation/releases).\\n\\n## Firing up the application and OpenTelemetry\\n\\nIf you start out with this simple application, build it and run it like so with the OpenTelemetry Agent, filling in the appropriate variables with those you got from earlier.\\n\\n```java\\njava -javaagent:opentelemetry-javaagent.jar -Dotel.exporter.otlp.endpoint=XX -Dotel.exporter.otlp.headers=XX -Dotel.metrics.exporter=otlp -Dotel.logs.exporter=otlp -Dotel.resource.attributes=XX -Dotel.service.name=your-service-name -jar simple-java-1.0-SNAPSHOT.jar\\n```\\n\\nYou will find nothing happens. The reason for this is that the OpenTelemetry Agent has no way of knowing what to monitor. The way that APM with automatic instrumentation works is that it “knows” about standard frameworks, like Spring or HTTPClient, and is able to get visibility by “injecting” trace code into those standard frameworks automatically.\\n\\nIt has no knowledge of org.davidgeorgehope.Main from our simple Java application.\\n\\nLuckily, there is a way we can add this using the [OpenTelemetry Extensions framework](https://opentelemetry.io/docs/instrumentation/java/automatic/extensions/).\\n\\n## The OpenTelemetry Extension\\n\\nIn the repository above, aside from the simple-java application, there is also a plugin for Elastic APM and an extension for OpenTelemetry. The relevant files for OpenTelemetry Extension are located [here](https://github.com/davidgeorgehope/custom-instrumentation-examples/tree/main/opentelemetry-custom-instrumentation/src/main/java/org/davidgeorgehope) — WordCountInstrumentation.java and WordCountInstrumentationModule.java .\\n\\nYou’ll notice that OpenTelemetry Extensions and Elastic APM Plugins both make use of Byte Buddy, which is a common library for code instrumentation. 
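We will dig into that code in a moment, but for reference, this is how you would attach the finished extension to the agent at runtime. The otel.javaagent.extensions property is the agent's documented mechanism for loading extension jars; the jar name below is an assumption based on this repository's module name, so substitute whatever your build produces:

```bash
java -javaagent:opentelemetry-javaagent.jar \
  -Dotel.javaagent.extensions=opentelemetry-custom-instrumentation.jar \
  -Dotel.exporter.otlp.endpoint=XX \
  -Dotel.exporter.otlp.headers=XX \
  -Dotel.service.name=your-service-name \
  -jar simple-java-1.0-SNAPSHOT.jar
```
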
There are some key differences in the way the code is bootstrapped, though.

The WordCountInstrumentationModule class extends an OpenTelemetry-specific class, InstrumentationModule, whose purpose is to describe a set of TypeInstrumentation instances that need to be applied together to correctly instrument a specific library. The WordCountInstrumentation class is one such TypeInstrumentation.

Type instrumentations grouped in a module share helper classes, muzzle runtime checks, and applicable class loader criteria, and can only be enabled or disabled as a set.

This is a little different from how the Elastic APM plugin works: with OpenTelemetry, the default method of injecting code is inline, and you can inject dependencies into the core application classloader using the InstrumentationModule configuration (as shown below). The Elastic APM method is safer, as it isolates helper classes and makes it easier to debug with normal IDEs; we are contributing this method to OpenTelemetry. Here we inject the TypeInstrumentation interface and the WordCountInstrumentation class into the classloader:

```java
@Override
public List<String> getAdditionalHelperClassNames() {
  return List.of(WordCountInstrumentation.class.getName(),
      "io.opentelemetry.javaagent.extension.instrumentation.TypeInstrumentation");
}
```

The other interesting part of the InstrumentationModule class is the setup.

Here we give our instrumentation "group" a name. An InstrumentationModule needs to have at least one name, the user of the javaagent can suppress a chosen instrumentation by referring to it by one of its names, and instrumentation module names use kebab-case:

```java
public WordCountInstrumentationModule() {
  super("wordcount-demo", "wordcount");
}
```

Apart from this, the module class has methods to control the order in which it loads relative to other instrumentation, if needed, and it names the class that implements TypeInstrumentation and is responsible for the bulk of the instrumentation work. See the sketch below for how these pieces fit together.
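For orientation, here is roughly how those pieces could look when assembled into one module class. This is a sketch built from the snippets above and the InstrumentationModule API, not the exact file from the repository:

```java
package org.davidgeorgehope;

import io.opentelemetry.javaagent.extension.instrumentation.InstrumentationModule;
import io.opentelemetry.javaagent.extension.instrumentation.TypeInstrumentation;
import java.util.List;

// Module classes are discovered by the javaagent via SPI
// (META-INF/services/io.opentelemetry.javaagent.extension.instrumentation.InstrumentationModule).
public class WordCountInstrumentationModule extends InstrumentationModule {

  public WordCountInstrumentationModule() {
    // The module's names; users can suppress the instrumentation by any of these.
    super("wordcount-demo", "wordcount");
  }

  @Override
  public List<String> getAdditionalHelperClassNames() {
    // Classes injected into the application classloader so the advice can resolve them.
    return List.of(WordCountInstrumentation.class.getName(),
        "io.opentelemetry.javaagent.extension.instrumentation.TypeInstrumentation");
  }

  @Override
  public int order() {
    // Optional: controls load order relative to other instrumentation modules.
    return 0;
  }

  @Override
  public List<TypeInstrumentation> typeInstrumentations() {
    // The set of type instrumentations this module applies together.
    return List.of(new WordCountInstrumentation());
  }
}
```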
Let's now take a look at that WordCountInstrumentation class, which implements TypeInstrumentation:

```java
package org.davidgeorgehope;

import static net.bytebuddy.matcher.ElementMatchers.namedOneOf;

import io.opentelemetry.api.GlobalOpenTelemetry;
import io.opentelemetry.api.trace.Span;
import io.opentelemetry.api.trace.StatusCode;
import io.opentelemetry.api.trace.Tracer;
import io.opentelemetry.context.Scope;
import io.opentelemetry.javaagent.extension.instrumentation.TypeInstrumentation;
import io.opentelemetry.javaagent.extension.instrumentation.TypeTransformer;
import java.util.logging.Logger;
import net.bytebuddy.asm.Advice;
import net.bytebuddy.description.type.TypeDescription;
import net.bytebuddy.matcher.ElementMatcher;
import net.bytebuddy.matcher.ElementMatchers;

// The WordCountInstrumentation class implements the TypeInstrumentation interface.
// This lets us specify which classes (based on matching criteria) will have their methods instrumented.
public class WordCountInstrumentation implements TypeInstrumentation {

  private static final Logger logger = Logger.getLogger(WordCountInstrumentation.class.getName());

  // The typeMatcher method defines which classes the instrumentation should apply to.
  // In this case, it's the "org.davidgeorgehope.Main" class.
  @Override
  public ElementMatcher<TypeDescription> typeMatcher() {
    logger.info("TEST typeMatcher");
    return ElementMatchers.named("org.davidgeorgehope.Main");
  }

  // In the transform method, we specify which methods of the matched classes will be instrumented,
  // and the advice (a piece of code) that will be added to those methods.
  @Override
  public void transform(TypeTransformer typeTransformer) {
    logger.info("TEST transform");
    typeTransformer.applyAdviceToMethod(namedOneOf("countWords"),
        this.getClass().getName() + "$WordCountAdvice");
  }

  // The WordCountAdvice class contains the actual pieces of code (advices) added to the instrumented method.
  @SuppressWarnings("unused")
  public static class WordCountAdvice {

    // This advice runs at the beginning of the instrumented method (OnMethodEnter).
    // It creates and starts a new span, and makes it active.
    @Advice.OnMethodEnter(suppress = Throwable.class)
    public static Scope onEnter(@Advice.Argument(value = 0) String input,
                                @Advice.Local("otelSpan") Span span) {
      // Get a Tracer instance from OpenTelemetry.
      Tracer tracer = GlobalOpenTelemetry.getTracer("instrumentation-library-name", "semver:1.0.0");
      System.out.print("Entering method");

      // Start a new span with the name "mySpan".
      span = tracer.spanBuilder("mySpan").startSpan();

      // Make this new span the current active span, and return the Scope
      // so the exit advice can close it.
      return span.makeCurrent();
    }

    // This advice runs at the end of the instrumented method (OnMethodExit).
    // It first closes the span's scope, then checks whether an exception was thrown during the
    // method's execution. If so, it sets the span's status to ERROR; otherwise it sets a custom
    // "wordCount" attribute on the span. Either way, it ends the span.
    @Advice.OnMethodExit(onThrowable = Throwable.class, suppress = Throwable.class)
    public static void onExit(@Advice.Return(readOnly = false) int wordCount,
                              @Advice.Thrown Throwable throwable,
                              @Advice.Local("otelSpan") Span span,
                              @Advice.Enter Scope scope) {
      // Close the scope to deactivate the span.
      scope.close();

      if (throwable != null) {
        span.setStatus(StatusCode.ERROR, "Exception thrown in method");
      } else {
        span.setAttribute("wordCount", wordCount);
      }

      // End the span. This makes it ready to be exported to the configured backend (e.g., Elastic).
      span.end();
    }
  }
}
```

The target class for our instrumentation is defined in the typeMatcher method, and the method we want to instrument is selected in the transform method. We are targeting the Main class and its countWords method.

As you can see, an inner class does most of the work here, defining onEnter and onExit methods that say what to do when we enter and when we exit the countWords method.

In the onEnter method, we set up and activate a new OpenTelemetry span; in the onExit method, we end it. If the method ends successfully, we also grab the word count and attach it to the span as the wordCount attribute.

Now let's take a look at what happens when we run this.
The good news is that we have made this extremely simple by providing a Dockerfile that does all the work for you.

## Pulling this all together

[Clone the GitHub repository](https://github.com/davidgeorgehope/custom-instrumentation-examples/tree/main) if you have not already done so, and before continuing, let's take a quick look at the Dockerfile we are using.

```dockerfile
# Build stage
FROM maven:3.8.7-openjdk-18 as build

COPY simple-java /home/app/simple-java
COPY opentelemetry-custom-instrumentation /home/app/opentelemetry-custom-instrumentation

WORKDIR /home/app/simple-java
RUN mvn install

WORKDIR /home/app/opentelemetry-custom-instrumentation
RUN mvn install

# Package stage
FROM maven:3.8.7-openjdk-18
COPY --from=build /home/app/simple-java/target/simple-java-1.0-SNAPSHOT.jar /usr/local/lib/simple-java-1.0-SNAPSHOT.jar
COPY --from=build /home/app/opentelemetry-custom-instrumentation/target/opentelemetry-custom-instrumentation-1.0-SNAPSHOT.jar /usr/local/lib/opentelemetry-custom-instrumentation-1.0-SNAPSHOT.jar

WORKDIR /

RUN curl -L -o opentelemetry-javaagent.jar https://github.com/open-telemetry/opentelemetry-java-instrumentation/releases/latest/download/opentelemetry-javaagent.jar

COPY start.sh /start.sh
RUN chmod +x /start.sh

ENTRYPOINT ["/start.sh"]
```

This Dockerfile works in two parts: during the docker build, we compile the simple-java application from source, followed by the custom instrumentation, and then download the latest OpenTelemetry Java Agent. At runtime, we simply execute the start.sh file shown below:

```bash
#!/bin/sh
java \
  -javaagent:/opentelemetry-javaagent.jar \
  -Dotel.exporter.otlp.endpoint=${SERVER_URL} \
  -Dotel.exporter.otlp.headers="Authorization=Bearer ${SECRET_KEY}" \
  -Dotel.metrics.exporter=otlp \
  -Dotel.logs.exporter=otlp \
  -Dotel.resource.attributes=service.name=simple-java,service.version=1.0,deployment.environment=production \
  -Dotel.service.name=your-service-name \
  -Dotel.javaagent.extensions=/usr/local/lib/opentelemetry-custom-instrumentation-1.0-SNAPSHOT.jar \
  -Dotel.javaagent.debug=true \
  -jar /usr/local/lib/simple-java-1.0-SNAPSHOT.jar
```

There are two important things to note in this script. The first is that we set the -javaagent parameter to opentelemetry-javaagent.jar — this starts the OpenTelemetry javaagent, which runs before any application code is executed.

Inside this jar there has to be a class with a premain method, which the JVM looks for to bootstrap the Java agent. As described above, the bytecode of every class that is loaded is essentially filtered through the javaagent code, so it can modify a class before it executes.
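You can check this entry point for yourself: the jar's manifest must name the premain class, which for the OpenTelemetry javaagent is io.opentelemetry.javaagent.OpenTelemetryAgent at the time of writing. A quick way to inspect it, assuming unzip is available:

```bash
# Print the agent jar's manifest and look for the Premain-Class attribute
unzip -p opentelemetry-javaagent.jar META-INF/MANIFEST.MF | grep -i premain
```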
The second important thing here is the otel.javaagent.extensions configuration, which loads the extension we built to add instrumentation to our simple-java application.

Now run the following commands:

```bash
docker build -t djhope99/custom-otel-instrumentation:1 .
docker run -it -e 'SERVER_URL=XXX' -e 'SECRET_KEY=XX' djhope99/custom-otel-instrumentation:1
```

If you use the SERVER_URL and SECRET_KEY you got earlier here, you should see this connect to Elastic.

When it starts up, it will ask you to enter a sentence. Enter a few sentences, pressing enter after each, and do this a few times — there is a sleep in the code to force a long-running transaction:

![code](/assets/images/extensions-opentelemetry-java-agent/elastic-blog-3-codeblack.png)

Eventually you will see the service show up in the service map:

![services](/assets/images/extensions-opentelemetry-java-agent/elastic-blog-4-services.png)

Traces will appear:

![service name](/assets/images/extensions-opentelemetry-java-agent/elastic-blog-5-your-service-name.png)

And in the span you will see the wordCount attribute we collected:

![transaction details](/assets/images/extensions-opentelemetry-java-agent/elastic-blog-6-transaction-details.png)

This attribute can be used for further dashboarding and AI/ML, including anomaly detection if you need it, and that is easy to set up, as you can see below.

First click on the burger menu on the left side and select **Dashboard** to create a new dashboard:

![analytics](/assets/images/extensions-opentelemetry-java-agent/elastic-blog-7-manage-deployment-analytics.png)

From here, click **Create Visualization**.

![visualization](/assets/images/extensions-opentelemetry-java-agent/elastic-blog-8-visualization.png)

Search for the wordCount label in the APM index as shown below:

![dashboard](/assets/images/extensions-opentelemetry-java-agent/elastic-blog-9-dashboard-word.png)

As you can see, because we set this attribute in the span code below with wordCount as an integer, it was automatically mapped as a numeric field in Elastic:

```java
span.setAttribute("wordCount", wordCount);
```

From here we can drag and drop it into the visualization for display on our dashboard! Super easy.

![drag and drop](/assets/images/extensions-opentelemetry-java-agent/elastic-blog-10-drag-drop.png)

## In conclusion

This blog showed the role the OpenTelemetry Java Agent can play in filling visibility gaps and obtaining crucial business monitoring data, especially when access to the source code is not feasible.

It covered the basic concepts of Java agents, bytecode, and Byte Buddy, followed by a closer look at how automatic instrumentation with Byte Buddy works.

It then demonstrated how to implement custom instrumentation with the OpenTelemetry Java Agent's Extensions framework, using a simple Java application to underscore the agent's ability to inject trace code into an application to facilitate monitoring.

Finally, it detailed how to configure the agent and integrate the OpenTelemetry extension, and it walked through running the sample application so you can see the pieces working together in practice.
This post should be a useful resource for SREs and IT operations teams seeking to optimize their work with applications using OpenTelemetry's automatic instrumentation.

> - [Independence with OpenTelemetry on Elastic](https://www.elastic.co/blog/opentelemetry-observability)
> - [Modern observability and security on Kubernetes with Elastic and OpenTelemetry](https://www.elastic.co/blog/implementing-kubernetes-observability-security-opentelemetry)
> - [3 models for logging with OpenTelemetry and Elastic](https://www.elastic.co/blog/3-models-logging-opentelemetry-elastic)
> - [Adding free and open Elastic APM as part of your Elastic Observability deployment](https://www.elastic.co/blog/adding-free-and-open-elastic-apm-as-part-of-your-elastic-observability-deployment)
> - [Monitor OpenAI API and GPT models with OpenTelemetry and Elastic](https://www.elastic.co/blog/monitor-openai-api-gpt-models-opentelemetry-elastic)
> - [Future proof your observability platform with OpenTelemetry and Elastic](https://www.elastic.co/virtual-events/future-proof-your-observability-platform-with-opentelemetry-and-elastic)

Don't have an Elastic Cloud account yet? Sign up [for Elastic Cloud](https://cloud.elastic.co/registration).

_The release and timing of any features or functionality described in this post remain at Elastic's sole discretion. Any features or functionality not currently available may not be delivered on time or at all._
Sign up \\",(0,t.jsx)(e.a,{href:\\"https://cloud.elastic.co/registration\\",rel:\\"nofollow\\",children:\\"for Elastic Cloud\\"}),\\".\\"]}),`\\n`,(0,t.jsx)(e.p,{children:(0,t.jsx)(e.em,{children:\\"The release and timing of any features or functionality described in this post remain at Elastic\'s sole discretion. Any features or functionality not currently available may not be delivered on time or at all.\\"})})]})}function d(n={}){let{wrapper:e}=n.components||{};return e?(0,t.jsx)(e,{...n,children:(0,t.jsx)(h,{...n})}):h(n)}return v(j);})();\\n;return Component;"},"_id":"articles/how-extensions-opentelemetry-java-agent-and-how-apm-works.mdx","_raw":{"sourceFilePath":"articles/how-extensions-opentelemetry-java-agent-and-how-apm-works.mdx","sourceFileName":"how-extensions-opentelemetry-java-agent-and-how-apm-works.mdx","sourceFileDir":"articles","contentType":"mdx","flattenedPath":"articles/how-extensions-opentelemetry-java-agent-and-how-apm-works"},"type":"Article","imageUrl":"/assets/images/extensions-opentelemetry-java-agent/flexible-implementation-1680X980.png","readingTime":"19 min read","url":"/extensions-opentelemetry-java-agent","headings":[{"level":2,"title":"Without code access, SREs and IT Operations cannot always get the visibility they need","href":"#without-code-access-sres-and-it-operations-cannot-always-get-the-visibility-they-need"},{"level":2,"title":"Basic concepts: How APM works","href":"#basic-concepts-how-apm-works"},{"level":2,"title":"Application, prerequisites, and config","href":"#application-prerequisites-and-config"},{"level":2,"title":"Firing up the application and OpenTelemetry","href":"#firing-up-the-application-and-opentelemetry"},{"level":2,"title":"The OpenTelemetry Extension","href":"#the-opentelemetry-extension"},{"level":2,"title":"Pulling this all together","href":"#pulling-this-all-together"},{"level":2,"title":"In conclusion","href":"#in-conclusion"}]},{"title":"How to easily add application monitoring in Kubernetes pods","slug":"application-monitoring-kubernetes-pods","date":"2024-01-17","description":"This blog walks through installing the Elastic APM K8s Attacher and shows how to configure your system for both common and non-standard deployments of Elastic APM agents.","image":"139689_-_Blog_Header_Banner_V1.jpg","author":[{"slug":"jack-shirazi","type":"Author","_raw":{}},{"slug":"sylvain-juge","type":"Author","_raw":{}},{"slug":"alexander-wert","type":"Author","_raw":{}}],"subtitle":"The Elastic APM K8s Attacher lets the Elastic APM agent auto-attach to the application in your pods by adding just one annotation to your deployment","tags":[{"slug":"kubernetes","type":"Tag","_raw":{}},{"slug":"elastic-agent","type":"Tag","_raw":{}},{"slug":"apm","type":"Tag","_raw":{}}],"body":{"raw":"\\nThe [Elastic\xae APM K8s Attacher](https://www.elastic.co/guide/en/apm/attacher/current/index.html) allows auto-installation of Elastic APM application agents (e.g., the Elastic APM Java agent) into applications running in your Kubernetes clusters. The mechanism uses a [mutating webhook](https://kubernetes.io/docs/reference/access-authn-authz/admission-controllers/), which is a standard Kubernetes component, but you don’t need to know all the details to use the Attacher. Essentially, you can install the Attacher, add one annotation to any Kubernetes deployment that has an application you want monitored, and that’s it!\\n\\nIn this blog, we’ll walk through a full example from scratch using a Java application. 
Apart from the Java code and using a JVM for the application, everything else works the same for the other languages supported by the Attacher.

## Prerequisites

This walkthrough assumes that the following are already installed on the system: JDK 17, Docker, Kubernetes, and Helm.

## The example application

While the application (shown below) is a Java application, it would be easy to implement in any language, as it is just a simple loop that every 2 seconds calls the method chain methodA->methodB->methodC->methodD, with methodC sleeping for 10 milliseconds and methodD sleeping for 200 milliseconds. The application was chosen simply to display clearly, in the Elastic APM UI, that it is being monitored.

The Java application in full is shown here:

```java
package test;

public class Testing implements Runnable {

    public static void main(String[] args) {
        new Thread(new Testing()).start();
    }

    public void run() {
        while (true) {
            try {Thread.sleep(2000);} catch (InterruptedException e) {}
            methodA();
        }
    }

    public void methodA() {methodB();}

    public void methodB() {methodC();}

    public void methodC() {
        System.out.println("methodC executed");
        try {Thread.sleep(10);} catch (InterruptedException e) {}
        methodD();
    }

    public void methodD() {
        System.out.println("methodD executed");
        try {Thread.sleep(200);} catch (InterruptedException e) {}
    }
}
```

We created a Docker image containing that simple Java application for you; it can be pulled from the following Docker repository:

```bash
docker.elastic.co/demos/apm/k8s-webhook-test
```

## Deploy the pod

First we need a deployment config. We'll call the config file webhook-test.yaml, and the contents are pretty minimal — just pull the image and run it as a pod and container called webhook-test in the default namespace:

```yaml
apiVersion: v1
kind: Pod
metadata:
  name: webhook-test
  labels:
    app: webhook-test
spec:
  containers:
    - image: docker.elastic.co/demos/apm/k8s-webhook-test
      imagePullPolicy: Always
      name: webhook-test
```

This can be deployed normally using kubectl:

```bash
kubectl apply -f webhook-test.yaml
```

The result is exactly as expected:

```bash
$ kubectl get pods
NAME           READY   STATUS    RESTARTS   AGE
webhook-test   1/1     Running   0          10s

$ kubectl logs webhook-test
methodC executed
methodD executed
methodC executed
methodD executed
```

So far, this is just setting up a standard Kubernetes application with no APM monitoring. Now we get to the interesting bit: adding in auto-instrumentation.

## Install Elastic APM K8s Attacher

The first step is to install the [Elastic APM K8s Attacher](https://www.elastic.co/guide/en/apm/attacher/current/index.html). This only needs to be done once for the cluster — once installed, it is always available. Before installation, we will define where the monitored data will go; as you will see later, we can change this at any time. For now, we'll specify our own Elastic APM server, which is at https://myserver.somecloud:443 — we also have a secret token for authorization to that Elastic APM server, which has value MY_SECRET_TOKEN.
(If you want to set up a quick test Elastic APM server, you can do so at [https://cloud.elastic.co/](https://cloud.elastic.co/).)

There are two additional environment variables set for the application that are not generally needed, but they will help when we look at the resulting UI content toward the end of the walkthrough (when the agent is auto-installed, these two variables tell the agent what name to give this application in the UI and what method to trace). Now we just need to define the custom yaml file to hold these values. On installation, the custom yaml will be merged into the yaml for the Attacher:

```yaml
apm:
  secret_token: MY_SECRET_TOKEN
  namespaces:
    - default
webhookConfig:
  agents:
    java:
      environment:
        ELASTIC_APM_SERVER_URL: "https://myserver.somecloud:443"
        ELASTIC_APM_TRACE_METHODS: "test.Testing#methodB"
        ELASTIC_APM_SERVICE_NAME: "webhook-test"
```

That custom.yaml file is all we need to install the attacher (note we've only specified the default namespace for agent auto-installation for now — this can be easily changed, as you'll see later). Next we'll add the Elastic charts to helm — this only needs to be done once, then all Elastic charts are available to helm. This is the usual helm repo add command, specifically:

```bash
helm repo add elastic https://helm.elastic.co
```

Now the Elastic charts are available for installation (helm search repo would show you all the available charts). We're going to use "elastic-webhook" as the release name, resulting in the following installation command:

```bash
helm install elastic-webhook elastic/apm-attacher --namespace=elastic-apm --create-namespace --values custom.yaml
```

And that's it, we now have the Elastic APM K8s Attacher installed and set to send data to the APM server defined in the custom.yaml file! (You can confirm installation with a helm list -A if you need.)
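Beyond helm list, you can also check that the attacher's mutating webhook was registered with the cluster. This is a quick sanity check; the exact resource name will vary with your install:

```bash
# The attacher works through a MutatingWebhookConfiguration; it should be listed here
kubectl get mutatingwebhookconfigurations
```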
## Auto-install the Java agent

The Elastic APM K8s Attacher is installed, but it doesn't auto-install the APM application agents into every pod — that could lead to problems! Instead, the Attacher is deliberately limited to auto-installing agents into deployments that are a) in the namespaces listed in the custom.yaml, and b) carry the specific annotation co.elastic.apm/attach.

So for now, restarting the webhook-test pod we created above won't have any different effect on the pod, as it isn't yet set to be monitored. What we need to do is add the annotation. Specifically, we need to add the annotation using the default agent configuration that was installed with the Attacher, called "java" for the Java agent (we'll see later how that agent configuration is altered — the default configuration installs the latest agent version and leaves everything else default for that version). Adding that annotation to the webhook-test yaml gives us the new yaml file contents (the additional config is labelled (1)):

```yaml
apiVersion: v1
kind: Pod
metadata:
  name: webhook-test
  annotations: #(1)
    co.elastic.apm/attach: java #(1)
  labels:
    app: webhook-test
spec:
  containers:
    - image: docker.elastic.co/demos/apm/k8s-webhook-test
      imagePullPolicy: Always
      name: webhook-test
```

Applying this change gives us the application now monitored:

```bash
$ kubectl delete -f webhook-test.yaml
pod "webhook-test" deleted
$ kubectl apply -f webhook-test.yaml
pod/webhook-test created
$ kubectl logs webhook-test
… StartupInfo - Starting Elastic APM 1.45.0 …
```

And since the agent is now feeding data to our APM server, we can see it in the UI:

![webhook-test](/assets/images/application-monitoring-kubernetes-pods/webhook-test-k8s-blog.png)

Note that the agent identifies the Testing.methodB method as a trace root because the ELASTIC_APM_TRACE_METHODS environment variable is set to test.Testing#methodB in the custom.yaml — this tells the agent to specifically trace that method. The time taken by that method will be available in the UI for each invocation, but we don't see the sub-methods yet. In the next section, we'll see how easy it is to customize the Attacher, and in doing so we'll see more detail about the method chain being executed in the application.

## Customizing the agents

In your systems, you'll likely have development, testing, and production environments. You'll want to specify the version of the agent to use rather than just pulling whatever the latest version is, you'll want debug output on for some applications or instances, and you'll want specific options set to specific values. This sounds like a lot of effort, but the attacher lets you make these kinds of changes in a very simple way. In this section, we'll add a configuration that covers all of these changes, and we can see just how easy it is to configure and enable.

We start with the custom.yaml file we defined above. This is the file that gets merged into the Attacher. Adding a new configuration with all the items listed in the last paragraph is easy — though first we need to decide a name for our new configuration. We'll call it "java-interesting" here.
The new custom.yaml in full is shown below (the first part is the same as before; the new config is simply appended):

```yaml
apm:
  secret_token: MY_SECRET_TOKEN
  namespaces:
    - default
webhookConfig:
  agents:
    java:
      environment:
        ELASTIC_APM_SERVER_URL: "https://myserver.somecloud:443"
        ELASTIC_APM_TRACE_METHODS: "test.Testing#methodB"
        ELASTIC_APM_SERVICE_NAME: "webhook-test"
    java-interesting:
      image: docker.elastic.co/observability/apm-agent-java:1.52.1
      artifact: "/usr/agent/elastic-apm-agent.jar"
      environment:
        ELASTIC_APM_SERVER_URL: "https://myserver.somecloud:443"
        ELASTIC_APM_TRACE_METHODS: "test.Testing#methodB"
        ELASTIC_APM_SERVICE_NAME: "webhook-test"
        ELASTIC_APM_ENVIRONMENT: "testing"
        ELASTIC_APM_LOG_LEVEL: "debug"
        ELASTIC_APM_PROFILING_INFERRED_SPANS_ENABLED: "true"
        JAVA_TOOL_OPTIONS: "-javaagent:/elastic/apm/agent/elastic-apm-agent.jar"
```

Breaking the additional config down, we have:

- The name of the new config: java-interesting
- The APM Java agent image docker.elastic.co/observability/apm-agent-java, pinned to a specific version (1.52.1) instead of latest
- The agent jar location, which we need to specify because the attacher puts it there: artifact: "/usr/agent/elastic-apm-agent.jar"
- The environment variables:
  - ELASTIC_APM_SERVER_URL, as before
  - ELASTIC_APM_ENVIRONMENT set to testing, useful when looking in the UI
  - ELASTIC_APM_LOG_LEVEL set to debug for more detailed agent output
  - ELASTIC_APM_PROFILING_INFERRED_SPANS_ENABLED set to true, which will give us additional interesting information about the method chain being executed in the application
  - And lastly, JAVA_TOOL_OPTIONS set to "-javaagent:/elastic/apm/agent/elastic-apm-agent.jar" to enable starting the agent — this is fundamentally how the attacher auto-attaches the Java agent

More configurations and details about configuration options are [here for the Java agent](https://www.elastic.co/guide/en/apm/agent/java/current/configuration.html), and [other language agents](https://www.elastic.co/guide/en/apm/agent/index.html) are also available.

## The application traced with the new configuration

Finally, we just need to upgrade the attacher with the changed custom.yaml:

```bash
helm upgrade elastic-webhook elastic/apm-attacher --namespace=elastic-apm --create-namespace --values custom.yaml
```

This is the same command as the original install, but now using upgrade. That's it — add the config to custom.yaml and upgrade the attacher, and it's done! Simple.

Of course, we still need to use the new config on an app. In this case, we'll edit the existing webhook-test.yaml file, replacing java with java-interesting, so the annotation line is now:

```yaml
co.elastic.apm/attach: java-interesting
```

Applying the new pod config and restarting the pod, you can see the logs now hold debug output:

```bash
$ kubectl delete -f webhook-test.yaml
pod "webhook-test" deleted
$ kubectl apply -f webhook-test.yaml
pod/webhook-test created
$ kubectl logs webhook-test
… StartupInfo - Starting Elastic APM 1.44.0 …
… DEBUG co.elastic.apm.agent. …
… DEBUG co.elastic.apm.agent. …
```

More interesting is the UI.
More interesting is the UI. Now that inferred spans are enabled, the full method chain is visible.\n\n![trace sample](/assets/images/application-monitoring-kubernetes-pods/trace-sample-k8s-blog.png)\n\nThis gives the details for methodB (it takes 211 milliseconds because it calls methodC, which takes 10ms and in turn calls methodD, which takes 200ms). The times for methodC and methodD are inferred rather than recorded (inferred rather than traced — if you needed accurate times, you would instead add those methods to trace_methods and have them traced too).\n\n## Note on the ECK operator\n\nThe [Elastic Cloud on Kubernetes operator](https://www.elastic.co/guide/en/cloud-on-k8s/master/k8s-overview.html) allows you to install and manage a number of other Elastic components on Kubernetes. At the time of publication of this blog, the [Elastic APM K8s Attacher](https://www.elastic.co/guide/en/apm/attacher/current/index.html) is a separate component, and there is no conflict between these management mechanisms — they apply to different components and are independent of each other.\n\n## Try it yourself!\n\nThis walkthrough is easily repeated on your system, and you can make it more useful by replacing the example application with your own and the Docker registry with the one you use.\n\n[Learn more about real-time monitoring with Kubernetes and Elastic Observability](https://www.elastic.co/observability/kubernetes-monitoring).\n\n_The release and timing of any features or functionality described in this post remain at Elastic\'s sole discretion. Any features or functionality not currently available may not be delivered on time or at all._\n","code":"var Component=(()=>{var p=Object.create;var o=Object.defineProperty;var m=Object.getOwnPropertyDescriptor;var u=Object.getOwnPropertyNames;var g=Object.getPrototypeOf,w=Object.prototype.hasOwnProperty;var f=(n,e)=>()=>(e||n((e={exports:{}}).exports,e),e.exports),b=(n,e)=>{for(var a in e)o(n,a,{get:e[a],enumerable:!0})},s=(n,e,a,l)=>{if(e&&typeof e==\\"object\\"||typeof e==\\"function\\")for(let i of u(e))!w.call(n,i)&&i!==a&&o(n,i,{get:()=>e[i],enumerable:!(l=m(e,i))||l.enumerable});return n};var y=(n,e,a)=>(a=n!=null?p(g(n)):{},s(e||!n||!n.__esModule?o(a,\\"default\\",{value:n,enumerable:!0}):a,n)),A=n=>s(o({},\\"__esModule\\",{value:!0}),n);var r=f((T,h)=>{h.exports=_jsx_runtime});var E={};b(E,{default:()=>d,frontmatter:()=>v});var t=y(r()),v={title:\\"How to easily add application monitoring in Kubernetes pods\\",slug:\\"application-monitoring-kubernetes-pods\\",date:\\"2024-01-17\\",subtitle:\\"The Elastic APM K8s Attacher lets the Elastic APM agent auto-attach to the application in your pods by adding just one annotation to your deployment\\",description:\\"This blog walks through installing the Elastic APM K8s Attacher and shows how to configure your system for both common and non-standard deployments of Elastic APM agents.\\",author:[{slug:\\"jack-shirazi\\"},{slug:\\"sylvain-juge\\"},{slug:\\"alexander-wert\\"}],image:\\"139689_-_Blog_Header_Banner_V1.jpg\\",tags:[{slug:\\"kubernetes\\"},{slug:\\"elastic-agent\\"},{slug:\\"apm\\"}]};function c(n){let e={a:\\"a\\",code:\\"code\\",em:\\"em\\",h2:\\"h2\\",img:\\"img\\",li:\\"li\\",p:\\"p\\",pre:\\"pre\\",ul:\\"ul\\",...n.components};return(0,t.jsxs)(t.Fragment,{children:[(0,t.jsxs)(e.p,{children:[\\"The \\",(0,t.jsx)(e.a,{href:\\"https://www.elastic.co/guide/en/apm/attacher/current/index.html\\",rel:\\"nofollow\\",children:\\"Elastic\\\\xAE APM K8s Attacher\\"}),\\" allows auto-installation of Elastic APM application agents (e.g., the 
Elastic APM Java agent) into applications running in your Kubernetes clusters. The mechanism uses a \\",(0,t.jsx)(e.a,{href:\\"https://kubernetes.io/docs/reference/access-authn-authz/admission-controllers/\\",rel:\\"nofollow\\",children:\\"mutating webhook\\"}),\\", which is a standard Kubernetes component, but you don\\\\u2019t need to know all the details to use the Attacher. Essentially, you can install the Attacher, add one annotation to any Kubernetes deployment that has an application you want monitored, and that\\\\u2019s it!\\"]}),`\\n`,(0,t.jsx)(e.p,{children:\\"In this blog, we\\\\u2019ll walk through a full example from scratch using a Java application. Apart from the Java code and using a JVM for the application, everything else works the same for the other languages supported by the Attacher.\\"}),`\\n`,(0,t.jsx)(e.h2,{id:\\"prerequisites\\",children:\\"Prerequisites\\"}),`\\n`,(0,t.jsx)(e.p,{children:\\"This walkthrough assumes that the following are already installed on the system: JDK 17, Docker, Kubernetes, and Helm.\\"}),`\\n`,(0,t.jsx)(e.h2,{id:\\"the-example-application\\",children:\\"The example application\\"}),`\\n`,(0,t.jsx)(e.p,{children:\\"While the application (shown below) is a Java application, it would be easily implemented in any language, as it is just a simple loop that every 2 seconds calls the method chain methodA->methodB->methodC->methodD, with methodC sleeping for 10 milliseconds and methodD sleeping for 200 milliseconds. The choice of application is just to be able to clearly display in the Elastic APM UI that the application is being monitored.\\"}),`\\n`,(0,t.jsx)(e.p,{children:\\"The Java application in full is shown here:\\"}),`\\n`,(0,t.jsx)(e.pre,{children:(0,t.jsx)(e.code,{className:\\"language-java\\",children:`package test;\\n\\npublic class Testing implements Runnable {\\n\\n public static void main(String[] args) {\\n new Thread(new Testing()).start();\\n }\\n\\n public void run()\\n {\\n while(true) {\\n try {Thread.sleep(2000);} catch (InterruptedException e) {}\\n methodA();\\n }\\n }\\n\\n public void methodA() {methodB();}\\n\\n public void methodB() {methodC();}\\n\\n public void methodC() {\\n System.out.println(\\"methodC executed\\");\\n try {Thread.sleep(10);} catch (InterruptedException e) {}\\n methodD();\\n }\\n\\n public void methodD() {\\n System.out.println(\\"methodD executed\\");\\n try {Thread.sleep(200);} catch (InterruptedException e) {}\\n }\\n}\\n`})}),`\\n`,(0,t.jsx)(e.p,{children:\\"We created a Docker image containing that simple Java application for you that can be pulled from the following Docker repository:\\"}),`\\n`,(0,t.jsx)(e.pre,{children:(0,t.jsx)(e.code,{className:\\"language-bash\\",children:`docker.elastic.co/demos/apm/k8s-webhook-test\\n`})}),`\\n`,(0,t.jsx)(e.h2,{id:\\"deploy-the-pod\\",children:\\"Deploy the pod\\"}),`\\n`,(0,t.jsx)(e.p,{children:\\"First we need a deployment config. 
We\\\\u2019ll call the config file webhook-test.yaml, and the contents are pretty minimal \\\\u2014 just pull the image and run that as a pod & container called webhook-test in the default namespace:\\"}),`\\n`,(0,t.jsx)(e.pre,{children:(0,t.jsx)(e.code,{className:\\"language-yaml\\",children:`apiVersion: v1\\nkind: Pod\\nmetadata:\\n name: webhook-test\\n labels:\\n app: webhook-test\\nspec:\\n containers:\\n - image: docker.elastic.co/demos/apm/k8s-webhook-test\\n imagePullPolicy: Always\\n name: webhook-test\\n`})}),`\\n`,(0,t.jsx)(e.p,{children:\\"This can be deployed normally using kubectl:\\"}),`\\n`,(0,t.jsx)(e.pre,{children:(0,t.jsx)(e.code,{className:\\"language-yaml\\",children:`kubectl apply -f webhook-test.yaml\\n`})}),`\\n`,(0,t.jsx)(e.p,{children:\\"The result is exactly as expected:\\"}),`\\n`,(0,t.jsx)(e.pre,{children:(0,t.jsx)(e.code,{className:\\"language-bash\\",children:`$ kubectl get pods\\nNAME READY STATUS RESTARTS AGE\\nwebhook-test 1/1 Running 0 10s\\n\\n$ kubectl logs webhook-test\\nmethodC executed\\nmethodD executed\\nmethodC executed\\nmethodD executed\\n`})}),`\\n`,(0,t.jsx)(e.p,{children:\\"So far, this is just setting up a standard Kubernetes application with no APM monitoring. Now we get to the interesting bit: adding in auto-instrumentation.\\"}),`\\n`,(0,t.jsx)(e.h2,{id:\\"install-elastic-apm-k8s-attacher\\",children:\\"Install Elastic APM K8s Attacher\\"}),`\\n`,(0,t.jsxs)(e.p,{children:[\\"The first step is to install the \\",(0,t.jsx)(e.a,{href:\\"https://www.elastic.co/guide/en/apm/attacher/current/index.html\\",rel:\\"nofollow\\",children:\\"Elastic APM K8s Attacher\\"}),\\". This only needs to be done once for the cluster \\\\u2014 once installed, it is always available. Before installation, we will define where the monitored data will go. As you will see later, we can decide or change this any time. For now, we\\\\u2019ll specify our own Elastic APM server, which is at \\",(0,t.jsx)(e.a,{href:\\"https://myserver.somecloud:443\\",rel:\\"nofollow\\",children:\\"https://myserver.somecloud:443\\"}),\\" \\\\u2014 we also have a secret token for authorization to that Elastic APM server, which has value MY_SECRET_TOKEN. (If you want to set up a quick test Elastic APM server, you can do so at \\",(0,t.jsx)(e.a,{href:\\"https://cloud.elastic.co/\\",rel:\\"nofollow\\",children:\\"https://cloud.elastic.co/\\"}),\\").\\"]}),`\\n`,(0,t.jsx)(e.p,{children:\\"There are two additional environment variables set for the application that are not generally needed but will help when we see the resulting UI content toward the end of the walkthrough (when the agent is auto-installed, these two variables tell the agent what name to give this application in the UI and what method to trace). Now we just need to define the custom yaml file to hold these. On installation, the custom yaml will be merged into the yaml for the Attacher:\\"}),`\\n`,(0,t.jsx)(e.pre,{children:(0,t.jsx)(e.code,{className:\\"language-yaml\\",children:`apm:\\n secret_token: MY_SECRET_TOKEN\\n namespaces:\\n - default\\nwebhookConfig:\\n agents:\\n java:\\n environment:\\n ELASTIC_APM_SERVER_URL: \\"https://myserver.somecloud:443\\"\\n ELASTIC_APM_TRACE_METHODS: \\"test.Testing#methodB\\"\\n ELASTIC_APM_SERVICE_NAME: \\"webhook-test\\"\\n`})}),`\\n`,(0,t.jsx)(e.p,{children:\\"That custom.yaml file is all we need to install the attacher (note we\\\\u2019ve only specified the default namespace for agent auto-installation for now \\\\u2014 this can be easily changed, as you\\\\u2019ll see later). 
Next we\\\\u2019ll add the Elastic charts to helm \\\\u2014 this only needs to be done once, then all Elastic charts are available to helm. This is the usual helm add repo command, specifically:\\"}),`\\n`,(0,t.jsx)(e.pre,{children:(0,t.jsx)(e.code,{className:\\"language-bash\\",children:`helm repo add elastic https://helm.elastic.co\\n`})}),`\\n`,(0,t.jsx)(e.p,{children:\\"Now the Elastic charts are available for installation (helm search repo would show you all the available charts). We\\\\u2019re going to use \\\\u201Celastic-webhook\\\\u201D as the name to install into, resulting in the following installation command:\\"}),`\\n`,(0,t.jsx)(e.pre,{children:(0,t.jsx)(e.code,{className:\\"language-bash\\",children:`helm install elastic-webhook elastic/apm-attacher --namespace=elastic-apm --create-namespace --values custom.yaml\\n`})}),`\\n`,(0,t.jsx)(e.p,{children:\\"And that\\\\u2019s it, we now have the Elastic APM K8s Attacher installed and set to send data to the APM server defined in the custom.yaml file! (You can confirm installation with a helm list -A if you need.)\\"}),`\\n`,(0,t.jsx)(e.h2,{id:\\"auto-install-the-java-agent\\",children:\\"Auto-install the Java agent\\"}),`\\n`,(0,t.jsx)(e.p,{children:\\"The Elastic APM K8s Attacher is installed, but it doesn\\\\u2019t auto-install the APM application agents into every pod \\\\u2014 that could lead to problems! Instead the Attacher is deliberately limited to auto-install agents into deployments defined a) by the namespaces listed in the custom.yaml, and b) to those deployments in those namespaces that have a specific annotation \\\\u201Cco.elastic.apm/attach.\\\\u201D\\"}),`\\n`,(0,t.jsx)(e.p,{children:\\"So for now, restarting the webhook-test pod we created above won\\\\u2019t have any different effect on the pod, as it isn\\\\u2019t yet set to be monitored. What we need to do is add the annotation. Specifically, we need to add the annotation using the default agent configuration that was installed with the Attacher called \\\\u201Cjava\\\\u201D for the Java agent (we\\\\u2019ll see later how that agent configuration is altered \\\\u2014 the default configuration installs the latest agent version and leaves everything else default for that version). 
So adding that annotation in to webhook-test yaml gives us the new yaml file contents (the additional config is shown labelled (1)):\\"}),`\\n`,(0,t.jsx)(e.pre,{children:(0,t.jsx)(e.code,{className:\\"language-yaml\\",children:`apiVersion: v1\\nkind: Pod\\nmetadata:\\n name: webhook-test\\n annotations: #(1)\\n co.elastic.apm/attach: java #(1)\\n labels:\\n app: webhook-test\\nspec:\\n containers:\\n - image: docker.elastic.co/demos/apm/k8s-webhook-test\\n imagePullPolicy: Always\\n name: webhook-test\\n`})}),`\\n`,(0,t.jsx)(e.p,{children:\\"Applying this change gives us the application now monitored:\\"}),`\\n`,(0,t.jsx)(e.pre,{children:(0,t.jsx)(e.code,{className:\\"language-bash\\",children:`$ kubectl delete -f webhook-test.yaml\\npod \\"webhook-test\\" deleted\\n$ kubectl apply -f webhook-test.yaml\\npod/webhook-test created\\n$ kubectl logs webhook-test\\n\\\\u2026 StartupInfo - Starting Elastic APM 1.45.0 \\\\u2026\\n`})}),`\\n`,(0,t.jsx)(e.p,{children:\\"And since the agent is now feeding data to our APM server, we can now see it in the UI:\\"}),`\\n`,(0,t.jsx)(e.p,{children:(0,t.jsx)(e.img,{src:\\"/assets/images/application-monitoring-kubernetes-pods/webhook-test-k8s-blog.png\\",alt:\\"webhook-test\\",width:\\"1600\\",height:\\"819\\"})}),`\\n`,(0,t.jsx)(e.p,{children:\\"Note that the agent identifies Testing.methodB method as a trace root because of the ELASTIC_APM_TRACE_METHODS environment variable set to test.Testing#methodB in the custom.yaml \\\\u2014 this tells the agent to specifically trace that method. The time taken by that method will be available in the UI for each invocation, but we don\\\\u2019t see the sub-methods . . . currently. In the next section, we\\\\u2019ll see how easy it is to customize the Attacher, and in doing so we\\\\u2019ll see more detail about the method chain being executed in the application.\\"}),`\\n`,(0,t.jsx)(e.h2,{id:\\"customizing-the-agents\\",children:\\"Customizing the agents\\"}),`\\n`,(0,t.jsx)(e.p,{children:\\"In your systems, you\\\\u2019ll likely have development, testing, and production environments. You\\\\u2019ll want to specify the version of the agent to use rather than just pull the latest version whatever that is, you\\\\u2019ll want to have debug on for some applications or instances, and you\\\\u2019ll want to have specific options set to specific values. This sounds like a lot of effort, but the attacher lets you enable these kinds of changes in a very simple way. In this section, we\\\\u2019ll add a configuration that specifies all these changes and we can see just how easy it is to configure and enable it.\\"}),`\\n`,(0,t.jsx)(e.p,{children:\\"We start at the custom.yaml file we defined above. This is the file that gets merged into the Attacher. Adding a new configuration with all the items listed in the last paragraph is easy \\\\u2014 though first we need to decide a name for our new configuration. We\\\\u2019ll call it \\\\u201Cjava-interesting\\\\u201D here. 
The new custom.yaml in full is (the first part is just the same as before, the new config is simply appended):\\"}),`\\n`,(0,t.jsx)(e.pre,{children:(0,t.jsx)(e.code,{className:\\"language-yaml\\",children:`apm:\\n secret_token: MY_SECRET_TOKEN\\n namespaces:\\n - default\\nwebhookConfig:\\n agents:\\n java:\\n environment:\\n ELASTIC_APM_SERVER_URL: \\"https://myserver.somecloud:443\\"\\n ELASTIC_APM_TRACE_METHODS: \\"test.Testing#methodB\\"\\n ELASTIC_APM_SERVICE_NAME: \\"webhook-test\\"\\n java-interesting:\\n image: docker.elastic.co/observability/apm-agent-java:1.52.1\\n artifact: \\"/usr/agent/elastic-apm-agent.jar\\"\\n environment:\\n ELASTIC_APM_SERVER_URL: \\"https://myserver.somecloud:443\\"\\n ELASTIC_APM_TRACE_METHODS: \\"test.Testing#methodB\\"\\n ELASTIC_APM_SERVICE_NAME: \\"webhook-test\\"\\n ELASTIC_APM_ENVIRONMENT: \\"testing\\"\\n ELASTIC_APM_LOG_LEVEL: \\"debug\\"\\n ELASTIC_APM_PROFILING_INFERRED_SPANS_ENABLED: \\"true\\"\\n JAVA_TOOL_OPTIONS: \\"-javaagent:/elastic/apm/agent/elastic-apm-agent.jar\\"\\n`})}),`\\n`,(0,t.jsx)(e.p,{children:\\"Breaking the additional config down, we have:\\"}),`\\n`,(0,t.jsxs)(e.ul,{children:[`\\n`,(0,t.jsxs)(e.li,{children:[`\\n`,(0,t.jsx)(e.p,{children:\\"The name of the new config java-interesting\\"}),`\\n`]}),`\\n`,(0,t.jsxs)(e.li,{children:[`\\n`,(0,t.jsx)(e.p,{children:\\"The APM Java agent image docker.elastic.co/observability/apm-agent-java\\"}),`\\n`,(0,t.jsxs)(e.ul,{children:[`\\n`,(0,t.jsx)(e.li,{children:\\"With a specific version 1.43.0 instead of latest\\"}),`\\n`]}),`\\n`]}),`\\n`,(0,t.jsxs)(e.li,{children:[`\\n`,(0,t.jsx)(e.p,{children:\\"We need to specify the agent jar location (the attacher puts it here)\\"}),`\\n`,(0,t.jsxs)(e.ul,{children:[`\\n`,(0,t.jsx)(e.li,{children:\'artifact: \\"/usr/agent/elastic-apm-agent.jar\\"\'}),`\\n`]}),`\\n`]}),`\\n`,(0,t.jsxs)(e.li,{children:[`\\n`,(0,t.jsx)(e.p,{children:\\"And then the environment variables\\"}),`\\n`]}),`\\n`,(0,t.jsxs)(e.li,{children:[`\\n`,(0,t.jsx)(e.p,{children:\\"ELASTIC_APM_SERVER_URL as before\\"}),`\\n`]}),`\\n`,(0,t.jsxs)(e.li,{children:[`\\n`,(0,t.jsx)(e.p,{children:\\"ELASTIC_APM_ENVIRONMENT set to testing, useful when looking in the UI\\"}),`\\n`]}),`\\n`,(0,t.jsxs)(e.li,{children:[`\\n`,(0,t.jsx)(e.p,{children:\\"ELASTIC_APM_LOG_LEVEL set to debug for more detailed agent output\\"}),`\\n`]}),`\\n`,(0,t.jsxs)(e.li,{children:[`\\n`,(0,t.jsx)(e.p,{children:\\"ELASTIC_APM_PROFILING_INFERRED_SPANS_ENABLED turning this on (setting to true) will give us additional interesting information about the method chain being executed in the application\\"}),`\\n`]}),`\\n`,(0,t.jsxs)(e.li,{children:[`\\n`,(0,t.jsx)(e.p,{children:\'And lastly we need to set JAVA_TOOL_OPTIONS to the enable starting the agent \\"-javaagent:/elastic/apm/agent/elastic-apm-agent.jar\\" \\\\u2014 this is fundamentally how the attacher auto-attaches the Java agent\'}),`\\n`]}),`\\n`]}),`\\n`,(0,t.jsxs)(e.p,{children:[\\"More configurations and details about configuration options are \\",(0,t.jsx)(e.a,{href:\\"https://www.elastic.co/guide/en/apm/agent/java/current/configuration.html\\",rel:\\"nofollow\\",children:\\"here for the Java agent\\"}),\\", and \\",(0,t.jsx)(e.a,{href:\\"https://www.elastic.co/guide/en/apm/agent/index.html\\",rel:\\"nofollow\\",children:\\"other language agents\\"}),\\" are also available.\\"]}),`\\n`,(0,t.jsx)(e.h2,{id:\\"the-application-traced-with-the-new-configuration\\",children:\\"The application traced with the new 
configuration\\"}),`\\n`,(0,t.jsx)(e.p,{children:\\"And finally we just need to upgrade the attacher with the changed custom.yaml:\\"}),`\\n`,(0,t.jsx)(e.pre,{children:(0,t.jsx)(e.code,{className:\\"language-bash\\",children:`helm upgrade elastic-webhook elastic/apm-attacher --namespace=elastic-apm --create-namespace --values custom.yaml\\n`})}),`\\n`,(0,t.jsx)(e.p,{children:\\"This is the same command as the original install, but now using upgrade. That\\\\u2019s it \\\\u2014 add config to the custom.yaml and upgrade the attacher, and it\\\\u2019s done! Simple.\\"}),`\\n`,(0,t.jsx)(e.p,{children:\\"Of course we still need to use the new config on an app. In this case, we\\\\u2019ll edit the existing webhook-test.yaml file, replacing java with java-interesting, so the annotation line is now:\\"}),`\\n`,(0,t.jsx)(e.pre,{children:(0,t.jsx)(e.code,{className:\\"language-yaml\\",children:`co.elastic.apm/attach: java-interesting\\n`})}),`\\n`,(0,t.jsx)(e.p,{children:\\"Applying the new pod config and restarting the pod, you can see the logs now hold debug output:\\"}),`\\n`,(0,t.jsx)(e.pre,{children:(0,t.jsx)(e.code,{className:\\"language-bash\\",children:`$ kubectl delete -f webhook-test.yaml\\npod \\"webhook-test\\" deleted\\n$ kubectl apply -f webhook-test.yaml\\npod/webhook-test created\\n$ kubectl logs webhook-test\\n\\\\u2026 StartupInfo - Starting Elastic APM 1.44.0 \\\\u2026\\n\\\\u2026 DEBUG co.elastic.apm.agent. \\\\u2026\\n\\\\u2026 DEBUG co.elastic.apm.agent. \\\\u2026\\n`})}),`\\n`,(0,t.jsx)(e.p,{children:\\"More interesting is the UI. Now that inferred spans is on, the full method chain is visible.\\"}),`\\n`,(0,t.jsx)(e.p,{children:(0,t.jsx)(e.img,{src:\\"/assets/images/application-monitoring-kubernetes-pods/trace-sample-k8s-blog.png\\",alt:\\"trace sample\\",width:\\"1600\\",height:\\"551\\"})}),`\\n`,(0,t.jsx)(e.p,{children:\\"This gives the details for methodB (it takes 211 milliseconds because it calls methodC - 10ms - which calls methodD - 200ms). The times for methodC and methodD are inferred rather than recorded, (inferred rather than traced \\\\u2014 if you needed accurate times you would instead add the methods to trace_methods and have them traced too).\\"}),`\\n`,(0,t.jsx)(e.h2,{id:\\"note-on-the-eck-operator\\",children:\\"Note on the ECK operator\\"}),`\\n`,(0,t.jsxs)(e.p,{children:[\\"The \\",(0,t.jsx)(e.a,{href:\\"https://www.elastic.co/guide/en/cloud-on-k8s/master/k8s-overview.html\\",rel:\\"nofollow\\",children:\\"Elastic Cloud on Kubernetes operator\\"}),\\" allows you to install and manage a number of other Elastic components on Kubernetes. 
At the time of publication of this blog, the \\",(0,t.jsx)(e.a,{href:\\"https://www.elastic.co/guide/en/apm/attacher/current/index.html\\",rel:\\"nofollow\\",children:\\"Elastic APM K8s Attacher\\"}),\\" is a separate component, and there is no conflict between these management mechanisms \\\\u2014 they apply to different components and are independent of each other.\\"]}),`\\n`,(0,t.jsx)(e.h2,{id:\\"try-it-yourself\\",children:\\"Try it yourself!\\"}),`\\n`,(0,t.jsx)(e.p,{children:\\"This walkthrough is easily repeated on your system, and you can make it more useful by replacing the example application with your own and the Docker registry with the one you use.\\"}),`\\n`,(0,t.jsxs)(e.p,{children:[(0,t.jsx)(e.a,{href:\\"https://www.elastic.co/observability/kubernetes-monitoring\\",rel:\\"nofollow\\",children:\\"Learn more about real-time monitoring with Kubernetes and Elastic Observability\\"}),\\".\\"]}),`\\n`,(0,t.jsx)(e.p,{children:(0,t.jsx)(e.em,{children:\\"The release and timing of any features or functionality described in this post remain at Elastic\'s sole discretion. Any features or functionality not currently available may not be delivered on time or at all.\\"})})]})}function d(n={}){let{wrapper:e}=n.components||{};return e?(0,t.jsx)(e,{...n,children:(0,t.jsx)(c,{...n})}):c(n)}return A(E);})();\\n;return Component;"},"_id":"articles/how-to-add-application-monitoring-in-kubernetes-pods.mdx","_raw":{"sourceFilePath":"articles/how-to-add-application-monitoring-in-kubernetes-pods.mdx","sourceFileName":"how-to-add-application-monitoring-in-kubernetes-pods.mdx","sourceFileDir":"articles","contentType":"mdx","flattenedPath":"articles/how-to-add-application-monitoring-in-kubernetes-pods"},"type":"Article","imageUrl":"/assets/images/application-monitoring-kubernetes-pods/139689_-_Blog_Header_Banner_V1.jpg","readingTime":"12 min read","url":"/application-monitoring-kubernetes-pods","headings":[{"level":2,"title":"Prerequisites","href":"#prerequisites"},{"level":2,"title":"The example application","href":"#the-example-application"},{"level":2,"title":"Deploy the pod","href":"#deploy-the-pod"},{"level":2,"title":"Install Elastic APM K8s Attacher","href":"#install-elastic-apm-k8s-attacher"},{"level":2,"title":"Auto-install the Java agent","href":"#auto-install-the-java-agent"},{"level":2,"title":"Customizing the agents","href":"#customizing-the-agents"},{"level":2,"title":"The application traced with the new configuration","href":"#the-application-traced-with-the-new-configuration"},{"level":2,"title":"Note on the ECK operator","href":"#note-on-the-eck-operator"},{"level":2,"title":"Try it yourself!","href":"#try-it-yourself"}]},{"title":"How to deploy Hello World Elastic Observability on Google Cloud Run","slug":"deploy-observability-google-cloud-run","date":"2023-08-28","description":"Follow the step-by-step process of instrumenting Elastic Observability for a Hello World web app running on Google Cloud Run.","image":"illustration-dev-sec-ops-cloud-automations-1680x980.png","author":[{"slug":"jonathan-simon","type":"Author","_raw":{}}],"tags":[{"slug":"cloud-monitoring","type":"Tag","_raw":{}},{"slug":"google-cloud","type":"Tag","_raw":{}},{"slug":"google-cloud-run","type":"Tag","_raw":{}},{"slug":"apm","type":"Tag","_raw":{}}],"body":{"raw":"\\nElastic Cloud Observability is the premiere tool to provide visibility into your running web apps. Google Cloud Run is the serverless platform of choice to run your web apps that need to scale up massively and scale down to zero. 
Elastic Observability combined with Google Cloud Run is the perfect solution for developers to deploy [web apps that are auto-scaled with fully observable operations](https://www.elastic.co/blog/observability-powerful-flexible-efficient), in a way that’s straightforward to implement and manage.\n\nThis blog post will show you how to deploy a simple Hello World web app to Cloud Run and then walk you through the steps to instrument the Hello World web app to enable observation of the application’s operations with Elastic Cloud.\n\n## Elastic Observability setup\n\nWe’ll start with setting up an Elastic Cloud deployment, which is where observability will take place for the web app we’ll be deploying.\n\nFrom the [Elastic Cloud console](https://cloud.elastic.co), select **Create deployment**.\n\n![create deployment](/assets/images/deploy-observability-google-cloud-run/elastic-blog-1-create-deployment.png)\n\nEnter a deployment name and click **Create deployment**. It takes a few minutes for your deployment to be created. While waiting, you are prompted to save the admin credentials for your deployment, which provide you with superuser access to your Elastic\xae deployment. Keep these credentials safe as they are shown only once.\n\nElastic Observability requires an APM Server URL and an APM Secret token for an app to send observability data to Elastic Cloud. Once the deployment is created, we’ll copy the Elastic Observability server URL and secret token and store them somewhere safe for adding to our web app code in a later step.\n\nTo copy the APM Server URL and the APM Secret Token, go to [Elastic Cloud](https://cloud.elastic.co/home). Then go to the [Deployments](https://cloud.elastic.co/deployments) page, which lists all of the deployments you have created. Select the deployment you want to use, which will open the deployment details page. In the **Kibana** row of links, click on **Open** to open **Kibana** for your deployment.\n\n![my deployment](/assets/images/deploy-observability-google-cloud-run/elastic-blog-2-my-deployment.png)\n\nSelect **Integrations** from the top-level menu. Then click the **APM** tile.\n\n![apm](/assets/images/deploy-observability-google-cloud-run/elastic-blog-3-apm.png)\n\nOn the APM Agents page, copy the secretToken and the serverUrl values and save them for use in a later step.\n\n![apm agents](/assets/images/deploy-observability-google-cloud-run/elastic-blog-4-apm-agents.png)\n\nNow that we’ve completed the Elastic Cloud setup, the next step is to set up our Google Cloud project for deploying apps to Cloud Run.\n\n## Google Cloud Run setup\n\nFirst we’ll need a Google Cloud project, so let’s create one by going to the [Google Cloud console](https://console.cloud.google.com) and creating a new project. Select the project menu and then click the **New Project** button.\n\n![google cloud with gray dropdown](/assets/images/deploy-observability-google-cloud-run/elastic-blog-5-google-cloud-gray-dropdown.png)\n\n![select a project](/assets/images/deploy-observability-google-cloud-run/elastic-blog-5-select-a-project.png)\n\n
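As an aside, the project can also be created from the command line; a sketch assuming the gcloud CLI is available, where your-project-id must be replaced with a globally unique ID:\n\n```bash\n# Create a new Google Cloud project from the CLI\ngcloud projects create your-project-id\n```\n\n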
Once the new project is created, we’ll need to enable the necessary APIs that our Hello World app will be using. This can be done by clicking this [enable APIs](https://console.cloud.google.com/flows/enableapi?apiid=compute.googleapis.com,,run.googleapis.com,containerregistry.googleapis.com,cloudbuild.googleapis.com) link, which opens a page in the Google Cloud console that lists the APIs that will be enabled and allows us to confirm their activation.\n\n![enable apis](/assets/images/deploy-observability-google-cloud-run/elastic-blog-6-enable-apis.png)\n\nAfter we’ve enabled the necessary APIs, we’ll need to set up the required permissions for our Hello World app, which can be done in the [IAM section](https://console.cloud.google.com/iam-admin) of the Google Cloud Console. Within the IAM section, select the **Compute Engine** default service account and add the following roles:\n\n- Logs Viewer\n- Monitoring Viewer\n- Pub/Sub Subscriber\n\n![principals](/assets/images/deploy-observability-google-cloud-run/elastic-blog-7-principals.png)\n\n## Deploy a Hello World web app to Cloud Run\n\nWe’ll deploy a Node.js Hello World web app to Cloud Run using the handy Google Cloud tool called [Cloud Shell Editor](https://console.cloud.google.com/cloudshelleditor). To deploy the Hello World app, we’ll perform the following five steps:\n\n1. In Cloud Shell Editor, in the terminal window that appears at the bottom of the screen, clone a [Node.js Hello World sample app](https://github.com/elastic/observability-examples/tree/main/gcp/run/helloworld) repo from GitHub by entering the following command.\n\n```bash\ngit clone https://github.com/elastic/observability-examples\n```\n\n2. Change directory to the location of the Hello World web app code within the cloned repo.\n\n```bash\ncd observability-examples/gcp/run/helloworld\n```\n\n3. Build the Hello World app image and push the image to Google Container Registry by running the command below in the terminal. Be sure to replace your-project-id in the command below with your actual Google Cloud project ID.\n\n```bash\ngcloud builds submit --tag gcr.io/your-project-id/elastic-helloworld\n```\n\n4. Deploy the Hello World app to Google Cloud Run by running the command below. Be sure to replace your-project-id in the command below with your actual Google Cloud project ID.\n\n```bash\ngcloud run deploy elastic-helloworld --image gcr.io/your-project-id/elastic-helloworld\n```\n\n5. When the deployment process is complete, a Service URL will be displayed within the terminal. Copy and paste the Service URL into a browser to view the Hello World app running in Cloud Run.\n\n![hello world](/assets/images/deploy-observability-google-cloud-run/elastic-blog-8-hello-world.png)\n\n
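As an aside, if you need the Service URL again later, it can be retrieved with gcloud; a sketch using the service name from above (depending on your setup, you may also need to pass a --region flag):\n\n```bash\n# Print the URL of the deployed Cloud Run service\ngcloud run services describe elastic-helloworld --format=\'value(status.url)\'\n```\n\n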
## Instrument the Hello World web app with Elastic Observability\n\nWith a web app successfully running in Cloud Run, we’re now ready to add the minimal code necessary to start monitoring the app. To enable observability for the Hello World app in Elastic Cloud, we’ll perform the following six steps:\n\n1. In the Google Cloud Shell Editor, edit the Dockerfile to add the following Elastic OpenTelemetry environment variables, along with the commands to install and run the Elastic APM agent. Replace the ELASTIC_APM_SERVER_URL text and the ELASTIC_APM_SECRET_TOKEN text with the APM Server URL and the APM Secret Token values that you copied and saved in an earlier step.\n\n```dockerfile\nENV OTEL_EXPORTER_OTLP_ENDPOINT=\'ELASTIC_APM_SERVER_URL\'\nENV OTEL_EXPORTER_OTLP_HEADERS=\'Authorization=Bearer ELASTIC_APM_SECRET_TOKEN\'\nENV OTEL_LOG_LEVEL=info\nENV OTEL_METRICS_EXPORTER=otlp\nENV OTEL_RESOURCE_ATTRIBUTES=service.version=1.0,deployment.environment=production\nENV OTEL_SERVICE_NAME=helloworld\nENV OTEL_TRACES_EXPORTER=otlp\nRUN npm install --save @opentelemetry/api\nRUN npm install --save @opentelemetry/auto-instrumentations-node\nCMD [\"node\", \"--require\", \"@opentelemetry/auto-instrumentations-node/register\", \"index.js\"]\n```\n\nThe updated Dockerfile should look something like this (note that every OTEL variable, including the endpoint, is set with ENV):\n\n```dockerfile\nFROM node:18-slim\nWORKDIR /usr/src/app\nCOPY package*.json ./\nRUN npm install --only=production\nCOPY . ./\nENV OTEL_EXPORTER_OTLP_ENDPOINT=\'https://******.apm.us-central1.gcp.cloud.es.io:443\'\nENV OTEL_EXPORTER_OTLP_HEADERS=\'Authorization=Bearer ******************\'\nENV OTEL_LOG_LEVEL=info\nENV OTEL_METRICS_EXPORTER=otlp\nENV OTEL_RESOURCE_ATTRIBUTES=service.version=1.0,deployment.environment=production\nENV OTEL_SERVICE_NAME=helloworld\nENV OTEL_TRACES_EXPORTER=otlp\nRUN npm install --save @opentelemetry/api\nRUN npm install --save @opentelemetry/auto-instrumentations-node\nCMD [\"node\", \"--require\", \"@opentelemetry/auto-instrumentations-node/register\", \"index.js\"]\n```\n\n2. In the Google Cloud Shell Editor, edit the package.json file to add the Elastic APM dependency. The dependencies section in package.json should look something like this:\n\n```json\n\"dependencies\": {\n \t\"express\": \"^4.18.2\",\n \t\"elastic-apm-node\": \"^3.49.1\"\n },\n```\n\n3. In the Google Cloud Shell Editor, edit the index.js file:\n\n- Add the code required to initialize the Elastic OpenTelemetry APM agent:\n\n```javascript\nconst otel = require(\"@opentelemetry/api\");\nconst tracer = otel.trace.getTracer(\"hello-world\");\n```\n\n
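As a side note, the tracer obtained above is the standard @opentelemetry/api tracer, so the spans we create below can also carry attributes if you want richer trace data; a minimal illustrative sketch (the span name and attribute are made up for this example):\n\n```javascript\ntracer.startActiveSpan(\"work\", (span) => {\n  // Attach an illustrative attribute before ending the span\n  span.setAttribute(\"app.example\", \"hello\");\n  span.end();\n});\n```\n\n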
- Replace the “Hello World!” output code . . .\n\n```javascript\nres.send(`Hello World!`);\n```\n\n...with the “Hello Elastic Observability” code block.\n\n```javascript\nres.send(\n  `<h1>Hello Elastic Observability - Google Cloud Run - Node.js</h1>`\n);\n```\n\n- Add a trace “hi” before the “Hello Elastic Observability” code block and a trace “bye” after it.\n\n```javascript\ntracer.startActiveSpan(\"hi\", (span) => {\n  console.log(\"hello\");\n  span.end();\n});\nres.send(\n  `<h1>Hello Elastic Observability - Google Cloud Run - Node.js</h1>`\n);\ntracer.startActiveSpan(\"bye\", (span) => {\n  console.log(\"goodbye\");\n  span.end();\n});\n```\n\n- The completed index.js file should look something like this:\n\n```javascript\nconst otel = require(\"@opentelemetry/api\");\nconst tracer = otel.trace.getTracer(\"hello-world\");\n\nconst express = require(\"express\");\nconst app = express();\n\napp.get(\"/\", (req, res) => {\n  tracer.startActiveSpan(\"hi\", (span) => {\n    console.log(\"hello\");\n    span.end();\n  });\n  res.send(\n    `<h1>Hello Elastic Observability - Google Cloud Run - Node.js</h1>`\n  );\n  tracer.startActiveSpan(\"bye\", (span) => {\n    console.log(\"goodbye\");\n    span.end();\n  });\n});\n\nconst port = parseInt(process.env.PORT) || 8080;\napp.listen(port, () => {\n  console.log(`helloworld: listening on port ${port}`);\n});\n```\n\n4. Rebuild the Hello World app image and push the image to the Google Container Registry by running the command below in the terminal. Be sure to replace your-project-id in the command below with your actual Google Cloud project ID.\n\n```bash\ngcloud builds submit --tag gcr.io/your-project-id/elastic-helloworld\n```\n\n5. Redeploy the Hello World app to Google Cloud Run by running the command below. Be sure to replace your-project-id in the command below with your actual Google Cloud project ID.\n\n```bash\ngcloud run deploy elastic-helloworld --image gcr.io/your-project-id/elastic-helloworld\n```\n\n6. When the deployment process is complete, a Service URL will be displayed within the terminal. Copy and paste the Service URL into a browser to view the updated Hello World app running in Cloud Run.\n\n![elastic logo](/assets/images/deploy-observability-google-cloud-run/elastic-blog-9-elastic-logo.png)\n\n## Observe the Hello World web app\n\nNow that we’ve instrumented the web app to send observability data to Elastic Observability, we can use Elastic Cloud to monitor the web app’s operations.\n\n1. In Elastic Cloud, select the Observability **Services** menu item.\n\n2. Click the **helloworld** service.\n\n3. Click the **Transactions** tab.\n\n4. Scroll down and click the **GET /** transaction.\n\n5. Scroll down to the **Trace Sample** section to see the **GET /**, **hi**, and **bye** trace samples.\n\n![trace sample](/assets/images/deploy-observability-google-cloud-run/elastic-blog-10-trace-sample.png)\n\n## Observability made to scale\n\nYou’ve seen the entire process of deploying a web app to Google Cloud Run that is instrumented with Elastic Observability. The end result is a web app that will scale up and down with demand, combined with the observability tools to monitor the web app as it serves a single user or millions of users.\n\nNow that you’ve seen how to deploy a serverless web app instrumented with observability, visit [Elastic Observability](https://www.elastic.co/observability) to learn more about how to implement a complete observability solution for your apps. Or visit [Getting started with Elastic on Google Cloud](https://www.elastic.co/getting-started/google-cloud) for more examples of how you can drive the data insights you need by combining [Google Cloud monitoring](https://www.elastic.co/observability/google-cloud-monitoring) and cloud computing services with Elastic’s search-powered platform.\n\n_The release and timing of any features or functionality described in this post remain at Elastic\'s sole discretion. 
Any features or functionality not currently available may not be delivered on time or at all._\\n","code":"var Component=(()=>{var p=Object.create;var i=Object.defineProperty;var g=Object.getOwnPropertyDescriptor;var u=Object.getOwnPropertyNames;var m=Object.getPrototypeOf,b=Object.prototype.hasOwnProperty;var y=(o,e)=>()=>(e||o((e={exports:{}}).exports,e),e.exports),w=(o,e)=>{for(var n in e)i(o,n,{get:e[n],enumerable:!0})},a=(o,e,n,r)=>{if(e&&typeof e==\\"object\\"||typeof e==\\"function\\")for(let t of u(e))!b.call(o,t)&&t!==n&&i(o,t,{get:()=>e[t],enumerable:!(r=g(e,t))||r.enumerable});return o};var E=(o,e,n)=>(n=o!=null?p(m(o)):{},a(e||!o||!o.__esModule?i(n,\\"default\\",{value:o,enumerable:!0}):n,o)),v=o=>a(i({},\\"__esModule\\",{value:!0}),o);var c=y((T,s)=>{s.exports=_jsx_runtime});var R={};w(R,{default:()=>h,frontmatter:()=>f});var l=E(c()),f={title:\\"How to deploy Hello World Elastic Observability on Google Cloud Run\\",slug:\\"deploy-observability-google-cloud-run\\",date:\\"2023-08-28\\",description:\\"Follow the step-by-step process of instrumenting Elastic Observability for a Hello World web app running on Google Cloud Run.\\",author:[{slug:\\"jonathan-simon\\"}],image:\\"illustration-dev-sec-ops-cloud-automations-1680x980.png\\",tags:[{slug:\\"cloud-monitoring\\"},{slug:\\"google-cloud\\"},{slug:\\"google-cloud-run\\"},{slug:\\"apm\\"}]};function d(o){let e={a:\\"a\\",code:\\"code\\",em:\\"em\\",h2:\\"h2\\",img:\\"img\\",li:\\"li\\",ol:\\"ol\\",p:\\"p\\",pre:\\"pre\\",strong:\\"strong\\",ul:\\"ul\\",...o.components};return(0,l.jsxs)(l.Fragment,{children:[(0,l.jsxs)(e.p,{children:[\\"Elastic Cloud Observability is the premiere tool to provide visibility into your running web apps. Google Cloud Run is the serverless platform of choice to run your web apps that need to scale up massively and scale down to zero. Elastic Observability combined with Google Cloud Run is the perfect solution for developers to deploy \\",(0,l.jsx)(e.a,{href:\\"https://www.elastic.co/blog/observability-powerful-flexible-efficient\\",rel:\\"nofollow\\",children:\\"web apps that are auto-scaled with fully observable operations\\"}),\\", in a way that\\\\u2019s straightforward to implement and manage.\\"]}),`\\n`,(0,l.jsx)(e.p,{children:\\"This blog post will show you how to deploy a simple Hello World web app to Cloud Run and then walk you through the steps to instrument the Hello World web app to enable observation of the application\\\\u2019s operations with Elastic Cloud.\\"}),`\\n`,(0,l.jsx)(e.h2,{id:\\"elastic-observability-setup\\",children:\\"Elastic Observability setup\\"}),`\\n`,(0,l.jsx)(e.p,{children:\\"We\\\\u2019ll start with setting up an Elastic Cloud deployment, which is where observability will take place for the web app we\\\\u2019ll be deploying.\\"}),`\\n`,(0,l.jsxs)(e.p,{children:[\\"From the \\",(0,l.jsx)(e.a,{href:\\"https://cloud.elastic.co\\",rel:\\"nofollow\\",children:\\"Elastic Cloud console\\"}),\\", select \\",(0,l.jsx)(e.strong,{children:\\"Create deployment\\"}),\\".\\"]}),`\\n`,(0,l.jsx)(e.p,{children:(0,l.jsx)(e.img,{src:\\"/assets/images/deploy-observability-google-cloud-run/elastic-blog-1-create-deployment.png\\",alt:\\"create deployment\\",width:\\"1686\\",height:\\"1491\\"})}),`\\n`,(0,l.jsxs)(e.p,{children:[\\"Enter a deployment name and click \\",(0,l.jsx)(e.strong,{children:\\"Create deployment\\"}),\\". It takes a few minutes for your deployment to be created. 
While waiting, you are prompted to save the admin credentials for your deployment, which provides you with superuser access to your Elastic\\",(0,l.jsx)(\\"sup\\",{children:\\"\\\\xAE\\"}),\\" deployment. Keep these credentials safe as they are shown only once.\\"]}),`\\n`,(0,l.jsx)(e.p,{children:\\"Elastic Observability requires an APM Server URL and an APM Secret token for an app to send observability data to Elastic Cloud. Once the deployment is created, we\\\\u2019ll copy the Elastic Observability server URL and secret token and store them somewhere safely for adding to our web app code in a later step.\\"}),`\\n`,(0,l.jsxs)(e.p,{children:[\\"To copy the APM Server URL and the APM Secret Token, go to \\",(0,l.jsx)(e.a,{href:\\"https://cloud.elastic.co/home\\",rel:\\"nofollow\\",children:\\"Elastic Cloud\\"}),\\". Then go to the \\",(0,l.jsx)(e.a,{href:\\"https://cloud.elastic.co/deployments\\",rel:\\"nofollow\\",children:\\"Deployments\\"}),\\" page which lists all of the deployments you have created. Select the deployment you want to use, which will open the deployment details page. In the \\",(0,l.jsx)(e.strong,{children:\\"Kibana\\"}),\\" row of links, click on \\",(0,l.jsx)(e.strong,{children:\\"Open\\"}),\\" to open \\",(0,l.jsx)(e.strong,{children:\\"Kibana\\"}),\\" for your deployment.\\"]}),`\\n`,(0,l.jsx)(e.p,{children:(0,l.jsx)(e.img,{src:\\"/assets/images/deploy-observability-google-cloud-run/elastic-blog-2-my-deployment.png\\",alt:\\"my deployment\\",width:\\"1416\\",height:\\"1095\\"})}),`\\n`,(0,l.jsxs)(e.p,{children:[\\"Select \\",(0,l.jsx)(e.strong,{children:\\"Integrations\\"}),\\" from the top-level menu. Then click the \\",(0,l.jsx)(e.strong,{children:\\"APM\\"}),\\" tile.\\"]}),`\\n`,(0,l.jsx)(e.p,{children:(0,l.jsx)(e.img,{src:\\"/assets/images/deploy-observability-google-cloud-run/elastic-blog-3-apm.png\\",alt:\\"apm\\",width:\\"1377\\",height:\\"1206\\"})}),`\\n`,(0,l.jsx)(e.p,{children:\\"On the APM Agents page, copy the secretToken and the serverUrl values and save them for use in a later step.\\"}),`\\n`,(0,l.jsx)(e.p,{children:(0,l.jsx)(e.img,{src:\\"/assets/images/deploy-observability-google-cloud-run/elastic-blog-4-apm-agents.png\\",alt:\\"apm agents\\",width:\\"1999\\",height:\\"1415\\"})}),`\\n`,(0,l.jsx)(e.p,{children:\\"Now that we\\\\u2019ve completed the Elastic Cloud setup, the next step is to set up our Google Cloud project for deploying apps to Cloud Run.\\"}),`\\n`,(0,l.jsx)(e.h2,{id:\\"google-cloud-run-setup\\",children:\\"Google Cloud Run setup\\"}),`\\n`,(0,l.jsxs)(e.p,{children:[\\"First we\\\\u2019ll need a Google Cloud project, so let\\\\u2019s create one by going to the \\",(0,l.jsx)(e.a,{href:\\"https://console.cloud.google.com\\",rel:\\"nofollow\\",children:\\"Google Cloud console\\"}),\\" and creating a new project. Select the project menu and then click the \\",(0,l.jsx)(e.strong,{children:\\"New Project\\"}),\\" button.\\"]}),`\\n`,(0,l.jsx)(e.p,{children:(0,l.jsx)(e.img,{src:\\"/assets/images/deploy-observability-google-cloud-run/elastic-blog-5-google-cloud-gray-dropdown.png\\",alt:\\"google cloud with gray dropdown\\",width:\\"762\\",height:\\"141\\"})}),`\\n`,(0,l.jsx)(e.p,{children:(0,l.jsx)(e.img,{src:\\"/assets/images/deploy-observability-google-cloud-run/elastic-blog-5-select-a-project.png\\",alt:\\"select a project\\",width:\\"1999\\",height:\\"442\\"})}),`\\n`,(0,l.jsxs)(e.p,{children:[\\"Once the new project is created, we\\\\u2019ll need to enable the necessary APIs that our Hello World app will be using. 
This can be done by clicking this \\",(0,l.jsx)(e.a,{href:\\"https://console.cloud.google.com/flows/enableapi?apiid=compute.googleapis.com,,run.googleapis.com,containerregistry.googleapis.com,cloudbuild.googleapis.com\\",rel:\\"nofollow\\",children:\\"enable APIs\\"}),\\" link, which opens a page in the Google Cloud console that lists the APIs that will be enabled and allows us to confirm their activation.\\"]}),`\\n`,(0,l.jsx)(e.p,{children:(0,l.jsx)(e.img,{src:\\"/assets/images/deploy-observability-google-cloud-run/elastic-blog-6-enable-apis.png\\",alt:\\"enable apis\\",width:\\"1062\\",height:\\"675\\"})}),`\\n`,(0,l.jsxs)(e.p,{children:[\\"After we\\\\u2019ve enabled the necessary APIs, we\\\\u2019ll need to set up the required permissions for our Hello World app, which can be done in the \\",(0,l.jsx)(e.a,{href:\\"https://console.cloud.google.com/iam-admin\\",rel:\\"nofollow\\",children:\\"IAM section\\"}),\\" of the Google Cloud Console. Within the IAM section, select the \\",(0,l.jsx)(e.strong,{children:\\"Compute Engine\\"}),\\" default service account and add the following roles:\\"]}),`\\n`,(0,l.jsxs)(e.ul,{children:[`\\n`,(0,l.jsx)(e.li,{children:\\"Logs Viewer\\"}),`\\n`,(0,l.jsx)(e.li,{children:\\"Monitoring Viewer\\"}),`\\n`,(0,l.jsx)(e.li,{children:\\"Pub/Sub Subscriber\\"}),`\\n`]}),`\\n`,(0,l.jsx)(e.p,{children:(0,l.jsx)(e.img,{src:\\"/assets/images/deploy-observability-google-cloud-run/elastic-blog-7-principals.png\\",alt:\\"principals\\",width:\\"1797\\",height:\\"675\\"})}),`\\n`,(0,l.jsx)(e.h2,{id:\\"deploy-a-hello-world-web-app-to-cloud-run\\",children:\\"Deploy a Hello World web app to Cloud Run\\"}),`\\n`,(0,l.jsxs)(e.p,{children:[\\"We\\\\u2019ll perform the process of deploying a Node.js Hello World web app to Cloud Run using the handy Google Cloud tool called \\",(0,l.jsx)(e.a,{href:\\"https://console.cloud.google.com/cloudshelleditor\\",rel:\\"nofollow\\",children:\\"Cloud Shell Editor\\"}),\\". To deploy the Hello World app, we\\\\u2019ll perform the following five steps:\\"]}),`\\n`,(0,l.jsxs)(e.ol,{children:[`\\n`,(0,l.jsxs)(e.li,{children:[\\"In Cloud Shell Editor, in the terminal window that appears at the bottom of the screen, clone a \\",(0,l.jsx)(e.a,{href:\\"https://github.com/elastic/observability-examples/tree/main/gcp/run/helloworld\\",rel:\\"nofollow\\",children:\\"Node.js Hello World sample app\\"}),\\" repo from GitHub by entering the following command.\\"]}),`\\n`]}),`\\n`,(0,l.jsx)(e.pre,{children:(0,l.jsx)(e.code,{className:\\"language-bash\\",children:`git clone https://github.com/elastic/observability-examples\\n`})}),`\\n`,(0,l.jsxs)(e.ol,{start:\\"2\\",children:[`\\n`,(0,l.jsx)(e.li,{children:\\"Change directory to the location of the Hello World web app code.\\"}),`\\n`]}),`\\n`,(0,l.jsx)(e.pre,{children:(0,l.jsx)(e.code,{className:\\"language-bash\\",children:`cd gcp/run/helloworld\\n`})}),`\\n`,(0,l.jsxs)(e.ol,{start:\\"3\\",children:[`\\n`,(0,l.jsx)(e.li,{children:\\"Build the Hello World app image and push the image to Google Container Registry by running the command below in the terminal. Be sure to replace your-project-id in the command below with your actual Google Cloud project ID.\\"}),`\\n`]}),`\\n`,(0,l.jsx)(e.pre,{children:(0,l.jsx)(e.code,{className:\\"language-bash\\",children:`gcloud builds submit --tag gcr.io/your-project-id/elastic-helloworld\\n`})}),`\\n`,(0,l.jsxs)(e.ol,{start:\\"4\\",children:[`\\n`,(0,l.jsx)(e.li,{children:\\"Deploy the Hello World app to Google Cloud Run by running the command below. 
Be sure to replace your-project-id in the command below with your actual Google Cloud project ID.\\"}),`\\n`]}),`\\n`,(0,l.jsx)(e.pre,{children:(0,l.jsx)(e.code,{className:\\"language-bash\\",children:`gcloud run deploy elastic-helloworld --image gcr.io/your-project-id/elastic-helloworld\\n`})}),`\\n`,(0,l.jsxs)(e.ol,{start:\\"5\\",children:[`\\n`,(0,l.jsx)(e.li,{children:\\"When the deployment process is complete, a Service URL will be displayed within the terminal. Copy and paste the Service URL in a browser to view the Hello World app running in Cloud Run.\\"}),`\\n`]}),`\\n`,(0,l.jsx)(e.p,{children:(0,l.jsx)(e.img,{src:\\"/assets/images/deploy-observability-google-cloud-run/elastic-blog-8-hello-world.png\\",alt:\\"hello world\\",width:\\"1554\\",height:\\"288\\"})}),`\\n`,(0,l.jsx)(e.h2,{id:\\"instrument-the-hello-world-web-app-with-elastic-observability\\",children:\\"Instrument the Hello World web app with Elastic Observability\\"}),`\\n`,(0,l.jsx)(e.p,{children:\\"With a web app successfully running in Cloud Run, we\\\\u2019re now ready to add the minimal code necessary to start monitoring the app. To enable observability for the Hello World app in Elastic Cloud, we\\\\u2019ll perform the following six steps:\\"}),`\\n`,(0,l.jsxs)(e.ol,{children:[`\\n`,(0,l.jsx)(e.li,{children:\\"In the Google Cloud Shell Editor, edit the Dockerfile file to add the following Elastic Open Telemetry environment variables along with the commands to install and run the Elastic APM agent. Replace the ELASTIC_APM_SERVER_URL text and the ELASTIC_APM_SECRET_TOKEN text with the APM Server URL and the APM Secret Token values that you copied and saved in an earlier step.\\"}),`\\n`]}),`\\n`,(0,l.jsx)(e.pre,{children:(0,l.jsx)(e.code,{className:\\"language-dockerfile\\",children:`ENV OTEL_EXPORTER_OTLP_ENDPOINT=\'ELASTIC_APM_SERVER_URL\'\\nENV OTEL_EXPORTER_OTLP_HEADERS=\'Authorization=Bearer ELASTIC_APM_SECRET_TOKEN\'\\nENV OTEL_LOG_LEVEL=info\\nENV OTEL_METRICS_EXPORTER=otlp\\nENV OTEL_RESOURCE_ATTRIBUTES=service.version=1.0,deployment.environment=production\\nENV OTEL_SERVICE_NAME=helloworld\\nENV OTEL_TRACES_EXPORTER=otlp\\nRUN npm install --save @opentelemetry/api\\nRUN npm install --save @opentelemetry/auto-instrumentations-node\\nCMD [\\"node\\", \\"--require\\", \\"@opentelemetry/auto-instrumentations-node/register\\", \\"index.js\\"]\\n`})}),`\\n`,(0,l.jsx)(e.p,{children:\\"The updated Dockerfile should look something like this:\\"}),`\\n`,(0,l.jsx)(e.pre,{children:(0,l.jsx)(e.code,{className:\\"language-dockerfile\\",children:`FROM node:18-slim\\nWORKDIR /usr/src/app\\nCOPY package*.json ./\\nRUN npm install --only=production\\nCOPY . ./\\nOTEL_EXPORTER_OTLP_ENDPOINT=\'https://******.apm.us-central1.gcp.cloud.es.io:443\'\\nENV OTEL_EXPORTER_OTLP_HEADERS=\'Authorization=Bearer ******************\'\\nENV OTEL_LOG_LEVEL=info\\nENV OTEL_METRICS_EXPORTER=otlp\\nENV OTEL_RESOURCE_ATTRIBUTES=service.version=1.0,deployment.environment=production\\nENV OTEL_SERVICE_NAME=helloworld\\nENV OTEL_TRACES_EXPORTER=otlp\\nRUN npm install --save @opentelemetry/api\\nRUN npm install --save @opentelemetry/auto-instrumentations-node\\nCMD [\\"node\\", \\"--require\\", \\"@opentelemetry/auto-instrumentations-node/register\\", \\"index.js\\"]\\n`})}),`\\n`,(0,l.jsxs)(e.ol,{start:\\"2\\",children:[`\\n`,(0,l.jsx)(e.li,{children:\\"In the Google Cloud Shell Editor, edit the package.json file to add the Elastic APM dependency. 
The dependencies section in package.json should look something like this:\\"}),`\\n`]}),`\\n`,(0,l.jsx)(e.pre,{children:(0,l.jsx)(e.code,{className:\\"language-json\\",children:`\\"dependencies\\": {\\n \\t\\"express\\": \\"^4.18.2\\",\\n \\t\\"elastic-apm-node\\": \\"^3.49.1\\"\\n },\\n`})}),`\\n`,(0,l.jsxs)(e.ol,{start:\\"3\\",children:[`\\n`,(0,l.jsx)(e.li,{children:\\"In the Google Cloud Shell Editor, edit the index.js file:\\"}),`\\n`]}),`\\n`,(0,l.jsxs)(e.ul,{children:[`\\n`,(0,l.jsx)(e.li,{children:\\"Add the code required to initialize the Elastic Open Telemetry APM agent:\\"}),`\\n`]}),`\\n`,(0,l.jsx)(e.pre,{children:(0,l.jsx)(e.code,{className:\\"language-javascript\\",children:`const otel = require(\\"@opentelemetry/api\\");\\nconst tracer = otel.trace.getTracer(\\"hello-world\\");\\n`})}),`\\n`,(0,l.jsxs)(e.ul,{children:[`\\n`,(0,l.jsx)(e.li,{children:\\"Replace the \\\\u201CHello World!\\\\u201D output code . . .\\"}),`\\n`]}),`\\n`,(0,l.jsx)(e.pre,{children:(0,l.jsx)(e.code,{className:\\"language-javascript\\",children:\\"res.send(`Hello World!
`);\\\\n\\"})}),`\\n`,(0,l.jsx)(e.p,{children:\\"...with the \\\\u201CHello Elastic Observability\\\\u201D code block.\\"}),`\\n`,(0,l.jsx)(e.pre,{children:(0,l.jsx)(e.code,{className:\\"language-javascript\\",children:`res.send(\\n \\\\`\\n
\\n Hello Elastic Observability - Google Cloud Run - Node.js\\n
\\n
\\n
\\\\`\\n);\\n`})}),`\\n`,(0,l.jsxs)(e.ul,{children:[`\\n`,(0,l.jsx)(e.li,{children:\\"Add a trace \\\\u201Chi\\\\u201D before the \\\\u201CHello Elastic Observability\\\\u201D code block and add a trace \\\\u201Cbye\\\\u201D after the \\\\u201CHello Elastic Observability\\\\u201D code block.\\"}),`\\n`]}),`\\n`,(0,l.jsx)(e.pre,{children:(0,l.jsx)(e.code,{className:\\"language-javascript\\",children:`tracer.startActiveSpan(\\"hi\\", (span) => {\\n console.log(\\"hello\\");\\n span.end();\\n});\\nres.send(\\n \\\\`\\n
\\n Hello Elastic Observability - Google Cloud Run - Node.js\\n
\\n
\\n
\\\\`\\n);\\ntracer.startActiveSpan(\\"bye\\", (span) => {\\n console.log(\\"goodbye\\");\\n span.end();\\n});\\n`})}),`\\n`,(0,l.jsxs)(e.ul,{children:[`\\n`,(0,l.jsx)(e.li,{children:\\"The completed index.js file should look something like this:\\"}),`\\n`]}),`\\n`,(0,l.jsx)(e.pre,{children:(0,l.jsx)(e.code,{className:\\"language-javascript\\",children:`const otel = require(\\"@opentelemetry/api\\");\\nconst tracer = otel.trace.getTracer(\\"hello-world\\");\\n\\nconst express = require(\\"express\\");\\nconst app = express();\\n\\napp.get(\\"/\\", (req, res) => {\\n tracer.startActiveSpan(\\"hi\\", (span) => {\\n console.log(\\"hello\\");\\n span.end();\\n });\\n res.send(\\n \\\\`\\n
\\n Hello Elastic Observability - Google Cloud Run - Node.js\\n
\\n
\\n
\\\\`\\n );\\n tracer.startActiveSpan(\\"bye\\", (span) => {\\n console.log(\\"goodbye\\");\\n span.end();\\n });\\n});\\n\\nconst port = parseInt(process.env.PORT) || 8080;\\napp.listen(port, () => {\\n console.log(\\\\`helloworld: listening on port \\\\${port}\\\\`);\\n});\\n`})}),`\\n`,(0,l.jsxs)(e.ol,{start:\\"4\\",children:[`\\n`,(0,l.jsx)(e.li,{children:\\"Rebuild the Hello World app image and push the image to the Google Container Registry by running the command below in the terminal. Be sure to replace your-project-id in the command below with your actual Google Cloud project ID.\\"}),`\\n`]}),`\\n`,(0,l.jsx)(e.pre,{children:(0,l.jsx)(e.code,{className:\\"language-bash\\",children:`gcloud builds submit --tag gcr.io/your-project-id/elastic-helloworld\\n`})}),`\\n`,(0,l.jsxs)(e.ol,{start:\\"5\\",children:[`\\n`,(0,l.jsx)(e.li,{children:\\"Redeploy the Hello World app to Google Cloud Run by running the command below. Be sure to replace your-project-id in the command below with your actual Google Cloud project ID.\\"}),`\\n`]}),`\\n`,(0,l.jsx)(e.pre,{children:(0,l.jsx)(e.code,{className:\\"language-bash\\",children:`gcloud run deploy elastic-helloworld --image gcr.io/your-project-id/elastic-helloworld\\n`})}),`\\n`,(0,l.jsxs)(e.ol,{start:\\"6\\",children:[`\\n`,(0,l.jsx)(e.li,{children:\\"When the deployment process is complete, a Service URL will be displayed within the terminal. Copy and paste the Service URL in a browser to view the updated Hello World app running in Cloud Run.\\"}),`\\n`]}),`\\n`,(0,l.jsx)(e.p,{children:(0,l.jsx)(e.img,{src:\\"/assets/images/deploy-observability-google-cloud-run/elastic-blog-9-elastic-logo.png\\",alt:\\"elastic logo\\",width:\\"1999\\",height:\\"906\\"})}),`\\n`,(0,l.jsx)(e.h2,{id:\\"observe-the-hello-world-web-app\\",children:\\"Observe the Hello World web app\\"}),`\\n`,(0,l.jsx)(e.p,{children:\\"Now that we\\\\u2019ve instrumented the web app to send observability data to Elastic Observability, we can now use Elastic Cloud to monitor the web app\\\\u2019s operations.\\"}),`\\n`,(0,l.jsxs)(e.ol,{children:[`\\n`,(0,l.jsxs)(e.li,{children:[`\\n`,(0,l.jsxs)(e.p,{children:[\\"In Elastic Cloud, select the Observability \\",(0,l.jsx)(e.strong,{children:\\"Services\\"}),\\" menu item.\\"]}),`\\n`]}),`\\n`,(0,l.jsxs)(e.li,{children:[`\\n`,(0,l.jsxs)(e.p,{children:[\\"Click the \\",(0,l.jsx)(e.strong,{children:\\"helloworld\\"}),\\" service.\\"]}),`\\n`]}),`\\n`,(0,l.jsxs)(e.li,{children:[`\\n`,(0,l.jsxs)(e.p,{children:[\\"Click the \\",(0,l.jsx)(e.strong,{children:\\"Transactions\\"}),\\" tab.\\"]}),`\\n`]}),`\\n`,(0,l.jsxs)(e.li,{children:[`\\n`,(0,l.jsxs)(e.p,{children:[\\"Scroll down and click the \\",(0,l.jsx)(e.strong,{children:\\"GET /\\"}),\\" transaction.\\"]}),`\\n`]}),`\\n`,(0,l.jsxs)(e.li,{children:[`\\n`,(0,l.jsxs)(e.p,{children:[\\"Scroll down to the \\",(0,l.jsx)(e.strong,{children:\\"Trace Sample\\"}),\\" section to see the \\",(0,l.jsx)(e.strong,{children:\\"GET /\\"}),\\" , \\",(0,l.jsx)(e.strong,{children:\\"hi\\"}),\\" and \\",(0,l.jsx)(e.strong,{children:\\"bye\\"}),\\" trace samples.\\"]}),`\\n`]}),`\\n`]}),`\\n`,(0,l.jsx)(e.p,{children:(0,l.jsx)(e.img,{src:\\"/assets/images/deploy-observability-google-cloud-run/elastic-blog-10-trace-sample.png\\",alt:\\"trace sample\\",width:\\"1725\\",height:\\"1056\\"})}),`\\n`,(0,l.jsx)(e.h2,{id:\\"observability-made-to-scale\\",children:\\"Observability made to scale\\"}),`\\n`,(0,l.jsx)(e.p,{children:\\"You\\\\u2019ve seen the entire process of deploying a web app to Google Cloud Run that is 
instrumented with Elastic Observability. The end result is a web app that will scale up and down with demand combined with the observability tools to monitor the web app as it serves a single user or millions of users.\\"}),`\\n`,(0,l.jsxs)(e.p,{children:[\\"Now that you\\\\u2019ve seen how to deploy a serverless web app instrumented with observability, visit \\",(0,l.jsx)(e.a,{href:\\"https://www.elastic.co/observability\\",rel:\\"nofollow\\",children:\\"Elastic Observability\\"}),\\" to learn more about how to implement a complete observability solution for your apps. Or visit \\",(0,l.jsx)(e.a,{href:\\"https://www.elastic.co/getting-started/google-cloud\\",rel:\\"nofollow\\",children:\\"Getting started with Elastic on Google Cloud\\"}),\\" for more examples of how you can drive the data insights you need by combining \\",(0,l.jsx)(e.a,{href:\\"https://www.elastic.co/observability/google-cloud-monitoring\\",rel:\\"nofollow\\",children:\\"Google Cloud monitoring\\"}),\\" and cloud computing services with Elastic\\\\u2019s search-powered platform.\\"]}),`\\n`,(0,l.jsx)(e.p,{children:(0,l.jsx)(e.em,{children:\\"The release and timing of any features or functionality described in this post remain at Elastic\'s sole discretion. Any features or functionality not currently available may not be delivered on time or at all.\\"})})]})}function h(o={}){let{wrapper:e}=o.components||{};return e?(0,l.jsx)(e,{...o,children:(0,l.jsx)(d,{...o})}):d(o)}return v(R);})();\\n;return Component;"},"_id":"articles/how-to-deploy-hello-world-elastic-observability-google-cloud-run.mdx","_raw":{"sourceFilePath":"articles/how-to-deploy-hello-world-elastic-observability-google-cloud-run.mdx","sourceFileName":"how-to-deploy-hello-world-elastic-observability-google-cloud-run.mdx","sourceFileDir":"articles","contentType":"mdx","flattenedPath":"articles/how-to-deploy-hello-world-elastic-observability-google-cloud-run"},"type":"Article","imageUrl":"/assets/images/deploy-observability-google-cloud-run/illustration-dev-sec-ops-cloud-automations-1680x980.png","readingTime":"9 min read","url":"/deploy-observability-google-cloud-run","headings":[{"level":2,"title":"Elastic Observability setup","href":"#elastic-observability-setup"},{"level":2,"title":"Google Cloud Run setup","href":"#google-cloud-run-setup"},{"level":2,"title":"Deploy a Hello World web app to Cloud Run","href":"#deploy-a-hello-world-web-app-to-cloud-run"},{"level":2,"title":"Instrument the Hello World web app with Elastic Observability","href":"#instrument-the-hello-world-web-app-with-elastic-observability"},{"level":2,"title":"Observe the Hello World web app","href":"#observe-the-hello-world-web-app"},{"level":2,"title":"Observability made to scale","href":"#observability-made-to-scale"}]},{"title":"How to deploy a Hello World web app with Elastic Observability on Azure Container Apps","slug":"deploy-app-observability-azure-container-apps","date":"2023-10-23","description":"Follow the step-by-step process of instrumenting Elastic Observability for a Hello World web app running on Azure Container Apps.","image":"library-branding-elastic-observability-midnight-1680x980.png","author":[{"slug":"jonathan-simon","type":"Author","_raw":{}}],"tags":[{"slug":"cloud-monitoring","type":"Tag","_raw":{}},{"slug":"azure","type":"Tag","_raw":{}},{"slug":"apm","type":"Tag","_raw":{}},{"slug":"azure-container-apps","type":"Tag","_raw":{}}],"body":{"raw":"\\nElastic Observability is the optimal tool to provide visibility into your running web apps. 
Microsoft Azure Container Apps is a fully managed environment that enables you to run containerized applications on a serverless platform so that your applications scale up and down. This allows you to accomplish the dual objective of serving every customer’s need for availability while meeting your needs to do so as efficiently as possible.\\n\\nUsing Elastic Observability and Azure Container Apps is a perfect combination for developers to deploy [web apps that are auto-scaled with fully observable operations](https://www.elastic.co/blog/observability-powerful-flexible-efficient).\\n\\nThis blog post will show you how to deploy a simple Hello World web app to Azure Container Apps and then walk you through the steps to instrument the Hello World web app to enable observation of the application’s operations with Elastic Cloud.\\n\\n## Elastic Observability setup\\n\\nWe’ll start with setting up an Elastic Cloud deployment, which is where observability will take place for the web app we’ll be deploying.\\n\\nFrom the [Elastic Cloud console](https://cloud.elastic.co), select **Create deployment**.\\n\\n![create deployment](/assets/images/deploy-app-observability-azure-container-apps/elastic-blog-1-create-deployment.png)\\n\\nEnter a deployment name and click **Create deployment**. It takes a few minutes for your deployment to be created. While waiting, you are prompted to save the admin credentials for your deployment, which provides you with superuser access to your Elastic\xae deployment. Keep these credentials safe as they are shown only once.\\n\\nElastic Observability requires an APM Server URL and an APM Secret token for an app to send observability data to Elastic Cloud. Once the deployment is created, we’ll copy the Elastic Observability server URL and secret token and store them somewhere safely for adding to our web app code in a later step.\\n\\nTo copy the APM Server URL and the APM Secret Token, go to [Elastic Cloud](https://cloud.elastic.co/home) . Then go to the [Deployments](https://cloud.elastic.co/deployments) page, which lists all of the deployments you have created. Select the deployment you want to use, which will open the deployment details page. In the **Kibana** row of links, click on **Open** to open Kibana\xae for your deployment.\\n\\n![my deployment](/assets/images/deploy-app-observability-azure-container-apps/elastic-blog-2-my-deployment.png)\\n\\nSelect **Integrations** from the top-level menu. Then click the **APM** tile.\\n\\n![apm](/assets/images/deploy-app-observability-azure-container-apps/elastic-blog-3-apm.png)\\n\\nOn the APM Agents page, copy the secretToken and the serverUrl values and save them for use in a later step.\\n\\n![apm agents](/assets/images/deploy-app-observability-azure-container-apps/elastic-blog-4-apm-agents.png)\\n\\nNow that we’ve completed the Elastic Cloud setup, the next step is to set up our account in Azure for deploying apps to the Container Apps service.\\n\\n## Azure Container Apps setup\\n\\nFirst we’ll need an Azure account, so let’s create one by going to the [Microsoft Azure portal](https://azure.microsoft.com) and creating a new project. 
Click the **Start free** button and follow the steps to sign in or create a new account.\n\n![azure start free](/assets/images/deploy-app-observability-azure-container-apps/elastic-blog-5-azure-start-free.png)\n\n## Deploy a Hello World web app to Container Apps\n\nWe’ll perform the process of deploying a C# Hello World web app to Container Apps using the handy Azure tool called [Cloud Shell](https://azure.microsoft.com/en-us/get-started/azure-portal/cloud-shell). To deploy the Hello World app, we’ll perform the following 12 steps:\n\n1. From the [Azure portal](https://portal.azure.com/), click the Cloud Shell icon at the top of the portal to open Cloud Shell…\n\n![cloud shell](/assets/images/deploy-app-observability-azure-container-apps/elastic-blog-6-cloud-shell.png)\n\n… and when the Cloud Shell first opens, select **Bash** as the shell type to use.\n\n![bash](/assets/images/deploy-app-observability-azure-container-apps/elastic-blog-7-bash.png)\n\n2. If you’re prompted that “You have no storage mounted,” then click the **Create storage** button to create a file store to be used for saving and editing files from Cloud Shell.\n\n![create storage](/assets/images/deploy-app-observability-azure-container-apps/elastic-blog-8-create-storage.png)\n\n3. In Cloud Shell, clone a [C# Hello World sample app](https://github.com/elastic/observability-examples/tree/main/azure/container-apps/helloworld) repo from GitHub by entering the following command.\n\n```bash\ngit clone https://github.com/elastic/observability-examples\n```\n\n4. Change directory to the location of the Hello World web app code.\n\n```bash\ncd observability-examples/azure/container-apps/helloworld\n```\n\n5. Define the environment variables that we’ll be using in the commands throughout this blog post.\n\n```bash\nRESOURCE_GROUP=\"helloworld-containerapps\"\nLOCATION=\"centralus\"\nENVIRONMENT=\"env-helloworld-containerapps\"\nAPP_NAME=\"elastic-helloworld\"\n```\n\n6. Define a unique container registry name by running the following command.\n\n```bash\nACR_NAME=\"helloworld\"$RANDOM\n```\n\n7. Create an Azure resource group by running the following command.\n\n```bash\naz group create --name $RESOURCE_GROUP --location \"$LOCATION\"\n```\n\n8. Run the following command to create a container registry in Azure Container Registry.\n\n```bash\naz acr create --resource-group $RESOURCE_GROUP \\\n--name $ACR_NAME --sku Basic --admin-enabled true\n```\n\n9. Build the app image and push it to Azure Container Registry by running the following command.\n\n```bash\naz acr build --registry $ACR_NAME --image $APP_NAME .\n```\n\n10. Register the Microsoft.OperationalInsights namespace as a provider by running the following command.\n\n```bash\naz provider register -n Microsoft.OperationalInsights --wait\n```\n\n11. Run the following command to create a Container App environment for deploying your app into.\n\n```bash\naz containerapp env create --name $ENVIRONMENT \\\n--resource-group $RESOURCE_GROUP --location \"$LOCATION\"\n```\n\n12. 
Create a new Container App by deploying the Hello World app’s image to Container Apps, using the following command.\n\n```bash\naz containerapp create \\\n --name $APP_NAME \\\n --resource-group $RESOURCE_GROUP \\\n --environment $ENVIRONMENT \\\n --image $ACR_NAME.azurecr.io/$APP_NAME \\\n --target-port 3500 \\\n --ingress \'external\' \\\n --registry-server $ACR_NAME.azurecr.io \\\n --query properties.configuration.ingress.fqdn\n```\n\nThis command will output the deployed Hello World app\'s fully qualified domain name (FQDN). Copy and paste the FQDN into a browser to see your running Hello World app.\n\n![hello world](/assets/images/deploy-app-observability-azure-container-apps/elastic-blog-9-hello-world.png)\n\n## Instrument the Hello World web app with Elastic Observability\n\nWith a web app successfully running in Container Apps, we’re now ready to add the minimal code necessary to enable observability for the Hello World app in Elastic Cloud. We’ll perform the following eight steps:\n\n1. In Azure Cloud Shell, create a new file named Telemetry.cs by typing the following command.\n\n```bash\ntouch Telemetry.cs\n```\n\n2. Open the Azure Cloud Shell file editor by typing the following command in Cloud Shell.\n\n```bash\ncode .\n```\n\n3. In the Azure Cloud Shell editor, open the Telemetry.cs file and paste in the following code. Save the edited file in Cloud Shell by pressing the [Ctrl] + [s] keys on your keyboard (or if you’re on a macOS computer, use the [⌘] + [s] keys). This class file is used to create a tracer ActivitySource, which can generate trace Activity spans for observability.\n\n```csharp\nusing System.Diagnostics;\n\npublic static class Telemetry\n{\n\tpublic static readonly ActivitySource activitySource = new(\"Helloworld\");\n}\n```\n\n4. In the Azure Cloud Shell editor, edit the file named Dockerfile to add the following Elastic OpenTelemetry environment variables. Replace the ELASTIC_APM_SERVER_URL text and the ELASTIC_APM_SECRET_TOKEN text with the APM Server URL and the APM Secret Token values that you copied and saved in an earlier step.\n\nSave the edited file in Cloud Shell by pressing the [Ctrl] + [s] keys on your keyboard (or if you’re on a macOS computer, use the [⌘] + [s] keys).\n\nThe updated Dockerfile should look something like this:\n\n```dockerfile\nFROM ${ARCH}mcr.microsoft.com/dotnet/aspnet:7.0 AS base\nWORKDIR /app\n\nFROM mcr.microsoft.com/dotnet/sdk:8.0-preview AS build\nARG TARGETPLATFORM\n\nWORKDIR /src\nCOPY [\"helloworld.csproj\", \"./\"]\nRUN dotnet restore \"./helloworld.csproj\"\nCOPY . .\nWORKDIR \"/src/.\"\nRUN dotnet build \"helloworld.csproj\" -c Release -o /app/build\n\nFROM build AS publish\nRUN dotnet publish \"helloworld.csproj\" -c Release -o /app/publish\n\nFROM base AS final\nWORKDIR /app\nCOPY --from=publish /app/publish .\nEXPOSE 3500\nENV ASPNETCORE_URLS=http://+:3500\n\nENV OTEL_EXPORTER_OTLP_ENDPOINT=\'https://******.apm.us-east-2.aws.elastic-cloud.com:443\'\nENV OTEL_EXPORTER_OTLP_HEADERS=\'Authorization=Bearer ***********\'\nENV OTEL_LOG_LEVEL=info\nENV OTEL_METRICS_EXPORTER=otlp\nENV OTEL_RESOURCE_ATTRIBUTES=service.version=1.0,deployment.environment=production\nENV OTEL_SERVICE_NAME=helloworld\nENV OTEL_TRACES_EXPORTER=otlp\n\nENTRYPOINT [\"dotnet\", \"helloworld.dll\"]\n```\n\n5. In the Azure Cloud Shell editor, edit the helloworld.csproj file to add the Elastic APM and OpenTelemetry dependencies. 
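One way to pull the packages in from Cloud Shell is the dotnet CLI; this is a sketch rather than the post’s exact dependency list, but the Program.cs changes below rely on at least these standard OpenTelemetry packages:\n\n```bash\ndotnet add package OpenTelemetry\ndotnet add package OpenTelemetry.Exporter.OpenTelemetryProtocol\ndotnet add package OpenTelemetry.Extensions.Hosting\ndotnet add package OpenTelemetry.Instrumentation.AspNetCore\n```\n\n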
The updated helloworld.csproj file should look something like this (the package list is abridged; pin versions appropriate to your project):\n\n```xml\n<Project Sdk=\"Microsoft.NET.Sdk.Web\">\n\n <PropertyGroup>\n\t<TargetFramework>net7.0</TargetFramework>\n\t<Nullable>enable</Nullable>\n\t<ImplicitUsings>enable</ImplicitUsings>\n </PropertyGroup>\n\n <ItemGroup>\n\t<!-- Elastic APM and OpenTelemetry dependencies -->\n\t<PackageReference Include=\"OpenTelemetry\" Version=\"...\" />\n\t<PackageReference Include=\"OpenTelemetry.Exporter.OpenTelemetryProtocol\" Version=\"...\" />\n\t<PackageReference Include=\"OpenTelemetry.Extensions.Hosting\" Version=\"...\" />\n\t<PackageReference Include=\"OpenTelemetry.Instrumentation.AspNetCore\" Version=\"...\" />\n </ItemGroup>\n\n</Project>\n```\n\n6. In the Azure Cloud Shell editor, edit the Program.cs file:\n\n- Add a using statement at the top of the file to import System.Diagnostics, which is used to create Activities that are equivalent to “spans” in OpenTelemetry. Also import the OpenTelemetry.Resources and OpenTelemetry.Trace packages.\n\n```csharp\nusing System.Diagnostics;\nusing OpenTelemetry.Resources;\nusing OpenTelemetry.Trace;\n```\n\n- Update the “builder” initialization code block to include configuration to enable Elastic OpenTelemetry observability. Note that the name passed to AddSource must match the ActivitySource name (“Helloworld”) defined in Telemetry.cs, and the OTLP exporter only needs to be registered once.\n\n```csharp\nbuilder.Services.AddOpenTelemetry().WithTracing(builder => builder\n \t.AddSource(\"Helloworld\")\n \t.AddAspNetCoreInstrumentation()\n \t.AddOtlpExporter()\n \t.ConfigureResource(resource =>\n \tresource.AddService(\n \tserviceName: \"helloworld\"))\n);\nbuilder.Services.AddControllers();\n```\n\n- Replace the “Hello World!” HTML output string…\n\n```html\nHello World!\n```\n\n- ...with the “Hello Elastic Observability” HTML output string.\n\n```html\n<html>\n  <body>\n    <h1>Hello Elastic Observability - Azure Container Apps - C#</h1>\n  </body>\n</html>\n```\n\n- Add a telemetry trace span around the output response utilizing the Telemetry class’ ActivitySource.\n\n```csharp\nusing (Activity activity = Telemetry.activitySource.StartActivity(\"HelloSpan\")!)\n \t{\n \t\tConsole.Write(\"hello\");\n \t\tawait context.Response.WriteAsync(output);\n \t}\n```\n\nThe updated Program.cs file should look something like this:\n\n```csharp\nusing System.Diagnostics;\nusing OpenTelemetry.Resources;\nusing OpenTelemetry.Trace;\n\nvar builder = WebApplication.CreateBuilder(args);\nbuilder.Services.AddOpenTelemetry().WithTracing(builder => builder\n \t.AddSource(\"Helloworld\")\n \t.AddAspNetCoreInstrumentation()\n \t.AddOtlpExporter()\n \t.ConfigureResource(resource =>\n \tresource.AddService(\n \tserviceName: \"helloworld\"))\n);\nbuilder.Services.AddControllers();\nvar app = builder.Build();\n\nstring output =\n\"\"\"\n<html>\n  <body>\n    <h1>Hello Elastic Observability - Azure Container Apps - C#</h1>\n  </body>\n</html>\n
\\n\\"\\"\\";\\n\\napp.MapGet(\\"/\\", async context =>\\n\\t{\\n \\tusing (Activity activity = Telemetry.activitySource.StartActivity(\\"HelloSpan\\")!)\\n \\t\\t{\\n \\t\\tConsole.Write(\\"hello\\");\\n \\t\\tawait context.Response.WriteAsync(output);\\n \\t\\t}\\n\\t}\\n);\\napp.Run();\\n```\\n\\n7. Rebuild the Hello World app image and push the image to the Azure Container Registry by running the following command.\\n\\n```bash\\naz acr build --registry $ACR_NAME --image $APP_NAME .\\n```\\n\\n8. Redeploy the updated Hello World app to Azure Container Apps, using the following command.\\n\\n```bash\\naz containerapp create \\\\\\n --name $APP_NAME \\\\\\n --resource-group $RESOURCE_GROUP \\\\\\n --environment $ENVIRONMENT \\\\\\n --image $ACR_NAME.azurecr.io/$APP_NAME \\\\\\n --target-port 3500 \\\\\\n --ingress \'external\' \\\\\\n --registry-server $ACR_NAME.azurecr.io \\\\\\n --query properties.configuration.ingress.fqdn\\n```\\n\\nThis command will output the deployed Hello World app\'s fully qualified domain name (FQDN). Copy and paste the FQDN into a browser to see the updated Hello World app running in Azure Container Apps.\\n\\n![hello observability](/assets/images/deploy-app-observability-azure-container-apps/elastic-blog-10-elastic-hello-observability.png)\\n\\n## Observe the Hello World web app\\n\\nNow that we’ve instrumented the web app to send observability data to Elastic Observability, we can now use Elastic Cloud to monitor the web app’s operations.\\n\\n1. In Elastic Cloud, select the Observability **Services** menu item.\\n\\n2. Click the **helloworld** service.\\n\\n3. Click the **Transactions** tab.\\n\\n4. Scroll down and click the **GET /** transaction.Scroll down to the **Trace Sample** section to see the **GET /** , **HelloSpan** trace sample.\\n\\n![latency-distribution](/assets/images/deploy-app-observability-azure-container-apps/elastic-blog-12-latency-distribution.png)\\n\\n## Observability made to scale\\n\\nYou’ve seen the entire process of deploying a web app to Azure Container Apps that is instrumented with Elastic Observability. This web app is now fully available on the web running on a platform that will auto-scale to serve visitors worldwide. And it’s instrumented for Elastic Observability APM using OpenTelemetry to ingest data into Elastic Cloud’s Kibana dashboards.\\n\\nNow that you’ve seen how to deploy a Hello World web app with a basic observability setup, visit [Elastic Observability](https://www.elastic.co/observability) to learn more about expanding to a full scale observability coverage solution for your apps. Or visit [Getting started with Elastic on Microsoft Azure](https://www.elastic.co/getting-started/microsoft-azure) for more examples of how you can drive the data insights you need by combining Microsoft Azure’s cloud computing services with Elastic’s search-powered platform.\\n\\n_The release and timing of any features or functionality described in this post remain at Elastic\'s sole discretion. 
Any features or functionality not currently available may not be delivered on time or at all._\\n","code":"var Component=(()=>{var h=Object.create;var o=Object.defineProperty;var u=Object.getOwnPropertyDescriptor;var g=Object.getOwnPropertyNames;var m=Object.getPrototypeOf,b=Object.prototype.hasOwnProperty;var y=(t,e)=>()=>(e||t((e={exports:{}}).exports,e),e.exports),w=(t,e)=>{for(var l in e)o(t,l,{get:e[l],enumerable:!0})},a=(t,e,l,i)=>{if(e&&typeof e==\\"object\\"||typeof e==\\"function\\")for(let r of g(e))!b.call(t,r)&&r!==l&&o(t,r,{get:()=>e[r],enumerable:!(i=u(e,r))||i.enumerable});return t};var f=(t,e,l)=>(l=t!=null?h(m(t)):{},a(e||!t||!t.__esModule?o(l,\\"default\\",{value:t,enumerable:!0}):l,t)),A=t=>a(o({},\\"__esModule\\",{value:!0}),t);var c=y((O,s)=>{s.exports=_jsx_runtime});var E={};w(E,{default:()=>p,frontmatter:()=>v});var n=f(c()),v={title:\\"How to deploy a Hello World web app with Elastic Observability on Azure Container Apps\\",slug:\\"deploy-app-observability-azure-container-apps\\",date:\\"2023-10-23\\",description:\\"Follow the step-by-step process of instrumenting Elastic Observability for a Hello World web app running on Azure Container Apps.\\",author:[{slug:\\"jonathan-simon\\"}],image:\\"library-branding-elastic-observability-midnight-1680x980.png\\",tags:[{slug:\\"cloud-monitoring\\"},{slug:\\"azure\\"},{slug:\\"apm\\"},{slug:\\"azure-container-apps\\"}]};function d(t){let e={a:\\"a\\",code:\\"code\\",em:\\"em\\",h2:\\"h2\\",img:\\"img\\",li:\\"li\\",ol:\\"ol\\",p:\\"p\\",pre:\\"pre\\",strong:\\"strong\\",ul:\\"ul\\",...t.components};return(0,n.jsxs)(n.Fragment,{children:[(0,n.jsx)(e.p,{children:\\"Elastic Observability is the optimal tool to provide visibility into your running web apps. Microsoft Azure Container Apps is a fully managed environment that enables you to run containerized applications on a serverless platform so that your applications scale up and down. 
This allows you to accomplish the dual objective of serving every customer\\\\u2019s need for availability while meeting your needs to do so as efficiently as possible.\\"}),`\\n`,(0,n.jsxs)(e.p,{children:[\\"Using Elastic Observability and Azure Container Apps is a perfect combination for developers to deploy \\",(0,n.jsx)(e.a,{href:\\"https://www.elastic.co/blog/observability-powerful-flexible-efficient\\",rel:\\"nofollow\\",children:\\"web apps that are auto-scaled with fully observable operations\\"}),\\".\\"]}),`\\n`,(0,n.jsx)(e.p,{children:\\"This blog post will show you how to deploy a simple Hello World web app to Azure Container Apps and then walk you through the steps to instrument the Hello World web app to enable observation of the application\\\\u2019s operations with Elastic Cloud.\\"}),`\\n`,(0,n.jsx)(e.h2,{id:\\"elastic-observability-setup\\",children:\\"Elastic Observability setup\\"}),`\\n`,(0,n.jsx)(e.p,{children:\\"We\\\\u2019ll start with setting up an Elastic Cloud deployment, which is where observability will take place for the web app we\\\\u2019ll be deploying.\\"}),`\\n`,(0,n.jsxs)(e.p,{children:[\\"From the \\",(0,n.jsx)(e.a,{href:\\"https://cloud.elastic.co\\",rel:\\"nofollow\\",children:\\"Elastic Cloud console\\"}),\\", select \\",(0,n.jsx)(e.strong,{children:\\"Create deployment\\"}),\\".\\"]}),`\\n`,(0,n.jsx)(e.p,{children:(0,n.jsx)(e.img,{src:\\"/assets/images/deploy-app-observability-azure-container-apps/elastic-blog-1-create-deployment.png\\",alt:\\"create deployment\\",width:\\"1524\\",height:\\"1362\\"})}),`\\n`,(0,n.jsxs)(e.p,{children:[\\"Enter a deployment name and click \\",(0,n.jsx)(e.strong,{children:\\"Create deployment\\"}),\\". It takes a few minutes for your deployment to be created. While waiting, you are prompted to save the admin credentials for your deployment, which provides you with superuser access to your Elastic\\\\xAE deployment. Keep these credentials safe as they are shown only once.\\"]}),`\\n`,(0,n.jsx)(e.p,{children:\\"Elastic Observability requires an APM Server URL and an APM Secret token for an app to send observability data to Elastic Cloud. Once the deployment is created, we\\\\u2019ll copy the Elastic Observability server URL and secret token and store them somewhere safely for adding to our web app code in a later step.\\"}),`\\n`,(0,n.jsxs)(e.p,{children:[\\"To copy the APM Server URL and the APM Secret Token, go to \\",(0,n.jsx)(e.a,{href:\\"https://cloud.elastic.co/home\\",rel:\\"nofollow\\",children:\\"Elastic Cloud\\"}),\\" . Then go to the \\",(0,n.jsx)(e.a,{href:\\"https://cloud.elastic.co/deployments\\",rel:\\"nofollow\\",children:\\"Deployments\\"}),\\" page, which lists all of the deployments you have created. Select the deployment you want to use, which will open the deployment details page. In the \\",(0,n.jsx)(e.strong,{children:\\"Kibana\\"}),\\" row of links, click on \\",(0,n.jsx)(e.strong,{children:\\"Open\\"}),\\" to open Kibana\\\\xAE for your deployment.\\"]}),`\\n`,(0,n.jsx)(e.p,{children:(0,n.jsx)(e.img,{src:\\"/assets/images/deploy-app-observability-azure-container-apps/elastic-blog-2-my-deployment.png\\",alt:\\"my deployment\\",width:\\"1416\\",height:\\"1095\\"})}),`\\n`,(0,n.jsxs)(e.p,{children:[\\"Select \\",(0,n.jsx)(e.strong,{children:\\"Integrations\\"}),\\" from the top-level menu. 
Then click the \\",(0,n.jsx)(e.strong,{children:\\"APM\\"}),\\" tile.\\"]}),`\\n`,(0,n.jsx)(e.p,{children:(0,n.jsx)(e.img,{src:\\"/assets/images/deploy-app-observability-azure-container-apps/elastic-blog-3-apm.png\\",alt:\\"apm\\",width:\\"1377\\",height:\\"1206\\"})}),`\\n`,(0,n.jsx)(e.p,{children:\\"On the APM Agents page, copy the secretToken and the serverUrl values and save them for use in a later step.\\"}),`\\n`,(0,n.jsx)(e.p,{children:(0,n.jsx)(e.img,{src:\\"/assets/images/deploy-app-observability-azure-container-apps/elastic-blog-4-apm-agents.png\\",alt:\\"apm agents\\",width:\\"1999\\",height:\\"1415\\"})}),`\\n`,(0,n.jsx)(e.p,{children:\\"Now that we\\\\u2019ve completed the Elastic Cloud setup, the next step is to set up our account in Azure for deploying apps to the Container Apps service.\\"}),`\\n`,(0,n.jsx)(e.h2,{id:\\"azure-container-apps-setup\\",children:\\"Azure Container Apps setup\\"}),`\\n`,(0,n.jsxs)(e.p,{children:[\\"First we\\\\u2019ll need an Azure account, so let\\\\u2019s create one by going to the \\",(0,n.jsx)(e.a,{href:\\"https://azure.microsoft.com\\",rel:\\"nofollow\\",children:\\"Microsoft Azure portal\\"}),\\" and creating a new project. Click the \\",(0,n.jsx)(e.strong,{children:\\"Start free\\"}),\\" button and follow the steps to sign in or create a new account.\\"]}),`\\n`,(0,n.jsx)(e.p,{children:(0,n.jsx)(e.img,{src:\\"/assets/images/deploy-app-observability-azure-container-apps/elastic-blog-5-azure-start-free.png\\",alt:\\"azure start free\\",width:\\"1803\\",height:\\"1047\\"})}),`\\n`,(0,n.jsx)(e.h2,{id:\\"deploy-a-hello-world-web-app-to-container-apps\\",children:\\"Deploy a Hello World web app to Container Apps\\"}),`\\n`,(0,n.jsxs)(e.p,{children:[\\"We\\\\u2019ll perform the process of deploying a C# Hello World web app to Container Apps using the handy Azure tool called \\",(0,n.jsx)(e.a,{href:\\"https://azure.microsoft.com/en-us/get-started/azure-portal/cloud-shell\\",rel:\\"nofollow\\",children:\\"Cloud Shell\\"}),\\". 
To deploy the Hello World app, we\\\\u2019ll perform the following 12 steps:\\"]}),`\\n`,(0,n.jsxs)(e.ol,{children:[`\\n`,(0,n.jsxs)(e.li,{children:[\\"From the \\",(0,n.jsx)(e.a,{href:\\"https://portal.azure.com/\\",rel:\\"nofollow\\",children:\\"Azure portal\\"}),\\", click the Cloud Shell icon at the top of the portal to open Cloud Shell\\\\u2026\\"]}),`\\n`]}),`\\n`,(0,n.jsx)(e.p,{children:(0,n.jsx)(e.img,{src:\\"/assets/images/deploy-app-observability-azure-container-apps/elastic-blog-6-cloud-shell.png\\",alt:\\"cloud shell\\",width:\\"1824\\",height:\\"408\\"})}),`\\n`,(0,n.jsxs)(e.p,{children:[\\"\\\\u2026 and when the Cloud Shell first opens, select \\",(0,n.jsx)(e.strong,{children:\\"Bash\\"}),\\" as the shell type to use.\\"]}),`\\n`,(0,n.jsx)(e.p,{children:(0,n.jsx)(e.img,{src:\\"/assets/images/deploy-app-observability-azure-container-apps/elastic-blog-7-bash.png\\",alt:\\"bash\\",width:\\"1848\\",height:\\"994\\"})}),`\\n`,(0,n.jsxs)(e.ol,{start:\\"2\\",children:[`\\n`,(0,n.jsxs)(e.li,{children:[\\"If you\\\\u2019re prompted that \\\\u201CYou have no storage mounted,\\\\u201D then click the \\",(0,n.jsx)(e.strong,{children:\\"Create storage\\"}),\\" button to create a file store to be used for saving and editing files from Cloud Shell.\\"]}),`\\n`]}),`\\n`,(0,n.jsx)(e.p,{children:(0,n.jsx)(e.img,{src:\\"/assets/images/deploy-app-observability-azure-container-apps/elastic-blog-8-create-storage.png\\",alt:\\"create storage\\",width:\\"1815\\",height:\\"990\\"})}),`\\n`,(0,n.jsxs)(e.ol,{start:\\"3\\",children:[`\\n`,(0,n.jsxs)(e.li,{children:[\\"In Cloud Shell, clone a \\",(0,n.jsx)(e.a,{href:\\"https://github.com/elastic/observability-examples/tree/main/azure/container-apps/helloworld\\",rel:\\"nofollow\\",children:\\"C# Hello World sample app\\"}),\\" repo from GitHub by entering the following command.\\"]}),`\\n`]}),`\\n`,(0,n.jsx)(e.pre,{children:(0,n.jsx)(e.code,{className:\\"language-bash\\",children:`git clone https://github.com/elastic/observability-examples\\n`})}),`\\n`,(0,n.jsxs)(e.ol,{start:\\"4\\",children:[`\\n`,(0,n.jsx)(e.li,{children:\\"Change directory to the location of the Hello World web app code.\\"}),`\\n`]}),`\\n`,(0,n.jsx)(e.pre,{children:(0,n.jsx)(e.code,{className:\\"language-bash\\",children:`cd observability-examples/azure/container-apps/helloworld\\n`})}),`\\n`,(0,n.jsxs)(e.ol,{start:\\"5\\",children:[`\\n`,(0,n.jsx)(e.li,{children:\\"Define the environment variables that we\\\\u2019ll be using in the commands throughout this blog post.\\"}),`\\n`]}),`\\n`,(0,n.jsx)(e.pre,{children:(0,n.jsx)(e.code,{className:\\"language-bash\\",children:`RESOURCE_GROUP=\\"helloworld-containerapps\\"\\nLOCATION=\\"centralus\\"\\nENVIRONMENT=\\"env-helloworld-containerapps\\"\\nAPP_NAME=\\"elastic-helloworld\\"\\n`})}),`\\n`,(0,n.jsxs)(e.ol,{start:\\"6\\",children:[`\\n`,(0,n.jsx)(e.li,{children:\\"Define a registry container name that is unique by running the following command.\\"}),`\\n`]}),`\\n`,(0,n.jsx)(e.pre,{children:(0,n.jsx)(e.code,{className:\\"language-bash\\",children:`ACR_NAME=\\"helloworld\\"$RANDOM\\n`})}),`\\n`,(0,n.jsxs)(e.ol,{start:\\"7\\",children:[`\\n`,(0,n.jsx)(e.li,{children:\\"Create an Azure resource group by running the following command.\\"}),`\\n`]}),`\\n`,(0,n.jsx)(e.pre,{children:(0,n.jsx)(e.code,{className:\\"language-bash\\",children:`az group create --name $RESOURCE_GROUP --location \\"$LOCATION\\"\\n`})}),`\\n`,(0,n.jsxs)(e.ol,{start:\\"8\\",children:[`\\n`,(0,n.jsx)(e.li,{children:\\"Run the following command to create a registry 
container in Azure Container Registry.\\"}),`\\n`]}),`\\n`,(0,n.jsx)(e.pre,{children:(0,n.jsx)(e.code,{className:\\"language-bash\\",children:`az acr create --resource-group $RESOURCE_GROUP \\\\\\\\\\n--name $ACR_NAME --sku Basic --admin-enable true\\n`})}),`\\n`,(0,n.jsxs)(e.ol,{start:\\"9\\",children:[`\\n`,(0,n.jsx)(e.li,{children:\\"Build the app image and push it to Azure Container Registry by running the following command.\\"}),`\\n`]}),`\\n`,(0,n.jsx)(e.pre,{children:(0,n.jsx)(e.code,{className:\\"language-bash\\",children:`az acr build --registry $ACR_NAME --image $APP_NAME .\\n`})}),`\\n`,(0,n.jsxs)(e.ol,{start:\\"10\\",children:[`\\n`,(0,n.jsx)(e.li,{children:\\"Register the Microsoft.OperationalInsights namespace as a provider by running the following command.\\"}),`\\n`]}),`\\n`,(0,n.jsx)(e.pre,{children:(0,n.jsx)(e.code,{className:\\"language-bash\\",children:`az provider register -n Microsoft.OperationalInsights --wait\\n`})}),`\\n`,(0,n.jsxs)(e.ol,{start:\\"11\\",children:[`\\n`,(0,n.jsx)(e.li,{children:\\"Run the following command to create a Container App environment for deploying your app into.\\"}),`\\n`]}),`\\n`,(0,n.jsx)(e.pre,{children:(0,n.jsx)(e.code,{className:\\"language-bash\\",children:`az containerapp env create --name $ENVIRONMENT \\\\\\\\\\n--resource-group $RESOURCE_GROUP --location \\"$LOCATION\\"\\n`})}),`\\n`,(0,n.jsxs)(e.ol,{start:\\"12\\",children:[`\\n`,(0,n.jsx)(e.li,{children:\\"Create a new Container App by deploying the Hello World app\\\\u2019s image to Container Apps, using the following command.\\"}),`\\n`]}),`\\n`,(0,n.jsx)(e.pre,{children:(0,n.jsx)(e.code,{className:\\"language-bash\\",children:`az containerapp create \\\\\\\\\\n --name $APP_NAME \\\\\\\\\\n --resource-group $RESOURCE_GROUP \\\\\\\\\\n --environment $ENVIRONMENT \\\\\\\\\\n --image $ACR_NAME.azurecr.io/$APP_NAME \\\\\\\\\\n --target-port 3500 \\\\\\\\\\n --ingress \'external\' \\\\\\\\\\n --registry-server $ACR_NAME.azurecr.io \\\\\\\\\\n --query properties.configuration.ingress.fqdn\\n`})}),`\\n`,(0,n.jsx)(e.p,{children:\\"This command will output the deployed Hello World app\'s fully qualified domain name (FQDN). Copy and paste the FQDN into a browser to see your running Hello World app.\\"}),`\\n`,(0,n.jsx)(e.p,{children:(0,n.jsx)(e.img,{src:\\"/assets/images/deploy-app-observability-azure-container-apps/elastic-blog-9-hello-world.png\\",alt:\\"hello world\\",width:\\"1977\\",height:\\"285\\"})}),`\\n`,(0,n.jsx)(e.h2,{id:\\"instrument-the-hello-world-web-app-with-elastic-observability\\",children:\\"Instrument the Hello World web app with Elastic Observability\\"}),`\\n`,(0,n.jsx)(e.p,{children:\\"With a web app successfully running in Container Apps, we\\\\u2019re now ready to add the minimal code necessary to enable observability for the Hello World app in Elastic Cloud. 
We\\\\u2019ll perform the following eight steps:\\"}),`\\n`,(0,n.jsxs)(e.ol,{children:[`\\n`,(0,n.jsx)(e.li,{children:\\"In Azure Cloud Shell, create a new file named Telemetry.cs by typing the following command.\\"}),`\\n`]}),`\\n`,(0,n.jsx)(e.pre,{children:(0,n.jsx)(e.code,{className:\\"language-bash\\",children:`touch Telemetry.cs\\n`})}),`\\n`,(0,n.jsxs)(e.ol,{start:\\"2\\",children:[`\\n`,(0,n.jsx)(e.li,{children:\\"Open the Azure Cloud Shell file editor by typing the following command in Cloud Shell.\\"}),`\\n`]}),`\\n`,(0,n.jsx)(e.pre,{children:(0,n.jsx)(e.code,{className:\\"language-bash\\",children:`code .\\n`})}),`\\n`,(0,n.jsxs)(e.ol,{start:\\"3\\",children:[`\\n`,(0,n.jsx)(e.li,{children:\\"In the Azure Cloud Shell editor, open the Telemetry.cs file and paste in the following code. Save the edited file in Cloud Shell by pressing the [Ctrl] + [s] keys on your keyboard (or if you\\\\u2019re on a macOS computer, use the [\\\\u2318] + [s] keys). This class file is used to create a tracer ActivitySource, which can generate trace Activity spans for observability.\\"}),`\\n`]}),`\\n`,(0,n.jsx)(e.pre,{children:(0,n.jsx)(e.code,{className:\\"language-csharp\\",children:`using System.Diagnostics;\\n\\npublic static class Telemetry\\n{\\n\\tpublic static readonly ActivitySource activitySource = new(\\"Helloworld\\");\\n}\\n`})}),`\\n`,(0,n.jsxs)(e.ol,{start:\\"4\\",children:[`\\n`,(0,n.jsx)(e.li,{children:\\"In the Azure Cloud Shell editor, edit the file named Dockerfile to add the following Elastic OpenTelemetry environment variables. Replace the ELASTIC_APM_SERVER_URL text and the ELASTIC_APM_SECRET_TOKEN text with the APM Server URL and the APM Secret Token values that you copied and saved in an earlier step.\\"}),`\\n`]}),`\\n`,(0,n.jsx)(e.p,{children:\\"Save the edited file in Cloud Shell by pressing the [Ctrl] + [s] keys on your keyboard (or if you\\\\u2019re on a macOS computer, use the [\\\\u2318] + [s] keys).\\"}),`\\n`,(0,n.jsx)(e.p,{children:\\"The updated Dockerfile should look something like this:\\"}),`\\n`,(0,n.jsx)(e.pre,{children:(0,n.jsx)(e.code,{className:\\"language-dockerfile\\",children:`FROM \\\\${ARCH}mcr.microsoft.com/dotnet/aspnet:7.0. AS base\\nWORKDIR /app\\n\\nFROM mcr.microsoft.com/dotnet/sdk:8.0-preview AS build\\nARG TARGETPLATFORM\\n\\nWORKDIR /src\\nCOPY [\\"helloworld.csproj\\", \\"./\\"]\\nRUN dotnet restore \\"./helloworld.csproj\\"\\nCOPY . .\\nWORKDIR \\"/src/.\\"\\nRUN dotnet build \\"helloworld.csproj\\" -c Release -o /app/build\\n\\nFROM build AS publish\\nRUN dotnet publish \\"helloworld.csproj\\" -c Release -o /app/publish\\n\\nFROM base AS final\\nWORKDIR /app\\nCOPY --from=publish /app/publish .\\nEXPOSE 3500\\nENV ASPNETCORE_URLS=https://+:3500\\n\\nENV OTEL_EXPORTER_OTLP_ENDPOINT=\'https://******.apm.us-east-2.aws.elastic-cloud.com:443\'\\nENV OTEL_EXPORTER_OTLP_HEADERS=\'Authorization=Bearer ***********\'\\nENV OTEL_LOG_LEVEL=info\\nENV OTEL_METRICS_EXPORTER=otlp\\nENV OTEL_RESOURCE_ATTRIBUTES=service.version=1.0,deployment.environment=production\\nENV OTEL_SERVICE_NAME=helloworld\\nENV OTEL_TRACES_EXPORTER=otlp\\n\\nENTRYPOINT [\\"dotnet\\", \\"helloworld.dll\\"]\\n`})}),`\\n`,(0,n.jsxs)(e.ol,{start:\\"5\\",children:[`\\n`,(0,n.jsx)(e.li,{children:\\"In the Azure Cloud Shell editor, edit the helloworld.csproj file to add the Elastic APM and OpenTelemetry dependencies. 
The updated helloworld.csproj file should look something like this:\\"}),`\\n`]}),`\\n`,(0,n.jsx)(e.pre,{children:(0,n.jsx)(e.code,{className:\\"language-xml\\",children:`\\n\\n\\n \\n\\tnet7.0\\n\\tenable\\n\\tenable\\n \\n \\n\\t\\n\\t\\n\\t\\n\\t\\n\\t\\n\\t\\n\\t\\n \\n\\n\\n`})}),`\\n`,(0,n.jsxs)(e.ol,{start:\\"6\\",children:[`\\n`,(0,n.jsx)(e.li,{children:\\"In the Azure Cloud Shell editor, edit the Program.cs:\\"}),`\\n`]}),`\\n`,(0,n.jsxs)(e.ul,{children:[`\\n`,(0,n.jsx)(e.li,{children:\\"Add a using statement at the top of the file to import System.Diagnostics, which is used to create Activities that are equivalent to \\\\u201Cspans\\\\u201D in OpenTelemetry. Also import the OpenTelemetry.Resources and OpenTelemetry.Trace packages.\\"}),`\\n`]}),`\\n`,(0,n.jsx)(e.pre,{children:(0,n.jsx)(e.code,{className:\\"language-csharp\\",children:`using System.Diagnostics;\\nusing OpenTelemetry.Resources;\\nusing OpenTelemetry.Trace;\\n`})}),`\\n`,(0,n.jsxs)(e.ul,{children:[`\\n`,(0,n.jsx)(e.li,{children:\\"Update the \\\\u201Cbuilder\\\\u201D initialization code block to include configuration to enable Elastic OpenTelemetry observability.\\"}),`\\n`]}),`\\n`,(0,n.jsx)(e.pre,{children:(0,n.jsx)(e.code,{className:\\"language-csharp\\",children:`builder.Services.AddOpenTelemetry().WithTracing(builder => builder.AddOtlpExporter()\\n \\t.AddSource(\\"helloworld\\")\\n \\t.AddAspNetCoreInstrumentation()\\n \\t.AddOtlpExporter()\\n \\t.ConfigureResource(resource =>\\n \\tresource.AddService(\\n \\tserviceName: \\"helloworld\\"))\\n);\\nbuilder.Services.AddControllers();\\n`})}),`\\n`,(0,n.jsxs)(e.ul,{children:[`\\n`,(0,n.jsx)(e.li,{children:\\"Replace the \\\\u201CHello World!\\\\u201D HTML output string\\\\u2026\\"}),`\\n`]}),`\\n`,(0,n.jsx)(e.pre,{children:(0,n.jsx)(e.code,{className:\\"language-html\\",children:`Hello World!
\\n`})}),`\\n`,(0,n.jsxs)(e.ul,{children:[`\\n`,(0,n.jsx)(e.li,{children:\\"...with the \\\\u201CHello Elastic Observability\\\\u201D HTML output string.\\"}),`\\n`]}),`\\n`,(0,n.jsx)(e.pre,{children:(0,n.jsx)(e.code,{className:\\"language-html\\",children:`\\n
\\n Hello Elastic Observability - Azure Container Apps - C#\\n
\\n
\\n
\\n`})}),`\\n`,(0,n.jsxs)(e.ul,{children:[`\\n`,(0,n.jsx)(e.li,{children:\\"Add a telemetry trace span around the output response utilizing the Telemetry class\\\\u2019 ActivitySource.\\"}),`\\n`]}),`\\n`,(0,n.jsx)(e.pre,{children:(0,n.jsx)(e.code,{className:\\"language-csharp\\",children:`using (Activity activity = Telemetry.activitySource.StartActivity(\\"HelloSpan\\")!)\\n \\t{\\n \\t\\tConsole.Write(\\"hello\\");\\n \\t\\tawait context.Response.WriteAsync(output);\\n \\t}\\n`})}),`\\n`,(0,n.jsx)(e.p,{children:\\"The updated Program.cs file should look something like this:\\"}),`\\n`,(0,n.jsx)(e.pre,{children:(0,n.jsx)(e.code,{className:\\"language-csharp\\",children:`using System.Diagnostics;\\nusing OpenTelemetry.Resources;\\nusing OpenTelemetry.Trace;\\n\\nvar builder = WebApplication.CreateBuilder(args);\\nbuilder.Services.AddOpenTelemetry().WithTracing(builder => builder.AddOtlpExporter()\\n \\t.AddSource(\\"helloworld\\")\\n \\t.AddAspNetCoreInstrumentation()\\n \\t.AddOtlpExporter()\\n \\t.ConfigureResource(resource =>\\n \\tresource.AddService(\\n \\tserviceName: \\"helloworld\\"))\\n);\\nbuilder.Services.AddControllers();\\nvar app = builder.Build();\\n\\nstring output =\\n\\"\\"\\"\\n\\n
\\nHello Elastic Observability - Azure Container Apps - C#\\n
\\n
\\n
\\n\\"\\"\\";\\n\\napp.MapGet(\\"/\\", async context =>\\n\\t{\\n \\tusing (Activity activity = Telemetry.activitySource.StartActivity(\\"HelloSpan\\")!)\\n \\t\\t{\\n \\t\\tConsole.Write(\\"hello\\");\\n \\t\\tawait context.Response.WriteAsync(output);\\n \\t\\t}\\n\\t}\\n);\\napp.Run();\\n`})}),`\\n`,(0,n.jsxs)(e.ol,{start:\\"7\\",children:[`\\n`,(0,n.jsx)(e.li,{children:\\"Rebuild the Hello World app image and push the image to the Azure Container Registry by running the following command.\\"}),`\\n`]}),`\\n`,(0,n.jsx)(e.pre,{children:(0,n.jsx)(e.code,{className:\\"language-bash\\",children:`az acr build --registry $ACR_NAME --image $APP_NAME .\\n`})}),`\\n`,(0,n.jsxs)(e.ol,{start:\\"8\\",children:[`\\n`,(0,n.jsx)(e.li,{children:\\"Redeploy the updated Hello World app to Azure Container Apps, using the following command.\\"}),`\\n`]}),`\\n`,(0,n.jsx)(e.pre,{children:(0,n.jsx)(e.code,{className:\\"language-bash\\",children:`az containerapp create \\\\\\\\\\n --name $APP_NAME \\\\\\\\\\n --resource-group $RESOURCE_GROUP \\\\\\\\\\n --environment $ENVIRONMENT \\\\\\\\\\n --image $ACR_NAME.azurecr.io/$APP_NAME \\\\\\\\\\n --target-port 3500 \\\\\\\\\\n --ingress \'external\' \\\\\\\\\\n --registry-server $ACR_NAME.azurecr.io \\\\\\\\\\n --query properties.configuration.ingress.fqdn\\n`})}),`\\n`,(0,n.jsx)(e.p,{children:\\"This command will output the deployed Hello World app\'s fully qualified domain name (FQDN). Copy and paste the FQDN into a browser to see the updated Hello World app running in Azure Container Apps.\\"}),`\\n`,(0,n.jsx)(e.p,{children:(0,n.jsx)(e.img,{src:\\"/assets/images/deploy-app-observability-azure-container-apps/elastic-blog-10-elastic-hello-observability.png\\",alt:\\"hello observability\\",width:\\"1999\\",height:\\"977\\"})}),`\\n`,(0,n.jsx)(e.h2,{id:\\"observe-the-hello-world-web-app\\",children:\\"Observe the Hello World web app\\"}),`\\n`,(0,n.jsx)(e.p,{children:\\"Now that we\\\\u2019ve instrumented the web app to send observability data to Elastic Observability, we can now use Elastic Cloud to monitor the web app\\\\u2019s operations.\\"}),`\\n`,(0,n.jsxs)(e.ol,{children:[`\\n`,(0,n.jsxs)(e.li,{children:[`\\n`,(0,n.jsxs)(e.p,{children:[\\"In Elastic Cloud, select the Observability \\",(0,n.jsx)(e.strong,{children:\\"Services\\"}),\\" menu item.\\"]}),`\\n`]}),`\\n`,(0,n.jsxs)(e.li,{children:[`\\n`,(0,n.jsxs)(e.p,{children:[\\"Click the \\",(0,n.jsx)(e.strong,{children:\\"helloworld\\"}),\\" service.\\"]}),`\\n`]}),`\\n`,(0,n.jsxs)(e.li,{children:[`\\n`,(0,n.jsxs)(e.p,{children:[\\"Click the \\",(0,n.jsx)(e.strong,{children:\\"Transactions\\"}),\\" tab.\\"]}),`\\n`]}),`\\n`,(0,n.jsxs)(e.li,{children:[`\\n`,(0,n.jsxs)(e.p,{children:[\\"Scroll down and click the \\",(0,n.jsx)(e.strong,{children:\\"GET /\\"}),\\" transaction.Scroll down to the \\",(0,n.jsx)(e.strong,{children:\\"Trace Sample\\"}),\\" section to see the \\",(0,n.jsx)(e.strong,{children:\\"GET /\\"}),\\" , \\",(0,n.jsx)(e.strong,{children:\\"HelloSpan\\"}),\\" trace sample.\\"]}),`\\n`]}),`\\n`]}),`\\n`,(0,n.jsx)(e.p,{children:(0,n.jsx)(e.img,{src:\\"/assets/images/deploy-app-observability-azure-container-apps/elastic-blog-12-latency-distribution.png\\",alt:\\"latency-distribution\\",width:\\"1999\\",height:\\"1097\\"})}),`\\n`,(0,n.jsx)(e.h2,{id:\\"observability-made-to-scale\\",children:\\"Observability made to scale\\"}),`\\n`,(0,n.jsx)(e.p,{children:\\"You\\\\u2019ve seen the entire process of deploying a web app to Azure Container Apps that is instrumented with Elastic Observability. 
This web app is now fully available on the web running on a platform that will auto-scale to serve visitors worldwide. And it\\\\u2019s instrumented for Elastic Observability APM using OpenTelemetry to ingest data into Elastic Cloud\\\\u2019s Kibana dashboards.\\"}),`\\n`,(0,n.jsxs)(e.p,{children:[\\"Now that you\\\\u2019ve seen how to deploy a Hello World web app with a basic observability setup, visit \\",(0,n.jsx)(e.a,{href:\\"https://www.elastic.co/observability\\",rel:\\"nofollow\\",children:\\"Elastic Observability\\"}),\\" to learn more about expanding to a full scale observability coverage solution for your apps. Or visit \\",(0,n.jsx)(e.a,{href:\\"https://www.elastic.co/getting-started/microsoft-azure\\",rel:\\"nofollow\\",children:\\"Getting started with Elastic on Microsoft Azure\\"}),\\" for more examples of how you can drive the data insights you need by combining Microsoft Azure\\\\u2019s cloud computing services with Elastic\\\\u2019s search-powered platform.\\"]}),`\\n`,(0,n.jsx)(e.p,{children:(0,n.jsx)(e.em,{children:\\"The release and timing of any features or functionality described in this post remain at Elastic\'s sole discretion. Any features or functionality not currently available may not be delivered on time or at all.\\"})})]})}function p(t={}){let{wrapper:e}=t.components||{};return e?(0,n.jsx)(e,{...t,children:(0,n.jsx)(d,{...t})}):d(t)}return A(E);})();\\n;return Component;"},"_id":"articles/how-to-deploy-hello-world-web-app-elastic-observability-azure-container-apps.mdx","_raw":{"sourceFilePath":"articles/how-to-deploy-hello-world-web-app-elastic-observability-azure-container-apps.mdx","sourceFileName":"how-to-deploy-hello-world-web-app-elastic-observability-azure-container-apps.mdx","sourceFileDir":"articles","contentType":"mdx","flattenedPath":"articles/how-to-deploy-hello-world-web-app-elastic-observability-azure-container-apps"},"type":"Article","imageUrl":"/assets/images/deploy-app-observability-azure-container-apps/library-branding-elastic-observability-midnight-1680x980.png","readingTime":"12 min read","url":"/deploy-app-observability-azure-container-apps","headings":[{"level":2,"title":"Elastic Observability setup","href":"#elastic-observability-setup"},{"level":2,"title":"Azure Container Apps setup","href":"#azure-container-apps-setup"},{"level":2,"title":"Deploy a Hello World web app to Container Apps","href":"#deploy-a-hello-world-web-app-to-container-apps"},{"level":2,"title":"Instrument the Hello World web app with Elastic Observability","href":"#instrument-the-hello-world-web-app-with-elastic-observability"},{"level":2,"title":"Observe the Hello World web app","href":"#observe-the-hello-world-web-app"},{"level":2,"title":"Observability made to scale","href":"#observability-made-to-scale"}]},{"title":"How to monitor Kafka and Confluent Cloud with Elastic Observability","slug":"monitor-kafka-confluent-cloud-elastic-observability","date":"2023-04-03","description":"This blog post will take you through best practices to observe Kafka-based solutions implemented on Confluent Cloud with Elastic Observability.","image":"patterns-white-background-no-logo-observability_(1).png","author":[{"slug":"david-hope","type":"Author","_raw":{}}],"tags":[{"slug":"kubernetes","type":"Tag","_raw":{}},{"slug":"confluent","type":"Tag","_raw":{}},{"slug":"apm","type":"Tag","_raw":{}}],"body":{"raw":"\\nThe blog will take you through best practices to observe Kafka-based solutions implemented on Confluent Cloud with Elastic Observability. 
(To monitor Kafka brokers that are not in Confluent Cloud, I recommend checking out [this blog](https://www.elastic.co/blog/how-to-monitor-containerized-kafka-with-elastic-observability).) We will instrument Kafka applications with [Elastic APM](https://www.elastic.co/observability/application-performance-monitoring), use the Confluent Cloud metrics endpoint to get data about brokers, and pull it all together with a unified Kafka and Confluent Cloud monitoring dashboard in [Elastic Observability](https://www.elastic.co/observability).\n\n## Using full-stack Elastic Observability to understand Kafka and Confluent performance\n\nIn the [2023 Dice Tech Salary Report](https://dice.viewer.foleon.com/ebooks/dice-tech-salary-report-explore/), Elasticsearch and Kafka are ranked #3 and #5 out of the top 12 [most in-demand skills](https://dice.viewer.foleon.com/ebooks/dice-tech-salary-report-explore/salary-trends#Skills) at the moment, so it’s no surprise that we are seeing a large number of customers who are implementing data in motion with Kafka.\n\n[Kafka](https://www.elastic.co/integrations/data-integrations?search=kafka) comes with some additional complexities that go beyond traditional architectures and which make observability an even more important topic. Understanding where the bottlenecks are in messaging and stream-based architectures can be tough. This is why you need a comprehensive observability solution with [machine learning](https://www.elastic.co/blog/aiops-use-cases-observability-operations) to help you.\n\nIn this blog, we will explore how to get Kafka applications instrumented with [Elastic APM](https://www.elastic.co/blog/apm-correlations-elastic-observability-root-cause-transactions), how to collect performance data with JMX, and how you can use the Elasticsearch Platform to pull in data from Confluent Cloud — which is by far the easiest and most cost-effective way to implement Kafka architectures.\n\nFor this blog post, we will be following the code at this [git repository](https://github.com/davidgeorgehope/multi-cloud). There are three services here that are designed to run on two clouds and push data from one cloud to the other and finally into Google BigQuery. We want to monitor all of this using Elastic Observability to get a complete picture of Confluent and Kafka services performance. As a teaser, this is the goal below:\n\n![kafka producer metrics](/assets/images/monitor-kafka-confluent-cloud-elastic-observability/blog-elastic-observability-producer_metrics.png)\n\n## A look at the architecture\n\nAs mentioned, we have three [multi-cloud services](https://www.elastic.co/observability/cloud-monitoring) implemented in our example application.\n\nThe first service is a Spring WebFlux service that runs inside AWS EKS. This service will take a message from a REST endpoint and put it straight onto a Kafka topic.\n\nThe second service, which is also a Spring WebFlux service, is hosted inside Google Cloud Platform (GCP) with its [Google Cloud monitoring](https://www.elastic.co/observability/google-cloud-monitoring); it will pick this message up and forward it to a third service that puts the message into BigQuery.\n\nThese services are all instrumented using Elastic APM. For this blog, we have decided to use Spring config to inject and configure the APM agent. 
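For comparison, the attach-at-startup route looks like the sketch below; the agent jar path, jar name, and property values here are illustrative placeholders rather than code from the example repository:\n\n```bash\njava -javaagent:/opt/elastic-apm-agent.jar \\\n -Delastic.apm.service_name=aws-multi-cloud \\\n -Delastic.apm.server_url=\"$ELASTIC_APM_SERVER_URL\" \\\n -Delastic.apm.secret_token=\"$ELASTIC_APM_SECRET_TOKEN\" \\\n -jar aws-multi-cloud-0.0.1-SNAPSHOT.jar\n```\n\n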
You could of course use the “-javaagent” argument, as sketched above, to inject the agent instead if preferred.\n\n![aws kafka google cloud](/assets/images/monitor-kafka-confluent-cloud-elastic-observability/blog-elastic-obsevability-aws-kafka-google-cloud.png)\n\n## Getting started with Elastic Observability and Confluent Cloud\n\nBefore we dive into the application and its configuration, you will want to get an Elastic Cloud and a Confluent Cloud account. You can sign up here for [Elastic](https://www.elastic.co/cloud/) and here for [Confluent Cloud](https://www.confluent.io/confluent-cloud/). There are some initial configuration steps we need to do inside Confluent Cloud, as you will need to create three topics: gcpTopic, myTopic, and topic_2.\n\nWhen you sign up for Confluent Cloud, you will be given an option of what type of cluster to create. For this walk-through, a Basic cluster is fine (as shown) — if you are careful about usage, it will not cost you a penny.\n\n![confluent create cluster](/assets/images/monitor-kafka-confluent-cloud-elastic-observability/blog-elastic-observability-confluent-create-cluster.png)\n\nOnce you have a cluster, go ahead and create the three topics.\n\n![confluent topics](/assets/images/monitor-kafka-confluent-cloud-elastic-observability/blog-elastic-observability-confluent-topics.png)\n\nFor this walk-through, you will only need to create single-partition topics as shown below:\n\n![new topic](/assets/images/monitor-kafka-confluent-cloud-elastic-observability/blog-elastic-observability-new-topic.png)\n\nNow we are ready to set up the Elastic Cloud cluster.\n\n![create a deployment](/assets/images/monitor-kafka-confluent-cloud-elastic-observability/blog-elastic-observability-create-a-deployment.png)\n\nOne thing to note here is that when setting up an Elastic cluster, the defaults are mostly OK, with one minor tweak: under “Advanced Settings,” add capacity for machine learning.\n\n![machine learning instances](/assets/images/monitor-kafka-confluent-cloud-elastic-observability/blog-elastic-observability-machine-learning-instances.png)\n\n## Getting APM up and running\n\nThe first thing we want to do here is get our Spring Boot WebFlux-based services up and running. For this blog, I have decided to implement this using Spring configuration, as you can see below. 
For brevity, I have not listed all the JMX configuration information, but you can see those details in [GitHub](https://github.com/davidgeorgehope/multi-cloud/blob/main/aws-multi-cloud/src/main/java/com/elastic/multicloud/ElasticApmConfig.java).\n\n```java\npackage com.elastic.multicloud;\nimport co.elastic.apm.attach.ElasticApmAttacher;\nimport jakarta.annotation.PostConstruct;\nimport lombok.Setter;\nimport org.slf4j.Logger;\nimport org.slf4j.LoggerFactory;\nimport org.springframework.boot.autoconfigure.condition.ConditionalOnProperty;\nimport org.springframework.boot.context.properties.ConfigurationProperties;\nimport org.springframework.context.annotation.Configuration;\n\nimport java.util.HashMap;\nimport java.util.Map;\n\n@Setter\n@Configuration\n@ConfigurationProperties(prefix = \"elastic.apm\")\n@ConditionalOnProperty(value = \"elastic.apm.enabled\", havingValue = \"true\")\npublic class ElasticApmConfig {\n\n private static final String SERVER_URL_KEY = \"server_url\";\n private String serverUrl;\n\n private static final String SERVICE_NAME_KEY = \"service_name\";\n private String serviceName;\n\n private static final String SECRET_TOKEN_KEY = \"secret_token\";\n private String secretToken;\n\n private static final String ENVIRONMENT_KEY = \"environment\";\n private String environment;\n\n private static final String APPLICATION_PACKAGES_KEY = \"application_packages\";\n private String applicationPackages;\n\n private static final String LOG_LEVEL_KEY = \"log_level\";\n private String logLevel;\n private static final Logger LOGGER = LoggerFactory.getLogger(ElasticApmConfig.class);\n\n @PostConstruct\n public void init() {\n LOGGER.info(environment);\n\n Map<String, String> apmProps = new HashMap<>(6);\n apmProps.put(SERVER_URL_KEY, serverUrl);\n apmProps.put(SERVICE_NAME_KEY, serviceName);\n apmProps.put(SECRET_TOKEN_KEY, secretToken);\n apmProps.put(ENVIRONMENT_KEY, environment);\n apmProps.put(APPLICATION_PACKAGES_KEY, applicationPackages);\n apmProps.put(LOG_LEVEL_KEY, logLevel);\n apmProps.put(\"enable_experimental_instrumentations\",\"true\");\n apmProps.put(\"capture_jmx_metrics\",\"object_name[kafka.producer:type=producer-metrics,client-id=*] attribute[batch-size-avg:metric_name=kafka.producer.batch-size-avg]\");\n\n ElasticApmAttacher.attach(apmProps);\n }\n}\n```\n\nNow obviously this requires some dependencies, which you can see here in the Maven pom.xml.\n\n```xml\n\t\t<dependency>\n\t\t\t<groupId>co.elastic.apm</groupId>\n\t\t\t<artifactId>apm-agent-attach</artifactId>\n\t\t\t<version>1.35.1-SNAPSHOT</version>\n\t\t</dependency>\n\t\t<dependency>\n\t\t\t<groupId>co.elastic.apm</groupId>\n\t\t\t<artifactId>apm-agent-api</artifactId>\n\t\t\t<version>1.35.1-SNAPSHOT</version>\n\t\t</dependency>\n```\n\nStrictly speaking, the agent-api is not required, but it could be useful if you want to add your own monitoring code (as per the example below). 
The agent will happily auto-instrument without needing to do that though.\n\n```java\nTransaction transaction = ElasticApm.currentTransaction();\n Span span = ElasticApm.currentSpan()\n .startSpan(\"external\", \"kafka\", null)\n .setName(\"DAVID\").setServiceTarget(\"kafka\",\"gcp-elastic-apm-spring-boot-integration\");\n try (final Scope scope = transaction.activate()) {\n span.injectTraceHeaders((name, value) -> producerRecord.headers().add(name,value.getBytes()));\n return Mono.fromRunnable(() -> {\n kafkaTemplate.send(producerRecord);\n });\n } catch (Exception e) {\n span.captureException(e);\n throw e;\n } finally {\n span.end();\n }\n```\n\nNow we have enough code to get our agent bootstrapped.\n\nTo get the code from the GitHub repository up and running, you will need the following installed on your system, and you will need credentials for your GCP and AWS clouds:\n\n- Java\n- Maven\n- Docker\n- Kubernetes CLI (kubectl)\n\n### Clone the project\n\nClone the multi-cloud Spring project to your local machine.\n\n```bash\ngit clone https://github.com/davidgeorgehope/multi-cloud\n```\n\n### Build the project\n\nFrom each service in the project (aws-multi-cloud, gcp-multi-cloud, gcp-bigdata-consumer-multi-cloud), run the following commands to build the project.\n\n```bash\nmvn clean install\n```\n\nNow you can run the Java project locally.\n\n```bash\njava -jar gcp-bigdata-consumer-multi-cloud-0.0.1-SNAPSHOT.jar --spring.config.location=/Users/davidhope/application-gcp.properties\n```\n\nThat will just get the Java application running locally, but you can also deploy this to Kubernetes using EKS and GKE as shown below.\n\n### Create a Docker image\n\nCreate a Docker image from the built project using the dockerBuild.sh provided in the project. You may want to customize this shell script to upload the built Docker image to your own Docker repository.\n\n```bash\n./dockerBuild.sh\n```\n\n### Create a namespace for each service\n\n```bash\nkubectl create namespace aws\n```\n\n```bash\nkubectl create namespace gcp-1\n```\n\n```bash\nkubectl create namespace gcp-2\n```\n\nOnce you have the namespaces created, you can switch context using the following command:\n\n```bash\nkubectl config set-context --current --namespace=my-namespace\n```\n\n### Configuration for each service\n\nEach service needs an application.properties file. 
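A minimal sketch of its shape, with placeholder values (the keys mirror the fields on the ElasticApmConfig class above, and “elastic.apm.enabled” gates whether the agent attaches):\n\n```bash\ncat > application.properties <<\'EOF\'\nelastic.apm.enabled=true\nelastic.apm.server-url=https://your-apm-server-url:443\nelastic.apm.secret-token=your-secret-token\nelastic.apm.service-name=aws-multi-cloud\nelastic.apm.environment=dev\nelastic.apm.application-packages=com.elastic.multicloud\nEOF\n```\n\n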
I have put a full example [here](https://github.com/davidgeorgehope/multi-cloud/blob/main/gcp-bigdata-consumer-multi-cloud/application.properties).\\n\\nYou will need to replace the following properties with those you find in Elastic.\\n\\n```bash\\nelastic.apm.server-url=\\nelastic.apm.secret-token=\\n```\\n\\nThese can be found by going into Elastic Cloud and clicking on **Services** inside APM and then **Add Data**, which should be visible in the top right corner.\\n\\n![add data](/assets/images/monitor-kafka-confluent-cloud-elastic-observability/blog-elastic-observability-add-data.png)\\n\\nFrom there you will see the following, which gives you the config information you need.\\n\\n![apm agents](/assets/images/monitor-kafka-confluent-cloud-elastic-observability/blog-elastic-observability-apm-agents.png)\\n\\nYou will need to replace the following property with the value you find in Confluent Cloud.\\n\\n```bash\\nelastic.kafka.producer.sasl-jaas-config=\\n```\\n\\nThis configuration comes from the Clients page in Confluent Cloud.\\n\\n![confluent new client](/assets/images/monitor-kafka-confluent-cloud-elastic-observability/blog-elastic-observability-confluent-new-client.png)\\n\\n### Adding the config for each service in Kubernetes\\n\\nOnce you have a fully configured application.properties file, you need to add it to your [Kubernetes environment](https://www.elastic.co/blog/kubernetes-cluster-metrics-logs-monitoring) as below.\\n\\nFrom the aws namespace.\\n\\n```bash\\nkubectl create secret generic my-app-config --from-file=application.properties\\n```\\n\\nFrom the gcp-1 namespace.\\n\\n```bash\\nkubectl create secret generic my-app-config --from-file=application.properties\\n```\\n\\nFrom the gcp-2 namespace.\\n\\n```bash\\nkubectl create secret generic bigdata-creds --from-file=elastic-product-marketing-e145e13fbc7c.json\\n\\nkubectl create secret generic my-app-config-gcp-bigdata --from-file=application.properties\\n```\\n\\n### Create a Kubernetes deployment\\n\\nCreate a Kubernetes deployment YAML file and add your Docker image to it. You can use the deployment.yaml file provided in the project as a template; a rough sketch of the key pieces follows the apply command below. Make sure to update the image name in the file to match the name of the Docker image you just created.\\n\\n```bash\\nkubectl apply -f deployment.yaml\\n```\\n\\n
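As a sketch only, since the deployment.yaml in the repository is the real template, a deployment along these lines mounts the secret created above so Spring can read it as application.properties. Names and paths here are illustrative:\\n\\n```yaml\\napiVersion: apps/v1\\nkind: Deployment\\nmetadata:\\n  name: aws-multi-cloud\\nspec:\\n  replicas: 1\\n  selector:\\n    matchLabels:\\n      app: aws-multi-cloud\\n  template:\\n    metadata:\\n      labels:\\n        app: aws-multi-cloud\\n    spec:\\n      containers:\\n        - name: aws-multi-cloud\\n          image: your-docker-repo/aws-multi-cloud:latest # update to match your image\\n          ports:\\n            - containerPort: 8080\\n          args:\\n            - --spring.config.location=/config/application.properties\\n          volumeMounts:\\n            - name: app-config\\n              mountPath: /config\\n              readOnly: true\\n      volumes:\\n        - name: app-config\\n          secret:\\n            secretName: my-app-config # the secret created above\\n```\\n\\n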
### Create a Kubernetes service\\n\\nCreate a Kubernetes service YAML file and add your deployment to it. You can use the service.yaml file provided in the project as a template.\\n\\n```bash\\nkubectl apply -f service.yaml\\n```\\n\\n### Access your application\\n\\nYour application is now running in a Kubernetes cluster. To access it, you can use the service\'s cluster IP and port. You can get the service\'s IP and port using the following command.\\n\\n```bash\\nkubectl get services\\n```\\n\\nNow that you know where the service is, you need to exercise it!\\n\\nYou can regularly poke the service endpoint using the following command.\\n\\n```bash\\ncurl -X POST -H \\"Content-Type: application/json\\" -d \'{\\"name\\": \\"linuxize\\", \\"email\\": \\"linuxize@example.com\\"}\' http://localhost:8080/api/my-objects/publish\\n```\\n\\nWith this up and running, you should see the following service map build out in the Elastic APM product.\\n\\n![aws elastic apm spring boot](/assets/images/monitor-kafka-confluent-cloud-elastic-observability/blog-elastic-observability-aws-elastic-apm-spring-boot.png)\\n\\nAnd traces will contain a waterfall graph showing all the spans that have executed across this distributed application, allowing you to pinpoint where any issues are within each transaction.\\n\\n![observability services](/assets/images/monitor-kafka-confluent-cloud-elastic-observability/blog-elastic-observability-services.png)\\n\\n## JMX for Kafka Producer/Consumer metrics\\n\\nIn the previous part of this blog, we briefly touched on the JMX metric configuration you can see below.\\n\\n```bash\\n\\"capture_jmx_metrics\\",\\"object_name[kafka.producer:type=producer-metrics,client-id=*] attribute[batch-size-avg:metric_name=kafka.producer.batch-size-avg]\\"\\n```\\n\\nWe can use this “capture_jmx_metrics” setting to collect any Kafka Producer/Consumer JMX metrics we want to monitor.\\n\\nCheck out the documentation [here](https://www.elastic.co/guide/en/apm/agent/java/current/config-jmx.html) to understand how to configure this and [here](https://docs.confluent.io/platform/current/kafka/monitoring.html) to see the available JMX metrics you can monitor. In the [example code in GitHub](https://github.com/davidgeorgehope/multi-cloud/blob/main/gcp-bigdata-consumer-multi-cloud/src/main/java/com/elastic/multicloud/ElasticApmConfig.java), we pull in all the available metrics, so you can check there to see how to configure this.\\n\\nOne thing worth pointing out: it is important to use the “metric_name” property shown above, because without it the metrics become quite difficult to find in Elastic Discover.\\n\\n
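To make that concrete, here is a sketch of what a broader “capture_jmx_metrics” value might look like in the ElasticApmConfig class shown earlier. The attribute names come from the standard Kafka producer and consumer client metrics; treat the exact selection as illustrative rather than a recommended set:\\n\\n```java\\n// Illustrative only: collect a few producer and consumer metrics, giving each\\n// an explicit metric_name so it is easy to find in Discover\\napmProps.put(\\"capture_jmx_metrics\\",\\n    \\"object_name[kafka.producer:type=producer-metrics,client-id=*]\\"\\n        + \\" attribute[record-send-rate:metric_name=kafka.producer.record-send-rate]\\"\\n        + \\" attribute[request-latency-avg:metric_name=kafka.producer.request-latency-avg],\\"\\n        + \\"object_name[kafka.consumer:type=consumer-fetch-manager-metrics,client-id=*]\\"\\n        + \\" attribute[records-consumed-rate:metric_name=kafka.consumer.records-consumed-rate]\\");\\n```\\n\\n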
## Monitoring Confluent Cloud with Elastic Observability\\n\\nSo we now have some good monitoring set up for Kafka Producers and Consumers, and we can trace transactions between services down to the lines of code that are executing. The core part of our Kafka infrastructure is hosted in Confluent Cloud. How, then, do we get data from there into our [full stack observability solution](https://www.elastic.co/observability)?\\n\\nLuckily, Confluent has done a fantastic job of making this easy. It provides important Confluent Cloud metrics via an open Prometheus-based metrics URL. So let\'s get down to business and configure this to bring data into our [observability tool](https://www.elastic.co/observability).\\n\\nThe first step is to configure Confluent Cloud with the MetricsViewer role. The MetricsViewer role provides service account access to the Metrics API for all clusters in an organization. This role also enables service accounts to import metrics into third-party metrics platforms.\\n\\nTo assign the MetricsViewer role to a new service account:\\n\\n1. In the administration menu (☰) in the upper-right corner of the Confluent Cloud user interface, click **ADMINISTRATION \\\\> Cloud API keys**.\\n2. Click **Add key**.\\n3. Click the **Granular access** tile to set the scope for the API key. Click **Next**.\\n4. Click **Create a new one** and specify the service account name. Optionally, add a description. Click **Next**.\\n5. The API key and secret are generated for the service account. You will need this API key and secret to connect to the cluster, so be sure to store this information safely. Click **Save**. The new service account with the API key and associated ACLs is created. When you return to the API access tab, you can view the newly created API key to confirm.\\n6. Return to Accounts & access in the administration menu, and in the Accounts tab, click **Service accounts** to view your service accounts.\\n7. Select the service account that you want to assign the MetricsViewer role to.\\n8. In the service account’s details page, click **Access**.\\n9. In the tree view, open the resource where you want the service account to have the MetricsViewer role.\\n10. Click **Add role assignment** and select the MetricsViewer tile. Click **Save**.\\n\\nNext we can head to [Elastic Observability](https://www.elastic.co/observability) and configure the Prometheus integration to pull in the metrics data.\\n\\nGo to the integrations page in Kibana.\\n\\n![observability integrations](/assets/images/monitor-kafka-confluent-cloud-elastic-observability/blog-elastic-observability-integrations.png)\\n\\nFind the Prometheus integration. We are using the Prometheus integration because the Confluent Cloud metrics server can provide data in Prometheus format. Trust us, this works really well — good work Confluent!\\n\\n![integrations prometheus](/assets/images/monitor-kafka-confluent-cloud-elastic-observability/blog-elastic-observability-integrations-prometheus.png)\\n\\nAdd Prometheus on the next page.\\n\\n![add prometheus](/assets/images/monitor-kafka-confluent-cloud-elastic-observability/blog-elastic-observability-add-prometheus.png)\\n\\nConfigure the Prometheus integration in the following way: in the hosts box, add the following URL, replacing the resource.kafka.id value with the ID of the cluster you want to monitor.\\n\\n```bash\\nhttps://api.telemetry.confluent.cloud:443/v2/metrics/cloud/export?resource.kafka.id=lkc-3rw3gw\\n```\\n\\n![collect prometheus metrics](/assets/images/monitor-kafka-confluent-cloud-elastic-observability/blog-elastic-observability-collect-prometheus-metrics.png)\\n\\nUnder the advanced options, add the username and password from the Cloud API key you created in Confluent Cloud above.\\n\\n![http config options](/assets/images/monitor-kafka-confluent-cloud-elastic-observability/blog-elastic-observability-http-config-options.png)\\n\\nOnce the integration is created, [the policy needs to be applied](https://www.elastic.co/guide/en/fleet/current/agent-policy.html#apply-a-policy) to an instance of a running Elastic Agent.\\n\\n
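If metrics do not show up after a few minutes, it can help to sanity-check the endpoint and credentials from a terminal before debugging the integration itself. A quick check along these lines should return Prometheus-formatted metrics; substitute your own cluster ID, API key, and secret:\\n\\n```bash\\ncurl -s -u \\"YOUR_API_KEY:YOUR_API_SECRET\\" \\"https://api.telemetry.confluent.cloud/v2/metrics/cloud/export?resource.kafka.id=lkc-3rw3gw\\"\\n```\\n\\n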
That’s it! It’s that easy to get all the data you need for a full stack observability monitoring solution.\\n\\nFinally, let’s pull all this together in a dashboard.\\n\\n## Pulling it all together\\n\\nUsing Kibana to generate dashboards is super easy. If you configured everything the way we recommended above, you should find the metrics (producer/consumer/brokers) you need to create your own dashboard, as in the following screenshot.\\n\\n![dashboard metrics](/assets/images/monitor-kafka-confluent-cloud-elastic-observability/blog-elastic-observability-dashboard-metrics.png)\\n\\nLuckily, I made a dashboard for you and stored it in [GitHub](https://github.com/davidgeorgehope/multi-cloud/blob/main/export.ndjson). Take a look below, and use the export to import the dashboard into your own environment.\\n\\n![producer metrics](/assets/images/monitor-kafka-confluent-cloud-elastic-observability/blog-elastic-observability-producer_metrics.png)\\n\\n## Adding the icing on the cake: machine learning anomaly detection\\n\\nNow that we have all the critical bits in place, we are going to add the icing on the cake: machine learning (ML)!\\n\\nWithin Kibana, let\'s head over to the Machine Learning tab in “Analytics.”\\n\\n![kibana analytics](/assets/images/monitor-kafka-confluent-cloud-elastic-observability/blog-elastic-observability-kibana-analytics.png)\\n\\nGo to the jobs page, where we’ll get started creating our first anomaly detection job.\\n\\n![create your first anomaly detection job](/assets/images/monitor-kafka-confluent-cloud-elastic-observability/blog-elastic-observability-create-your-first-anomaly-detection-job.png)\\n\\nThe metrics data view contains what we need to create this new anomaly detection job.\\n\\n![observability metrics](/assets/images/monitor-kafka-confluent-cloud-elastic-observability/blog-elastic-observability-metrics.png)\\n\\nUse the wizard and select a “Single Metric.”\\n\\n![use a wizard](/assets/images/monitor-kafka-confluent-cloud-elastic-observability/blog-elastic-observability-use-a-wizard.png)\\n\\nUse the full data.\\n\\n![use full data](/assets/images/monitor-kafka-confluent-cloud-elastic-observability/blog-elastic-observability-use-full-data.png)\\n\\nIn this example, we are going to look for anomalies in the connection count. We really do not want a major deviation here: suddenly having too many or too few things connecting to our Kafka cluster could indicate that something very bad is occurring.\\n\\nOnce you have selected the connection count metric, you can proceed through the wizard. Eventually your ML job will be created, and you should be able to view the data as in the example below.\\n\\n![single metric viewer](/assets/images/monitor-kafka-confluent-cloud-elastic-observability/blog-elastic-observability-single-metric-viewer.png)\\n\\nCongratulations, you have now created a machine learning job to alert you if there are any problems with your Kafka cluster, adding [a full AIOps solution](https://www.elastic.co/observability/aiops) to your Kafka and Confluent observability!\\n\\n## Summary\\n\\nWe looked at monitoring Kafka-based solutions implemented on Confluent Cloud using Elastic Observability.\\n\\nWe covered the architecture of a multi-cloud solution involving AWS EKS, Confluent Cloud, and GCP GKE. We looked at how to instrument Kafka applications with Elastic APM, use JMX for Kafka Producer/Consumer metrics, integrate Prometheus, and set up machine learning anomaly detection.\\n\\nWe went through a detailed walk-through with code snippets, configuration steps, and deployment instructions included to help you get started.\\n\\nInterested in learning more about Elastic Observability? 
Check out the following resources:\\n\\n- [An Introduction to Elastic Observability](https://www.elastic.co/virtual-events/intro-to-elastic-observability)\\n- [Observability Fundamentals Training](https://www.elastic.co/training/observability-fundamentals)\\n- [Watch an Elastic Observability demo](https://www.elastic.co/observability/demo)\\n- [Observability Predictions and Trends for 2023](https://www.elastic.co/blog/observability-predictions-trends-2023)\\n\\nAnd sign up for our [Elastic Observability Trends Webinar](https://www.elastic.co/virtual-events/emerging-trends-in-observability) featuring AWS and Forrester, not to be missed!\\n","code":"var Component=(()=>{var u=Object.create;var o=Object.defineProperty;var p=Object.getOwnPropertyDescriptor;var g=Object.getOwnPropertyNames;var m=Object.getPrototypeOf,b=Object.prototype.hasOwnProperty;var f=(i,e)=>()=>(e||i((e={exports:{}}).exports,e),e.exports),w=(i,e)=>{for(var n in e)o(i,n,{get:e[n],enumerable:!0})},l=(i,e,n,r)=>{if(e&&typeof e==\\"object\\"||typeof e==\\"function\\")for(let a of g(e))!b.call(i,a)&&a!==n&&o(i,a,{get:()=>e[a],enumerable:!(r=p(e,a))||r.enumerable});return i};var y=(i,e,n)=>(n=i!=null?u(m(i)):{},l(e||!i||!i.__esModule?o(n,\\"default\\",{value:i,enumerable:!0}):n,i)),v=i=>l(o({},\\"__esModule\\",{value:!0}),i);var s=f((A,c)=>{c.exports=_jsx_runtime});var C={};w(C,{default:()=>d,frontmatter:()=>k});var t=y(s()),k={title:\\"How to monitor Kafka and Confluent Cloud with Elastic Observability\\",slug:\\"monitor-kafka-confluent-cloud-elastic-observability\\",date:\\"2023-04-03\\",description:\\"This blog post will take you through best practices to observe Kafka-based solutions implemented on Confluent Cloud with Elastic Observability.\\",author:[{slug:\\"david-hope\\"}],image:\\"patterns-white-background-no-logo-observability_(1).png\\",tags:[{slug:\\"kubernetes\\"},{slug:\\"confluent\\"},{slug:\\"apm\\"}]};function h(i){let e={a:\\"a\\",code:\\"code\\",h2:\\"h2\\",h3:\\"h3\\",img:\\"img\\",li:\\"li\\",ol:\\"ol\\",p:\\"p\\",pre:\\"pre\\",strong:\\"strong\\",ul:\\"ul\\",...i.components};return(0,t.jsxs)(t.Fragment,{children:[(0,t.jsxs)(e.p,{children:[\\"The blog will take you through best practices to observe Kafka-based solutions implemented on Confluent Cloud with Elastic Observability. (To monitor Kafka brokers that are not in Confluent Cloud, I recommend checking out \\",(0,t.jsx)(e.a,{href:\\"https://www.elastic.co/blog/how-to-monitor-containerized-kafka-with-elastic-observability\\",rel:\\"nofollow\\",children:\\"this blog\\"}),\\".) 
We will instrument Kafka applications with \\",(0,t.jsx)(e.a,{href:\\"https://www.elastic.co/observability/application-performance-monitoring\\",rel:\\"nofollow\\",children:\\"Elastic APM\\"}),\\", use the Confluent Cloud metrics endpoint to get data about brokers, and pull it all together with a unified Kafka and Confluent Cloud monitoring dashboard in \\",(0,t.jsx)(e.a,{href:\\"https://www.elastic.co/observability\\",rel:\\"nofollow\\",children:\\"Elastic Observability\\"}),\\".\\"]}),`\\n`,(0,t.jsx)(e.h2,{id:\\"using-full-stack-elastic-observability-to-understand-kafka-and-confluent-performance\\",children:\\"Using full-stack Elastic Observability to understand Kafka and Confluent performance\\"}),`\\n`,(0,t.jsxs)(e.p,{children:[\\"In the \\",(0,t.jsx)(e.a,{href:\\"https://dice.viewer.foleon.com/ebooks/dice-tech-salary-report-explore/\\",rel:\\"nofollow\\",children:\\"2023 Dice Tech Salary Report\\"}),\\", Elasticsearch and Kafka are ranked #3 and #5 out of the top 12 \\",(0,t.jsx)(e.a,{href:\\"https://dice.viewer.foleon.com/ebooks/dice-tech-salary-report-explore/salary-trends#Skills\\",rel:\\"nofollow\\",children:\\"most in-demand skills\\"}),\\" at the moment, so it\\\\u2019s no surprise that we are seeing a large number of customers who are implementing data in motion with Kafka.\\"]}),`\\n`,(0,t.jsxs)(e.p,{children:[(0,t.jsx)(e.a,{href:\\"https://www.elastic.co/integrations/data-integrations?search=kafka\\",rel:\\"nofollow\\",children:\\"Kafka\\"}),\\" comes with some additional complexities that go beyond traditional architectures and which make observability an even more important topic. Understanding where the bottlenecks are in messaging and stream-based architectures can be tough. This is why you need a comprehensive observability solution with \\",(0,t.jsx)(e.a,{href:\\"https://www.elastic.co/blog/aiops-use-cases-observability-operations\\",rel:\\"nofollow\\",children:\\"machine learning\\"}),\\" to help you.\\"]}),`\\n`,(0,t.jsxs)(e.p,{children:[\\"In this blog, we will explore how to get Kafka applications instrumented with \\",(0,t.jsx)(e.a,{href:\\"https://www.elastic.co/blog/apm-correlations-elastic-observability-root-cause-transactions\\",rel:\\"nofollow\\",children:\\"Elastic APM\\"}),\\", how to collect performance data with JMX, and how you can use the Elasticsearch Platform to pull in data from Confluent Cloud \\\\u2014 which is by far the easiest and most cost-effective way to implement Kafka architectures.\\"]}),`\\n`,(0,t.jsxs)(e.p,{children:[\\"For this blog post, we will be following the code at this \\",(0,t.jsx)(e.a,{href:\\"https://github.com/davidgeorgehope/multi-cloud\\",rel:\\"nofollow\\",children:\\"git repository\\"}),\\". There are three services here that are designed to run on two clouds and push data from one cloud to the other and finally into Google BigQuery. 
We want to monitor all of this using Elastic Observability to give you a complete picture of Confluent and Kafka services performance. As a teaser, this is the goal below:\\"]}),`\\n`,(0,t.jsx)(e.p,{children:(0,t.jsx)(e.img,{src:\\"/assets/images/monitor-kafka-confluent-cloud-elastic-observability/blog-elastic-observability-producer_metrics.png\\",alt:\\"kafka producer metrics\\",width:\\"1999\\",height:\\"1026\\"})}),`\\n`,(0,t.jsx)(e.h2,{id:\\"a-look-at-the-architecture\\",children:\\"A look at the architecture\\"}),`\\n`,(0,t.jsxs)(e.p,{children:[\\"As mentioned, we have three \\",(0,t.jsx)(e.a,{href:\\"https://www.elastic.co/observability/cloud-monitoring\\",rel:\\"nofollow\\",children:\\"multi-cloud services\\"}),\\" implemented in our example application.\\"]}),`\\n`,(0,t.jsx)(e.p,{children:\\"The first service is a Spring WebFlux service that runs inside AWS EKS. This service will take a message from a REST Endpoint and simply put it straight on to a Kafka topic.\\"}),`\\n`,(0,t.jsxs)(e.p,{children:[\\"The second service, which is also a Spring WebFlux service hosted inside Google Cloud Platform (GCP) with its \\",(0,t.jsx)(e.a,{href:\\"https://www.elastic.co/observability/google-cloud-monitoring\\",rel:\\"nofollow\\",children:\\"Google Cloud monitoring\\"}),\\", will then pick this up and forward it to another service that will put the message into BigQuery.\\"]}),`\\n`,(0,t.jsx)(e.p,{children:\\"These services are all instrumented using Elastic APM. For this blog, we have decided to use Spring config to inject and configure the APM agent. You could of course use the \\\\u201C-javaagent\\\\u201D argument to inject the agent instead if preferred.\\"}),`\\n`,(0,t.jsx)(e.p,{children:(0,t.jsx)(e.img,{src:\\"/assets/images/monitor-kafka-confluent-cloud-elastic-observability/blog-elastic-obsevability-aws-kafka-google-cloud.png\\",alt:\\"aws kafka google cloud\\",width:\\"1775\\",height:\\"851\\"})}),`\\n`,(0,t.jsx)(e.h2,{id:\\"getting-started-with-elastic-observability-and-confluent-cloud\\",children:\\"Getting started with Elastic Observability and Confluent Cloud\\"}),`\\n`,(0,t.jsxs)(e.p,{children:[\\"Before we dive into the application and its configuration, you will want to get an Elastic Cloud and Confluent Cloud account. You can sign up here for \\",(0,t.jsx)(e.a,{href:\\"https://www.elastic.co/cloud/\\",rel:\\"nofollow\\",children:\\"Elastic\\"}),\\" and here for \\",(0,t.jsx)(e.a,{href:\\"https://www.confluent.io/confluent-cloud/\\",rel:\\"nofollow\\",children:\\"Confluent Cloud\\"}),\\". There are some initial configuration steps we need to do inside Confluent Cloud, as you will need to create three topics: gcpTopic, myTopic, and topic_2.\\"]}),`\\n`,(0,t.jsx)(e.p,{children:\\"When you sign up for Confluent Cloud, you will be given an option of what type of cluster to create. 
For this walk-through, a Basic cluster is fine (as shown) \\\\u2014 if you are careful about usage, it will not cost you a penny.\\"}),`\\n`,(0,t.jsx)(e.p,{children:(0,t.jsx)(e.img,{src:\\"/assets/images/monitor-kafka-confluent-cloud-elastic-observability/blog-elastic-observability-confluent-create-cluster.png\\",alt:\\"confluent create cluster\\",width:\\"584\\",height:\\"824\\"})}),`\\n`,(0,t.jsx)(e.p,{children:\\"Once you have a cluster, go ahead and create the three topics.\\"}),`\\n`,(0,t.jsx)(e.p,{children:(0,t.jsx)(e.img,{src:\\"/assets/images/monitor-kafka-confluent-cloud-elastic-observability/blog-elastic-observability-confluent-topics.png\\",alt:\\"confluent topics\\",width:\\"1675\\",height:\\"726\\"})}),`\\n`,(0,t.jsx)(e.p,{children:\\"For this walk-through, you will only need to create single partition topics as shown below:\\"}),`\\n`,(0,t.jsx)(e.p,{children:(0,t.jsx)(e.img,{src:\\"/assets/images/monitor-kafka-confluent-cloud-elastic-observability/blog-elastic-observability-new-topic.png\\",alt:\\"new topic\\",width:\\"546\\",height:\\"237\\"})}),`\\n`,(0,t.jsx)(e.p,{children:\\"Now we are ready to set up the Elastic Cloud cluster.\\"}),`\\n`,(0,t.jsx)(e.p,{children:(0,t.jsx)(e.img,{src:\\"/assets/images/monitor-kafka-confluent-cloud-elastic-observability/blog-elastic-observability-create-a-deployment.png\\",alt:\\"create a deployment\\",width:\\"766\\",height:\\"567\\"})}),`\\n`,(0,t.jsx)(e.p,{children:\\"One thing to note here is that when setting up an Elastic cluster, the defaults are mostly OK, with one minor tweak: under \\\\u201CAdvanced Settings,\\\\u201D add capacity for machine learning.\\"}),`\\n`,(0,t.jsx)(e.p,{children:(0,t.jsx)(e.img,{src:\\"/assets/images/monitor-kafka-confluent-cloud-elastic-observability/blog-elastic-observability-machine-learning-instances.png\\",alt:\\"machine learning instances\\",width:\\"717\\",height:\\"141\\"})}),`\\n`,(0,t.jsx)(e.h2,{id:\\"getting-apm-up-and-running\\",children:\\"Getting APM up and running\\"}),`\\n`,(0,t.jsxs)(e.p,{children:[\\"The first thing we want to do here is get our Spring Boot Webflux-based services up and running. For this blog, I have decided to implement this using the Spring Configuration, as you can see below. 
For brevity, I have not listed all the JMX configuration information, but you can see those details in \\",(0,t.jsx)(e.a,{href:\\"https://github.com/davidgeorgehope/multi-cloud/blob/main/aws-multi-cloud/src/main/java/com/elastic/multicloud/ElasticApmConfig.java\\",rel:\\"nofollow\\",children:\\"GitHub\\"}),\\".\\"]}),`\\n`,(0,t.jsx)(e.pre,{children:(0,t.jsx)(e.code,{className:\\"language-java\\",children:`package com.elastic.multicloud;\\nimport co.elastic.apm.attach.ElasticApmAttacher;\\nimport jakarta.annotation.PostConstruct;\\nimport lombok.Setter;\\nimport org.slf4j.Logger;\\nimport org.slf4j.LoggerFactory;\\nimport org.springframework.boot.autoconfigure.condition.ConditionalOnProperty;\\nimport org.springframework.boot.context.properties.ConfigurationProperties;\\nimport org.springframework.context.annotation.Configuration;\\n\\nimport java.util.HashMap;\\nimport java.util.Map;\\n\\n@Setter\\n@Configuration\\n@ConfigurationProperties(prefix = \\"elastic.apm\\")\\n@ConditionalOnProperty(value = \\"elastic.apm.enabled\\", havingValue = \\"true\\")\\npublic class ElasticApmConfig {\\n\\n private static final String SERVER_URL_KEY = \\"server_url\\";\\n private String serverUrl;\\n\\n private static final String SERVICE_NAME_KEY = \\"service_name\\";\\n private String serviceName;\\n\\n private static final String SECRET_TOKEN_KEY = \\"secret_token\\";\\n private String secretToken;\\n\\n private static final String ENVIRONMENT_KEY = \\"environment\\";\\n private String environment;\\n\\n private static final String APPLICATION_PACKAGES_KEY = \\"application_packages\\";\\n private String applicationPackages;\\n\\n private static final String LOG_LEVEL_KEY = \\"log_level\\";\\n private String logLevel;\\n private static final Logger LOGGER = LoggerFactory.getLogger(ElasticApmConfig.class);\\n\\n @PostConstruct\\n public void init() {\\n LOGGER.info(environment);\\n\\n Map apmProps = new HashMap<>(6);\\n apmProps.put(SERVER_URL_KEY, serverUrl);\\n apmProps.put(SERVICE_NAME_KEY, serviceName);\\n apmProps.put(SECRET_TOKEN_KEY, secretToken);\\n apmProps.put(ENVIRONMENT_KEY, environment);\\n apmProps.put(APPLICATION_PACKAGES_KEY, applicationPackages);\\n apmProps.put(LOG_LEVEL_KEY, logLevel);\\n apmProps.put(\\"enable_experimental_instrumentations\\",\\"true\\");\\n apmProps.put(\\"capture_jmx_metrics\\",\\"object_name[kafka.producer:type=producer-metrics,client-id=*] attribute[batch-size-avg:metric_name=kafka.producer.batch-size-avg]\\");\\n\\n\\n ElasticApmAttacher.attach(apmProps);\\n }\\n}\\n`})}),`\\n`,(0,t.jsx)(e.p,{children:\\"Now obviously this requires some dependencies, which you can see here in the Maven pom.xml.\\"}),`\\n`,(0,t.jsx)(e.pre,{children:(0,t.jsx)(e.code,{className:\\"language-xml\\",children:`\\n\\t\\t\\tco.elastic.apm\\n\\t\\t\\tapm-agent-attach\\n\\t\\t\\t1.35.1-SNAPSHOT\\n\\t\\t\\n\\t\\t\\n\\t\\t\\tco.elastic.apm\\n\\t\\t\\tapm-agent-api\\n\\t\\t\\t1.35.1-SNAPSHOT\\n\\t\\t\\n`})}),`\\n`,(0,t.jsx)(e.p,{children:\\"Strictly speaking, the agent-api is not required, but it could be useful if you have a desire to add your own monitoring code (as per the example below). 
The agent will happily auto-instrument without needing to do that though.\\"}),`\\n`,(0,t.jsx)(e.pre,{children:(0,t.jsx)(e.code,{className:\\"language-java\\",children:`Transaction transaction = ElasticApm.currentTransaction();\\n Span span = ElasticApm.currentSpan()\\n .startSpan(\\"external\\", \\"kafka\\", null)\\n .setName(\\"DAVID\\").setServiceTarget(\\"kafka\\",\\"gcp-elastic-apm-spring-boot-integration\\");\\n try (final Scope scope = transaction.activate()) {\\n span.injectTraceHeaders((name, value) -> producerRecord.headers().add(name,value.getBytes()));\\n return Mono.fromRunnable(() -> {\\n kafkaTemplate.send(producerRecord);\\n });\\n } catch (Exception e) {\\n span.captureException(e);\\n throw e;\\n } finally {\\n span.end();\\n }\\n`})}),`\\n`,(0,t.jsx)(e.p,{children:\\"Now we have enough code to get our agent bootstrapped.\\"}),`\\n`,(0,t.jsx)(e.p,{children:\\"To get the code from the GitHub repository up and running, you will need the following installed on your system and to ensure that you have the credentials for your GCP and AWS cloud.\\"}),`\\n`,(0,t.jsx)(e.pre,{children:(0,t.jsx)(e.code,{className:\\"language-java\\",children:`\\nJava\\nMaven\\nDocker\\nKubernetes CLI (kubectl)\\n`})}),`\\n`,(0,t.jsx)(e.h3,{id:\\"clone-the-project\\",children:\\"Clone the project\\"}),`\\n`,(0,t.jsx)(e.p,{children:\\"Clone the multi-cloud Spring project to your local machine.\\"}),`\\n`,(0,t.jsx)(e.pre,{children:(0,t.jsx)(e.code,{className:\\"language-bash\\",children:`git clone https://github.com/davidgeorgehope/multi-cloud\\n`})}),`\\n`,(0,t.jsx)(e.h3,{id:\\"build-the-project\\",children:\\"Build the project\\"}),`\\n`,(0,t.jsx)(e.p,{children:\\"From each service in the project (aws-multi-cloud, gcp-multi-cloud, gcp-bigdata-consumer-multi-cloud), run the following commands to build the project.\\"}),`\\n`,(0,t.jsx)(e.pre,{children:(0,t.jsx)(e.code,{className:\\"language-bash\\",children:`mvn clean install\\n`})}),`\\n`,(0,t.jsx)(e.p,{children:\\"Now you can run the Java project locally.\\"}),`\\n`,(0,t.jsx)(e.pre,{children:(0,t.jsx)(e.code,{className:\\"language-java\\",children:`java -jar gcp-bigdata-consumer-multi-cloud-0.0.1-SNAPSHOT.jar --spring.config.location=/Users/davidhope/applicaiton-gcp.properties\\n`})}),`\\n`,(0,t.jsx)(e.p,{children:\\"That will just get the Java application running locally, but you can also deploy this to Kubernetes using EKS and GKE as shown below.\\"}),`\\n`,(0,t.jsx)(e.h3,{id:\\"create-a-docker-image\\",children:\\"Create a Docker image\\"}),`\\n`,(0,t.jsx)(e.p,{children:\\"Create a Docker image from the built project using the dockerBuild.sh provided in the project. 
You may want to customize this shell script to upload the built docker image to your own docker repository.\\"}),`\\n`,(0,t.jsx)(e.pre,{children:(0,t.jsx)(e.code,{className:\\"language-bash\\",children:`./dockerBuild.sh\\n`})}),`\\n`,(0,t.jsx)(e.h3,{id:\\"create-a-namespace-for-each-service\\",children:\\"Create a namespace for each service\\"}),`\\n`,(0,t.jsx)(e.pre,{children:(0,t.jsx)(e.code,{className:\\"language-bash\\",children:`kubectl create namespace aws\\n`})}),`\\n`,(0,t.jsx)(e.pre,{children:(0,t.jsx)(e.code,{className:\\"language-bash\\",children:`kubectl create namespace gcp-1\\n`})}),`\\n`,(0,t.jsx)(e.pre,{children:(0,t.jsx)(e.code,{className:\\"language-bash\\",children:`kubectl create namespace gcp-2\\n`})}),`\\n`,(0,t.jsx)(e.p,{children:\\"Once you have the namespaces created, you can switch context using the following command:\\"}),`\\n`,(0,t.jsx)(e.pre,{children:(0,t.jsx)(e.code,{className:\\"language-bash\\",children:`kubectl config set-context --current --namespace=my-namespace\\n`})}),`\\n`,(0,t.jsx)(e.h3,{id:\\"configuration-for-each-service\\",children:\\"Configuration for each service\\"}),`\\n`,(0,t.jsxs)(e.p,{children:[\\"Each service needs an application.properties file. I have put an example \\",(0,t.jsx)(e.a,{href:\\"https://github.com/davidgeorgehope/multi-cloud/blob/main/gcp-bigdata-consumer-multi-cloud/application.properties\\",rel:\\"nofollow\\",children:\\"here\\"}),\\".\\"]}),`\\n`,(0,t.jsx)(e.p,{children:\\"You will need to replace the following properties with those you find in Elastic.\\"}),`\\n`,(0,t.jsx)(e.pre,{children:(0,t.jsx)(e.code,{className:\\"language-bash\\",children:`elastic.apm.server-url=\\nelastic.apm.secret-token=\\n`})}),`\\n`,(0,t.jsxs)(e.p,{children:[\\"These can be found by going into Elastic Cloud and clicking on \\",(0,t.jsx)(e.strong,{children:\\"Services\\"}),\\" inside APM and then \\",(0,t.jsx)(e.strong,{children:\\"Add Data\\"}),\\" , which should be visible in the top right corner.\\"]}),`\\n`,(0,t.jsx)(e.p,{children:(0,t.jsx)(e.img,{src:\\"/assets/images/monitor-kafka-confluent-cloud-elastic-observability/blog-elastic-observability-add-data.png\\",alt:\\"add data\\",width:\\"124\\",height:\\"38\\"})}),`\\n`,(0,t.jsx)(e.p,{children:\\"From there you will see the following, which gives you the config information you need.\\"}),`\\n`,(0,t.jsx)(e.p,{children:(0,t.jsx)(e.img,{src:\\"/assets/images/monitor-kafka-confluent-cloud-elastic-observability/blog-elastic-observability-apm-agents.png\\",alt:\\"apm agents\\",width:\\"1188\\",height:\\"788\\"})}),`\\n`,(0,t.jsx)(e.p,{children:\\"You will need to replace the following properties with those you find in Confluent Cloud.\\"}),`\\n`,(0,t.jsx)(e.pre,{children:(0,t.jsx)(e.code,{className:\\"language-bash\\",children:`elastic.kafka.producer.sasl-jaas-config=\\n`})}),`\\n`,(0,t.jsx)(e.p,{children:\\"This configuration comes from the Clients page in Confluent Cloud.\\"}),`\\n`,(0,t.jsx)(e.p,{children:(0,t.jsx)(e.img,{src:\\"/assets/images/monitor-kafka-confluent-cloud-elastic-observability/blog-elastic-observability-confluent-new-client.png\\",alt:\\"confluent new client\\",width:\\"1389\\",height:\\"1038\\"})}),`\\n`,(0,t.jsx)(e.h3,{id:\\"adding-the-config-for-each-service-in-kubernetes\\",children:\\"Adding the config for each service in Kubernetes\\"}),`\\n`,(0,t.jsxs)(e.p,{children:[\\"Once you have a fully configured application properties, you need to add it to your 
\\",(0,t.jsx)(e.a,{href:\\"https://www.elastic.co/blog/kubernetes-cluster-metrics-logs-monitoring\\",rel:\\"nofollow\\",children:\\"Kubernetes environment\\"}),\\" as below.\\"]}),`\\n`,(0,t.jsx)(e.p,{children:\\"From the aws namespace.\\"}),`\\n`,(0,t.jsx)(e.pre,{children:(0,t.jsx)(e.code,{className:\\"language-bash\\",children:`kubectl create secret generic my-app-config --from-file=application.properties\\n`})}),`\\n`,(0,t.jsx)(e.p,{children:\\"From the gcp-1 namespace.\\"}),`\\n`,(0,t.jsx)(e.pre,{children:(0,t.jsx)(e.code,{className:\\"language-bash\\",children:`kubectl create secret generic my-app-config --from-file=application.properties\\n`})}),`\\n`,(0,t.jsx)(e.p,{children:\\"From the gcp-2 namespace.\\"}),`\\n`,(0,t.jsx)(e.pre,{children:(0,t.jsx)(e.code,{className:\\"language-bash\\",children:`kubectl create secret generic bigdata-creds --from-file=elastic-product-marketing-e145e13fbc7c.json\\n\\nkubectl create secret generic my-app-config-gcp-bigdata --from-file=application.properties\\n`})}),`\\n`,(0,t.jsx)(e.h3,{id:\\"create-a-kubernetes-deployment\\",children:\\"Create a Kubernetes deployment\\"}),`\\n`,(0,t.jsx)(e.p,{children:\\"Create a Kubernetes deployment YAML file and add your Docker image to it. You can use the deployment.yaml file provided in the project as a template. Make sure to update the image name in the file to match the name of the Docker image you just created.\\"}),`\\n`,(0,t.jsx)(e.pre,{children:(0,t.jsx)(e.code,{className:\\"language-bash\\",children:`kubectl apply -f deployment.yaml\\n`})}),`\\n`,(0,t.jsx)(e.h3,{id:\\"create-a-kubernetes-service\\",children:\\"Create a Kubernetes service\\"}),`\\n`,(0,t.jsx)(e.p,{children:\\"Create a Kubernetes service YAML file and add your deployment to it. You can use the service.yaml file provided in the project as a template.\\"}),`\\n`,(0,t.jsx)(e.pre,{children:(0,t.jsx)(e.code,{className:\\"language-yaml\\",children:`kubectl apply -f service.yaml\\n`})}),`\\n`,(0,t.jsx)(e.h3,{id:\\"access-your-application\\",children:\\"Access your application\\"}),`\\n`,(0,t.jsx)(e.p,{children:\\"Your application is now running in a Kubernetes cluster. To access it, you can use the service\'s cluster IP and port. 
You can get the service\'s IP and port using the following command.\\"}),`\\n`,(0,t.jsx)(e.pre,{children:(0,t.jsx)(e.code,{className:\\"language-bash\\",children:`kubectl get services\\n`})}),`\\n`,(0,t.jsx)(e.p,{children:\\"Now once you know where the service is, you need to execute it!\\"}),`\\n`,(0,t.jsx)(e.p,{children:\\"You can regularly poke the service endpoint using the following command.\\"}),`\\n`,(0,t.jsx)(e.pre,{children:(0,t.jsx)(e.code,{className:\\"language-bash\\",children:`curl -X POST -H \\"Content-Type: application/json\\" -d \'{\\"name\\": \\"linuxize\\", \\"email\\": \\"linuxize@example.com\\"}\' https://localhost:8080/api/my-objects/publish\\n`})}),`\\n`,(0,t.jsx)(e.p,{children:\\"With this up and running, you should see the following service map build out in the Elastic APM product.\\"}),`\\n`,(0,t.jsx)(e.p,{children:(0,t.jsx)(e.img,{src:\\"/assets/images/monitor-kafka-confluent-cloud-elastic-observability/blog-elastic-observability-aws-elastic-apm-spring-boot.png\\",alt:\\"aws elastic apm spring boot\\",width:\\"1999\\",height:\\"480\\"})}),`\\n`,(0,t.jsx)(e.p,{children:\\"And traces will contain a waterfall graph showing all the spans that have executed across this distributed application, allowing you to pinpoint where any issues are within each transaction.\\"}),`\\n`,(0,t.jsx)(e.p,{children:(0,t.jsx)(e.img,{src:\\"/assets/images/monitor-kafka-confluent-cloud-elastic-observability/blog-elastic-observability-services.png\\",alt:\\"observability services\\",width:\\"919\\",height:\\"691\\"})}),`\\n`,(0,t.jsx)(e.h2,{id:\\"jmx-for-kafka-producerconsumer-metrics\\",children:\\"JMX for Kafka Producer/Consumer metrics\\"}),`\\n`,(0,t.jsx)(e.p,{children:\\"In the previous part of this blog, we briefly touched on the JMX metric configuration you can see below.\\"}),`\\n`,(0,t.jsx)(e.pre,{children:(0,t.jsx)(e.code,{className:\\"language-bash\\",children:`\\"capture_jmx_metrics\\",\\"object_name[kafka.producer:type=producer-metrics,client-id=*] attribute[batch-size-avg:metric_name=kafka.producer.batch-size-avg]\\"\\n`})}),`\\n`,(0,t.jsx)(e.p,{children:\\"We can use this \\\\u201Ccapture_jmx_metrics\\\\u201D configuration to configure JMX for any Kafka Producer/Consumer metrics we want to monitor.\\"}),`\\n`,(0,t.jsxs)(e.p,{children:[\\"Check out the documentation \\",(0,t.jsx)(e.a,{href:\\"https://www.elastic.co/guide/en/apm/agent/java/current/config-jmx.html\\",rel:\\"nofollow\\",children:\\"here\\"}),\\" to understand how to configure this and \\",(0,t.jsx)(e.a,{href:\\"https://docs.confluent.io/platform/current/kafka/monitoring.html\\",rel:\\"nofollow\\",children:\\"here\\"}),\\" to see the available JMX metrics you can monitor. 
In the \\",(0,t.jsx)(e.a,{href:\\"https://github.com/davidgeorgehope/multi-cloud/blob/main/gcp-bigdata-consumer-multi-cloud/src/main/java/com/elastic/multicloud/ElasticApmConfig.java\\",rel:\\"nofollow\\",children:\\"example code in GitHub\\"}),\\", we actually pull all the available metrics in, so you can check in there how to configure this.\\"]}),`\\n`,(0,t.jsx)(e.p,{children:\\"One thing that\\\\u2019s worth pointing out here is that it\\\\u2019s important to use the \\\\u201Cmetric_name\\\\u201D property shown above or it gets quite difficult to find the metrics in Elastic Discover without being specific here.\\"}),`\\n`,(0,t.jsx)(e.h2,{id:\\"monitoring-confluent-cloud-with-elastic-observability\\",children:\\"Monitoring Confluent Cloud with Elastic Observability\\"}),`\\n`,(0,t.jsxs)(e.p,{children:[\\"So we now have some good monitoring set up for Kafka Producers and Consumers and we can trace transactions between services down to the lines of code that are executing. The core part of our Kafka infrastructure is hosted in Confluent Cloud. How, then, do we get data from there into our \\",(0,t.jsx)(e.a,{href:\\"https://www.elastic.co/observability\\",rel:\\"nofollow\\",children:\\"full stack observability solution\\"}),\\"?\\"]}),`\\n`,(0,t.jsxs)(e.p,{children:[\\"Luckily, Confluent has done a fantastic job of making this easy. It provides important Confluent Cloud metrics via an open Prometheus-based metrics URL. So let\'s get down to business and configure this to bring data into our \\",(0,t.jsx)(e.a,{href:\\"https://www.elastic.co/observability\\",rel:\\"nofollow\\",children:\\"observability tool\\"}),\\".\\"]}),`\\n`,(0,t.jsx)(e.p,{children:\\"The first step is to configure Confluent Cloud with the MetricsViewer. The MetricsViewer role provides service account access to the Metrics API for all clusters in an organization. This role also enables service accounts to import metrics into third-party metrics platforms.\\"}),`\\n`,(0,t.jsx)(e.p,{children:\\"To assign the MetricsViewer role to a new service account:\\"}),`\\n`,(0,t.jsxs)(e.ol,{children:[`\\n`,(0,t.jsxs)(e.li,{children:[\\"In the top-right administration menu (\\\\u2630) in the upper-right corner of the Confluent Cloud user interface, click \\",(0,t.jsx)(e.strong,{children:\\"ADMINISTRATION > Cloud API keys\\"}),\\".\\"]}),`\\n`,(0,t.jsxs)(e.li,{children:[\\"Click \\",(0,t.jsx)(e.strong,{children:\\"Add key\\"}),\\".\\"]}),`\\n`,(0,t.jsxs)(e.li,{children:[\\"Click the \\",(0,t.jsx)(e.strong,{children:\\"Granular access tile\\"}),\\" to set the scope for the API key. Click \\",(0,t.jsx)(e.strong,{children:\\"Next\\"}),\\".\\"]}),`\\n`,(0,t.jsxs)(e.li,{children:[\\"Click \\",(0,t.jsx)(e.strong,{children:\\"Create a new one\\"}),\\" and specify the service account name. Optionally, add a description. Click \\",(0,t.jsx)(e.strong,{children:\\"Next\\"}),\\".\\"]}),`\\n`,(0,t.jsxs)(e.li,{children:[\\"The API key and secret are generated for the service account. You will need this API key and secret to connect to the cluster, so be sure to safely store this information. Click \\",(0,t.jsx)(e.strong,{children:\\"Save\\"}),\\". The new service account with the API key and associated ACLs is created. 
When you return to the API access tab, you can view the newly-created API key to confirm.\\"]}),`\\n`,(0,t.jsxs)(e.li,{children:[\\"Return to Accounts & access in the administration menu, and in the Accounts tab, click \\",(0,t.jsx)(e.strong,{children:\\"Service accounts\\"}),\\" to view your service accounts.\\"]}),`\\n`,(0,t.jsx)(e.li,{children:\\"Select the service account that you want to assign the MetricsViewer role to.\\"}),`\\n`,(0,t.jsxs)(e.li,{children:[\\"In the service account\\\\u2019s details page, click \\",(0,t.jsx)(e.strong,{children:\\"Access\\"}),\\".\\"]}),`\\n`,(0,t.jsx)(e.li,{children:\\"In the tree view, open the resource where you want the service account to have the MetricsViewer role.\\"}),`\\n`,(0,t.jsxs)(e.li,{children:[\\"Click \\",(0,t.jsx)(e.strong,{children:\\"Add role assignment\\"}),\\" and select the MetricsViewer tile. Click \\",(0,t.jsx)(e.strong,{children:\\"Save\\"}),\\".\\"]}),`\\n`]}),`\\n`,(0,t.jsxs)(e.p,{children:[\\"Next we can head to \\",(0,t.jsx)(e.a,{href:\\"https://www.elastic.co/observability\\",rel:\\"nofollow\\",children:\\"Elastic Observability\\"}),\\" and configure the Prometheus integration to pull in the metrics data.\\"]}),`\\n`,(0,t.jsx)(e.p,{children:\\"Go to the integrations page in Kibana.\\"}),`\\n`,(0,t.jsx)(e.p,{children:(0,t.jsx)(e.img,{src:\\"/assets/images/monitor-kafka-confluent-cloud-elastic-observability/blog-elastic-observability-integrations.png\\",alt:\\"observability integrations\\",width:\\"652\\",height:\\"168\\"})}),`\\n`,(0,t.jsx)(e.p,{children:\\"Find the Prometheus integration. We are using the Prometheus integration because the Confluent Cloud metrics server can provide data in prometheus format. Trust us, this works really well \\\\u2014 good work Confluent!\\"}),`\\n`,(0,t.jsx)(e.p,{children:(0,t.jsx)(e.img,{src:\\"/assets/images/monitor-kafka-confluent-cloud-elastic-observability/blog-elastic-observability-integrations-prometheus.png\\",alt:\\"integrations prometheus\\",width:\\"593\\",height:\\"366\\"})}),`\\n`,(0,t.jsx)(e.p,{children:\\"Add Prometheus in the next page.\\"}),`\\n`,(0,t.jsx)(e.p,{children:(0,t.jsx)(e.img,{src:\\"/assets/images/monitor-kafka-confluent-cloud-elastic-observability/blog-elastic-observability-add-prometheus.png\\",alt:\\"add prometheus\\",width:\\"188\\",height:\\"94\\"})}),`\\n`,(0,t.jsx)(e.p,{children:\\"Configure the Prometheus plugin in the following way: In the hosts box, add the following URL, replacing the resource kafka id with the cluster id you want to monitor.\\"}),`\\n`,(0,t.jsx)(e.pre,{children:(0,t.jsx)(e.code,{className:\\"language-bash\\",children:`https://api.telemetry.confluent.cloud:443/v2/metrics/cloud/export?resource.kafka.id=lkc-3rw3gw\\n`})}),`\\n`,(0,t.jsx)(e.p,{children:(0,t.jsx)(e.img,{src:\\"/assets/images/monitor-kafka-confluent-cloud-elastic-observability/blog-elastic-observability-collect-prometheus-metrics.png\\",alt:\\"collect prometheus metrics\\",width:\\"890\\",height:\\"188\\"})}),`\\n`,(0,t.jsx)(e.p,{children:\\"Add the username and password under the advanced options you got from the API keys step you executed against Confluent Cloud above.\\"}),`\\n`,(0,t.jsx)(e.p,{children:(0,t.jsx)(e.img,{src:\\"/assets/images/monitor-kafka-confluent-cloud-elastic-observability/blog-elastic-observability-http-config-options.png\\",alt:\\"http config options\\",width:\\"412\\",height:\\"185\\"})}),`\\n`,(0,t.jsxs)(e.p,{children:[\\"Once the Integration is created, 
\\",(0,t.jsx)(e.a,{href:\\"https://www.elastic.co/guide/en/fleet/current/agent-policy.html#apply-a-policy\\",rel:\\"nofollow\\",children:\\"the policy needs to be applied\\"}),\\" to an instance of a running Elastic Agent.\\"]}),`\\n`,(0,t.jsx)(e.p,{children:\\"That\\\\u2019s it! It\\\\u2019s that easy to get all the data you need for a full stack observability monitoring solution.\\"}),`\\n`,(0,t.jsx)(e.p,{children:\\"Finally, let\\\\u2019s pull all this together in a dashboard.\\"}),`\\n`,(0,t.jsx)(e.h2,{id:\\"pulling-it-all-together\\",children:\\"Pulling it all together\\"}),`\\n`,(0,t.jsx)(e.p,{children:\\"Using Kibana to generate dashboards is super easy. If you configured everything the way we recommended above, you should find the metrics (producer/consumer/brokers) you need to create your own dashboard as per the following screenshot.\\"}),`\\n`,(0,t.jsx)(e.p,{children:(0,t.jsx)(e.img,{src:\\"/assets/images/monitor-kafka-confluent-cloud-elastic-observability/blog-elastic-observability-dashboard-metrics.png\\",alt:\\"dashboard metrics\\",width:\\"1173\\",height:\\"944\\"})}),`\\n`,(0,t.jsxs)(e.p,{children:[\\"Luckily, I made a dashboard for you and stored it in \\",(0,t.jsx)(e.a,{href:\\"https://github.com/davidgeorgehope/multi-cloud/blob/main/export.ndjson\\",rel:\\"nofollow\\",children:\\"GitHub\\"}),\\". Take a look below and use this to import it into your own environments.\\"]}),`\\n`,(0,t.jsx)(e.p,{children:(0,t.jsx)(e.img,{src:\\"/assets/images/monitor-kafka-confluent-cloud-elastic-observability/blog-elastic-observability-producer_metrics.png\\",alt:\\"producer metrics\\",width:\\"1999\\",height:\\"1026\\"})}),`\\n`,(0,t.jsx)(e.h2,{id:\\"adding-the-icing-on-the-cake-machine-learning-anomaly-detection\\",children:\\"Adding the icing on the cake: machine learning anomaly detection\\"}),`\\n`,(0,t.jsx)(e.p,{children:\\"Now that we have all the critical bits in place, we are going to add the icing on the cake: machine learning (ML)!\\"}),`\\n`,(0,t.jsx)(e.p,{children:\\"Within Kibana, let\'s head over to the Machine Learning tab in \\\\u201CAnalytics.\\\\u201D\\"}),`\\n`,(0,t.jsx)(e.p,{children:(0,t.jsx)(e.img,{src:\\"/assets/images/monitor-kafka-confluent-cloud-elastic-observability/blog-elastic-observability-kibana-analytics.png\\",alt:\\"kibana analytics\\",width:\\"247\\",height:\\"211\\"})}),`\\n`,(0,t.jsx)(e.p,{children:\\"Go to the jobs page, where we\\\\u2019ll get started creating our first anomaly detection job.\\"}),`\\n`,(0,t.jsx)(e.p,{children:(0,t.jsx)(e.img,{src:\\"/assets/images/monitor-kafka-confluent-cloud-elastic-observability/blog-elastic-observability-create-your-first-anomaly-detection-job.png\\",alt:\\"create your first anomaly detection job\\",width:\\"956\\",height:\\"414\\"})}),`\\n`,(0,t.jsx)(e.p,{children:\\"The metrics data view contains what we need to create this new anomaly detection job.\\"}),`\\n`,(0,t.jsx)(e.p,{children:(0,t.jsx)(e.img,{src:\\"/assets/images/monitor-kafka-confluent-cloud-elastic-observability/blog-elastic-observability-metrics.png\\",alt:\\"observability metrics\\",width:\\"1239\\",height:\\"143\\"})}),`\\n`,(0,t.jsx)(e.p,{children:\\"Use the wizard and select a \\\\u201CSingle Metric.\\\\u201D\\"}),`\\n`,(0,t.jsx)(e.p,{children:(0,t.jsx)(e.img,{src:\\"/assets/images/monitor-kafka-confluent-cloud-elastic-observability/blog-elastic-observability-use-a-wizard.png\\",alt:\\"use a wizard\\",width:\\"804\\",height:\\"151\\"})}),`\\n`,(0,t.jsx)(e.p,{children:\\"Use the full 
data.\\"}),`\\n`,(0,t.jsx)(e.p,{children:(0,t.jsx)(e.img,{src:\\"/assets/images/monitor-kafka-confluent-cloud-elastic-observability/blog-elastic-observability-use-full-data.png\\",alt:\\"use full data\\",width:\\"201\\",height:\\"90\\"})}),`\\n`,(0,t.jsx)(e.p,{children:\\"In this example, we are going to look for anomalies in the connection count. We really do not want a major deviation here, as this could indicate something very bad occurring if we suddenly have too many or too few things connecting to our Kafka cluster.\\"}),`\\n`,(0,t.jsx)(e.p,{children:\\"Once you have selected the connection count metric, you can proceed through the wizard and eventually your ML job will be created and you should be able to view the data as per the example below.\\"}),`\\n`,(0,t.jsx)(e.p,{children:(0,t.jsx)(e.img,{src:\\"/assets/images/monitor-kafka-confluent-cloud-elastic-observability/blog-elastic-observability-single-metric-viewer.png\\",alt:\\"single metric viewer\\",width:\\"863\\",height:\\"914\\"})}),`\\n`,(0,t.jsxs)(e.p,{children:[\\"Congratulations, you have now created a machine learning job to alert you if there are any problems with your Kafka cluster, adding \\",(0,t.jsx)(e.a,{href:\\"https://www.elastic.co/observability/aiops\\",rel:\\"nofollow\\",children:\\"a full AIOps solution\\"}),\\" to your Kafka and Confluent observability!\\"]}),`\\n`,(0,t.jsx)(e.h2,{id:\\"summary\\",children:\\"Summary\\"}),`\\n`,(0,t.jsx)(e.p,{children:\\"We looked at monitoring Kafka-based solutions implemented on Confluent Cloud using Elastic Observability.\\"}),`\\n`,(0,t.jsx)(e.p,{children:\\"We covered the architecture of a multi-cloud solution involving AWS EKS, Confluent Cloud, and GCP GKE. We looked at how to instrument Kafka applications with Elastic APM, use JMX for Kafka Producer/Consumer metrics, integrate Prometheus, and set up machine learning anomaly detection.\\"}),`\\n`,(0,t.jsx)(e.p,{children:\\"We went through a detailed walk-through with code snippets, configuration steps, and deployment instructions included to help you get started.\\"}),`\\n`,(0,t.jsx)(e.p,{children:\\"Interested in learning more about Elastic Observability? 
Check out the following resources:\\"}),`\\n`,(0,t.jsxs)(e.ul,{children:[`\\n`,(0,t.jsx)(e.li,{children:(0,t.jsx)(e.a,{href:\\"https://www.elastic.co/virtual-events/intro-to-elastic-observability\\",rel:\\"nofollow\\",children:\\"An Introduction to Elastic Observability\\"})}),`\\n`,(0,t.jsx)(e.li,{children:(0,t.jsx)(e.a,{href:\\"https://www.elastic.co/training/observability-fundamentals\\",rel:\\"nofollow\\",children:\\"Observability Fundamentals Training\\"})}),`\\n`,(0,t.jsx)(e.li,{children:(0,t.jsx)(e.a,{href:\\"https://www.elastic.co/observability/demo\\",rel:\\"nofollow\\",children:\\"Watch an Elastic Observability demo\\"})}),`\\n`,(0,t.jsx)(e.li,{children:(0,t.jsx)(e.a,{href:\\"https://www.elastic.co/blog/observability-predictions-trends-2023\\",rel:\\"nofollow\\",children:\\"Observability Predictions and Trends for 2023\\"})}),`\\n`]}),`\\n`,(0,t.jsxs)(e.p,{children:[\\"And sign up for our \\",(0,t.jsx)(e.a,{href:\\"https://www.elastic.co/virtual-events/emerging-trends-in-observability\\",rel:\\"nofollow\\",children:\\"Elastic Observability Trends Webinar\\"}),\\" featuring AWS and Forrester, not to be missed!\\"]})]})}function d(i={}){let{wrapper:e}=i.components||{};return e?(0,t.jsx)(e,{...i,children:(0,t.jsx)(h,{...i})}):h(i)}return v(C);})();\\n;return Component;"},"_id":"articles/how-to-monitor-kafka-confluent-cloud-elastic-observability.mdx","_raw":{"sourceFilePath":"articles/how-to-monitor-kafka-confluent-cloud-elastic-observability.mdx","sourceFileName":"how-to-monitor-kafka-confluent-cloud-elastic-observability.mdx","sourceFileDir":"articles","contentType":"mdx","flattenedPath":"articles/how-to-monitor-kafka-confluent-cloud-elastic-observability"},"type":"Article","imageUrl":"/assets/images/monitor-kafka-confluent-cloud-elastic-observability/patterns-white-background-no-logo-observability_(1).png","readingTime":"16 min read","url":"/monitor-kafka-confluent-cloud-elastic-observability","headings":[{"level":2,"title":"Using full-stack Elastic Observability to understand Kafka and Confluent performance","href":"#using-full-stack-elastic-observability-to-understand-kafka-and-confluent-performance"},{"level":2,"title":"A look at the architecture","href":"#a-look-at-the-architecture"},{"level":2,"title":"Getting started with Elastic Observability and Confluent Cloud","href":"#getting-started-with-elastic-observability-and-confluent-cloud"},{"level":2,"title":"Getting APM up and running","href":"#getting-apm-up-and-running"},{"level":3,"title":"Clone the project","href":"#clone-the-project"},{"level":3,"title":"Build the project","href":"#build-the-project"},{"level":3,"title":"Create a Docker image","href":"#create-a-docker-image"},{"level":3,"title":"Create a namespace for each service","href":"#create-a-namespace-for-each-service"},{"level":3,"title":"Configuration for each service","href":"#configuration-for-each-service"},{"level":3,"title":"Adding the config for each service in Kubernetes","href":"#adding-the-config-for-each-service-in-kubernetes"},{"level":3,"title":"Create a Kubernetes deployment","href":"#create-a-kubernetes-deployment"},{"level":3,"title":"Create a Kubernetes service","href":"#create-a-kubernetes-service"},{"level":3,"title":"Access your application","href":"#access-your-application"},{"level":2,"title":"JMX for Kafka Producer/Consumer metrics","href":"#jmx-for-kafka-producerconsumer-metrics"},{"level":2,"title":"Monitoring Confluent Cloud with Elastic 
Observability","href":"#monitoring-confluent-cloud-with-elastic-observability"},{"level":2,"title":"Pulling it all together","href":"#pulling-it-all-together"},{"level":2,"title":"Adding the icing on the cake: machine learning anomaly detection","href":"#adding-the-icing-on-the-cake-machine-learning-anomaly-detection"},{"level":2,"title":"Summary","href":"#summary"}]},{"title":"How to remove PII from your Elastic data in 3 easy steps","slug":"remove-pii-data","date":"2023-06-20","description":"Personally Identifiable Information compliance is an ever increasing challenge for any organization. With Elastic\'s intuitive ML interface and parsing capabilities, sensitive data may be easily redacted from unstructured data with ease.","image":"blog-post4-ai-search-B.jpg","author":[{"slug":"peter-titov","type":"Author","_raw":{}}],"tags":[{"slug":"log-analytics","type":"Tag","_raw":{}},{"slug":"esre","type":"Tag","_raw":{}},{"slug":"genai","type":"Tag","_raw":{}},{"slug":"security","type":"Tag","_raw":{}}],"body":{"raw":"\\nPersonally identifiable information (PII) compliance is an ever-increasing challenge for any organization. Whether you’re in ecommerce, banking, healthcare, or other fields where data is sensitive, PII may inadvertently be captured and stored. Having structured logs enables quick identification, removal, and protection of sensitive data fields easily; but what about unstructured messages? Or perhaps call center transcriptions?\\n\\nElasticsearch, with its long experience in [machine learning](https://www.elastic.co/what-is/elasticsearch-machine-learning), provides various options to bring in custom models, such as large language models (LLMs), and provides its own models. These models will help implement PII redaction.\\n\\nIf you would like to learn more about natural language processing, machine learning, and Elastic, please be sure to check out these related articles:\\n\\n- [Introduction to modern natural language processing with PyTorch in Elasticsearch](https://www.elastic.co/blog/introduction-to-nlp-with-pytorch-models)\\n- [How to deploy natural language processing (NLP): Getting started](https://www.elastic.co/blog/how-to-deploy-natural-language-processing-nlp-getting-started)\\n- [Elastic Redact Processor Documentation](https://www.elastic.co/guide/en/elasticsearch/reference/current/redact-processor.html)\\n- [Introducing Elastic Learned Sparse Encoder: Elastic’s AI model for semantic search](https://www.elastic.co/blog/may-2023-launch-sparse-encoder-ai-model)\\n- [Accessing machine learning models in Elastic](https://www.elastic.co/blog/may-2023-launch-machine-learning-models)\\n\\nIn this blog, we will show you how to set up PII redaction through the use of Elasticsearch’s ability to load a trained model within machine learning and the flexibility of Elastic’s ingest pipelines.\\n\\nSpecifically, we’ll walk through setting up a [named entity recognition (NER)](https://www.elastic.co/blog/how-to-deploy-nlp-named-entity-recognition-ner-example) model for person and location identification, as well as deploying the redact processor for custom data identification and removal. All of this will then be combined with an ingest pipeline where we can use Elastic machine learning and data transformations capabilities to remove sensitive information from your data.\\n\\n## Loading the trained model\\n\\nBefore we begin, we must load our NER model into our Elasticsearch cluster. This may be easily accomplished with Docker and the Elastic Eland client. 
From a command line, let’s fetch the Eland client via git:\\n\\n```bash\\ngit clone https://github.com/elastic/eland.git\\n```\\n\\nNavigate into the recently downloaded client:\\n\\n```bash\\ncd eland/\\n```\\n\\nNow let’s build the client:\\n\\n```bash\\ndocker build -t elastic/eland .\\n```\\n\\nFrom here, you’re ready to deploy the trained model to an Elastic machine learning node! Be sure to replace the username, password, es-cluster-hostname, and esport placeholders with your own values.\\n\\nIf you’re using Elastic Cloud or have signed certificates, simply run this command:\\n\\n```bash\\ndocker run -it --rm --network host elastic/eland eland_import_hub_model --url https://<username>:<password>@<es-cluster-hostname>:<esport>/ --hub-model-id dslim/bert-base-NER --task-type ner --start\\n```\\n\\nIf you’re using self-signed certificates, run this command:\\n\\n```bash\\ndocker run -it --rm --network host elastic/eland eland_import_hub_model --url https://<username>:<password>@<es-cluster-hostname>:<esport>/ --insecure --hub-model-id dslim/bert-base-NER --task-type ner --start\\n```\\n\\nFrom here you’ll witness the Eland client in action downloading the trained model from [HuggingFace](https://huggingface.co/dslim/bert-base-NER) and automatically deploying it into your cluster!\\n\\n![huggingface code](/assets/images/remove-pii-data/blog-elastic-huggingface.png)\\n\\nSynchronize your newly loaded trained model by clicking the blue “Synchronize your jobs and trained models” hyperlink in the Machine Learning Overview UI.\\n\\n![Machine Learning Overview UI](/assets/images/remove-pii-data/blog-elastic-Machine-Learning-Overview-UI.png)\\n\\nNow click the Synchronize button.\\n\\n![Synchronize button](/assets/images/remove-pii-data/blog-elastic-Synchronize-button.png)\\n\\nThat’s it! Congratulations, you just loaded your first trained model into Elastic!\\n\\n
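As a quick sanity check, you can confirm from DevTools that the model deployed and started before wiring it into a pipeline; the model ID below is the one Eland derives from the HuggingFace name:\\n\\n```bash\\nGET _ml/trained_models/dslim__bert-base-ner/_stats\\n```\\n\\n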
Note: at the time of this writing, the redact processor is experimental and must be created via DevTools.\\n\\n```bash\\nPUT _ingest/pipeline/redact\\n{\\n \\"processors\\": [\\n {\\n \\"set\\": {\\n \\"field\\": \\"redacted\\",\\n \\"value\\": \\"{{{message}}}\\"\\n }\\n },\\n {\\n \\"inference\\": {\\n \\"model_id\\": \\"dslim__bert-base-ner\\",\\n \\"field_map\\": {\\n \\"message\\": \\"text_field\\"\\n }\\n }\\n },\\n {\\n \\"script\\": {\\n \\"lang\\": \\"painless\\",\\n \\"source\\": \\"String msg = ctx[\'message\'];\\\\r\\\\n for (item in ctx[\'ml\'][\'inference\'][\'entities\']) {\\\\r\\\\n msg = msg.replace(item[\'entity\'], \'<\' + item[\'class_name\'] + \'>\')\\\\r\\\\n }\\\\r\\\\n ctx[\'redacted\']=msg\\"\\n }\\n },\\n {\\n \\"redact\\": {\\n \\"field\\": \\"redacted\\",\\n \\"patterns\\": [\\n \\"%{EMAILADDRESS:EMAIL}\\",\\n \\"%{IP:IP_ADDRESS}\\",\\n \\"%{CREDIT_CARD:CREDIT_CARD}\\",\\n \\"%{SSN:SSN}\\",\\n \\"%{PHONE:PHONE}\\"\\n ],\\n \\"pattern_definitions\\": {\\n \\"CREDIT_CARD\\": \\"\\\\d{4}[ -]\\\\d{4}[ -]\\\\d{4}[ -]\\\\d{4}\\",\\n \\"SSN\\": \\"\\\\d{3}-\\\\d{2}-\\\\d{4}\\",\\n \\"PHONE\\": \\"\\\\d{3}-\\\\d{3}-\\\\d{4}\\"\\n }\\n }\\n },\\n {\\n \\"remove\\": {\\n \\"field\\": [\\n \\"ml\\"\\n ],\\n \\"ignore_missing\\": true,\\n \\"ignore_failure\\": true\\n }\\n }\\n ],\\n \\"on_failure\\": [\\n {\\n \\"set\\": {\\n \\"field\\": \\"failure\\",\\n \\"value\\": \\"pii_script-redact\\"\\n }\\n }\\n ]\\n}\\n```\\n\\nOK, but what does each processor really do? Let’s walk through each processor in detail here:\\n\\n1. The SET processor creates the field “redacted,” which is copied over from the message field and used later on in the pipeline.\\n\\n2. The INFERENCE processor calls the NER model we loaded to be used on the message field for identifying names, locations, and organizations.\\n\\n3. The SCRIPT processor then replaces, in the redacted field, each entity the model detected in the message field with its class name.\\n\\n4. Our REDACT processor uses Grok patterns to identify any custom set of data we wish to remove from the redacted field (which was copied over from the message field).\\n\\n5. The REMOVE processor drops the extraneous ml.\\\\* fields so they aren’t indexed; note we’ll add “message” to this processor once we validate data is being redacted properly.\\n\\n6. The ON_FAILURE / SET processor captures any errors just in case we have them.\\n\\n## Slice your PII\\n\\nNow that your ingest pipeline with all the necessary steps has been configured, let’s start testing how well we can remove sensitive data from documents. Navigate over to Stack Management, select Ingest Pipelines and search for “redact”, and then click on the result.\\n\\n![Ingest Pipelines](/assets/images/remove-pii-data/blog-elastic-Ingest-Pipelines.png)\\n\\nClick on the Manage button, and then click Edit.\\n\\n![Manage button](/assets/images/remove-pii-data/elastic-blog-Manage-button.png)\\n\\nHere we are going to test our pipeline by adding some documents. Below is a sample you can copy and paste to make sure everything is working correctly.\\n\\n![test pipeline](/assets/images/remove-pii-data/elastic-blog-test-pipeline.png)\\n\\n```yaml\\n{\\n \\"_source\\":\\n {\\n \\"message\\": \\"John Smith lives at 123 Main St. Highland Park, CO. His email address is jsmith123@email.com and his phone number is 412-189-9043. I found his social security number, it is 942-00-1243. 
Oh btw, his credit card is 1324-8374-0978-2819 and his gateway IP is 192.168.1.2\\",\\n },\\n}\\n```\\n\\nSimply press the Run the pipeline button, and you will then see the following output:\\n\\n![pii output code](/assets/images/remove-pii-data/elastic-blog-pii-output-2.png)\\n\\n## What’s next?\\n\\nAfter you’ve added this ingest pipeline to a data set you’re indexing and validated that it is meeting expectations, you can add the message field to be removed so that no PII data is indexed. Simply update your REMOVE processor to include the message field and simulate again to only see the redacted field.\\n\\n![](/assets/images/remove-pii-data/elastic-blog-manage-processor.png)\\n\\n![pii output code 2](/assets/images/remove-pii-data/elastic-blog-pii-output.png)\\n\\n## Conclusion\\n\\nWith this step-by-step approach, you are now ready and able to detect and redact any sensitive data throughout your indices.\\n\\nHere’s a quick recap of what we covered:\\n\\n- Loading a pre-trained named entity recognition model into an Elastic cluster\\n- Configuring the Redact processor, along with the inference processor, to use the trained model during data ingestion\\n- Testing sample data and modifying the ingest pipeline to safely remove personally identifiable information\\n\\nReady to get started? Sign up [for Elastic Cloud](https://cloud.elastic.co/registration) and try out the features and capabilities I’ve outlined above to get the most value and visibility out of your data.\\n\\n_The release and timing of any features or functionality described in this post remain at Elastic\'s sole discretion. Any features or functionality not currently available may not be delivered on time or at all._\\n\\n_In this blog post, we may have used third party generative AI tools, which are owned and operated by their respective owners. Elastic does not have any control over the third party tools and we have no responsibility or liability for their content, operation or use, nor for any loss or damage that may arise from your use of such tools. Please exercise caution when using AI tools with personal, sensitive or confidential information. Any data you submit may be used for AI training or other purposes. There is no guarantee that information you provide will be kept secure or confidential. You should familiarize yourself with the privacy practices and terms of use of any generative AI tools prior to use._\\n\\n_Elastic, Elasticsearch and associated marks are trademarks, logos or registered trademarks of Elasticsearch N.V. in the United States and other countries. 
All other company and product names are trademarks, logos or registered trademarks of their respective owners._\\n","code":"var Component=(()=>{var p=Object.create;var r=Object.defineProperty;var g=Object.getOwnPropertyDescriptor;var m=Object.getOwnPropertyNames;var u=Object.getPrototypeOf,f=Object.prototype.hasOwnProperty;var w=(i,e)=>()=>(e||i((e={exports:{}}).exports,e),e.exports),y=(i,e)=>{for(var t in e)r(i,t,{get:e[t],enumerable:!0})},s=(i,e,t,o)=>{if(e&&typeof e==\\"object\\"||typeof e==\\"function\\")for(let a of m(e))!f.call(i,a)&&a!==t&&r(i,a,{get:()=>e[a],enumerable:!(o=g(e,a))||o.enumerable});return i};var b=(i,e,t)=>(t=i!=null?p(u(i)):{},s(e||!i||!i.__esModule?r(t,\\"default\\",{value:i,enumerable:!0}):t,i)),v=i=>s(r({},\\"__esModule\\",{value:!0}),i);var d=w((_,l)=>{l.exports=_jsx_runtime});var I={};y(I,{default:()=>h,frontmatter:()=>E});var n=b(d()),E={title:\\"How to remove PII from your Elastic data in 3 easy steps\\",slug:\\"remove-pii-data\\",date:\\"2023-06-20\\",description:\\"Personally Identifiable Information compliance is an ever increasing challenge for any organization. With Elastic\'s intuitive ML interface and parsing capabilities, sensitive data may be easily redacted from unstructured data with ease.\\",author:[{slug:\\"peter-titov\\"}],image:\\"blog-post4-ai-search-B.jpg\\",tags:[{slug:\\"log-analytics\\"},{slug:\\"esre\\"},{slug:\\"genai\\"},{slug:\\"security\\"}]};function c(i){let e={a:\\"a\\",code:\\"code\\",em:\\"em\\",h2:\\"h2\\",img:\\"img\\",li:\\"li\\",ol:\\"ol\\",p:\\"p\\",pre:\\"pre\\",ul:\\"ul\\",...i.components};return(0,n.jsxs)(n.Fragment,{children:[(0,n.jsx)(e.p,{children:\\"Personally identifiable information (PII) compliance is an ever-increasing challenge for any organization. Whether you\\\\u2019re in ecommerce, banking, healthcare, or other fields where data is sensitive, PII may inadvertently be captured and stored. Having structured logs enables quick identification, removal, and protection of sensitive data fields easily; but what about unstructured messages? Or perhaps call center transcriptions?\\"}),`\\n`,(0,n.jsxs)(e.p,{children:[\\"Elasticsearch, with its long experience in \\",(0,n.jsx)(e.a,{href:\\"https://www.elastic.co/what-is/elasticsearch-machine-learning\\",rel:\\"nofollow\\",children:\\"machine learning\\"}),\\", provides various options to bring in custom models, such as large language models (LLMs), and provides its own models. 
These models will help implement PII redaction.\\"]}),`\\n`,(0,n.jsx)(e.p,{children:\\"If you would like to learn more about natural language processing, machine learning, and Elastic, please be sure to check out these related articles:\\"}),`\\n`,(0,n.jsxs)(e.ul,{children:[`\\n`,(0,n.jsx)(e.li,{children:(0,n.jsx)(e.a,{href:\\"https://www.elastic.co/blog/introduction-to-nlp-with-pytorch-models\\",rel:\\"nofollow\\",children:\\"Introduction to modern natural language processing with PyTorch in Elasticsearch\\"})}),`\\n`,(0,n.jsx)(e.li,{children:(0,n.jsx)(e.a,{href:\\"https://www.elastic.co/blog/how-to-deploy-natural-language-processing-nlp-getting-started\\",rel:\\"nofollow\\",children:\\"How to deploy natural language processing (NLP): Getting started\\"})}),`\\n`,(0,n.jsx)(e.li,{children:(0,n.jsx)(e.a,{href:\\"https://www.elastic.co/guide/en/elasticsearch/reference/current/redact-processor.html\\",rel:\\"nofollow\\",children:\\"Elastic Redact Processor Documentation\\"})}),`\\n`,(0,n.jsx)(e.li,{children:(0,n.jsx)(e.a,{href:\\"https://www.elastic.co/blog/may-2023-launch-sparse-encoder-ai-model\\",rel:\\"nofollow\\",children:\\"Introducing Elastic Learned Sparse Encoder: Elastic\\\\u2019s AI model for semantic search\\"})}),`\\n`,(0,n.jsx)(e.li,{children:(0,n.jsx)(e.a,{href:\\"https://www.elastic.co/blog/may-2023-launch-machine-learning-models\\",rel:\\"nofollow\\",children:\\"Accessing machine learning models in Elastic\\"})}),`\\n`]}),`\\n`,(0,n.jsx)(e.p,{children:\\"In this blog, we will show you how to set up PII redaction through the use of Elasticsearch\\\\u2019s ability to load a trained model within machine learning and the flexibility of Elastic\\\\u2019s ingest pipelines.\\"}),`\\n`,(0,n.jsxs)(e.p,{children:[\\"Specifically, we\\\\u2019ll walk through setting up a \\",(0,n.jsx)(e.a,{href:\\"https://www.elastic.co/blog/how-to-deploy-nlp-named-entity-recognition-ner-example\\",rel:\\"nofollow\\",children:\\"named entity recognition (NER)\\"}),\\" model for person and location identification, as well as deploying the redact processor for custom data identification and removal. All of this will then be combined with an ingest pipeline where we can use Elastic machine learning and data transformations capabilities to remove sensitive information from your data.\\"]}),`\\n`,(0,n.jsx)(e.h2,{id:\\"loading-the-trained-model\\",children:\\"Loading the trained model\\"}),`\\n`,(0,n.jsx)(e.p,{children:\\"Before we begin, we must load our NER model into our Elasticsearch cluster. This may be easily accomplished with Docker and the Elastic Eland client. From a command line, let\\\\u2019s install the Eland client via git:\\"}),`\\n`,(0,n.jsx)(e.pre,{children:(0,n.jsx)(e.code,{className:\\"language-bash\\",children:`git clone https://github.com/elastic/eland.git\\n`})}),`\\n`,(0,n.jsx)(e.p,{children:\\"Navigate into the recently downloaded client:\\"}),`\\n`,(0,n.jsx)(e.pre,{children:(0,n.jsx)(e.code,{className:\\"language-bash\\",children:`cd eland/\\n`})}),`\\n`,(0,n.jsx)(e.p,{children:\\"Now let\\\\u2019s build the client:\\"}),`\\n`,(0,n.jsx)(e.pre,{children:(0,n.jsx)(e.code,{className:\\"language-bash\\",children:`docker build -t elastic/eland .\\n`})}),`\\n`,(0,n.jsx)(e.p,{children:\\"From here, you\\\\u2019re ready to deploy the trained model to an Elastic machine learning node! 
Be sure to replace your username, password, es-cluster-hostname, and esport.\\"}),`\\n`,(0,n.jsx)(e.p,{children:\\"If you\\\\u2019re using Elastic Cloud or have signed certificates, simply run this command:\\"}),`\\n`,(0,n.jsx)(e.pre,{children:(0,n.jsx)(e.code,{className:\\"language-bash\\",children:`docker run -it --rm --network host elastic/eland eland_import_hub_model --url https://<username>:<password>@<es-cluster-hostname>:<esport>/ --hub-model-id dslim/bert-base-NER --task-type ner --start\\n`})}),`\\n`,(0,n.jsx)(e.p,{children:\\"If you\\\\u2019re using self-signed certificates, run this command:\\"}),`\\n`,(0,n.jsx)(e.pre,{children:(0,n.jsx)(e.code,{className:\\"language-bash\\",children:`docker run -it --rm --network host elastic/eland eland_import_hub_model --url https://<username>:<password>@<es-cluster-hostname>:<esport>/ --insecure --hub-model-id dslim/bert-base-NER --task-type ner --start\\n`})}),`\\n`,(0,n.jsxs)(e.p,{children:[\\"From here you\\\\u2019ll witness the Eland client in action downloading the trained model from \\",(0,n.jsx)(e.a,{href:\\"https://huggingface.co/dslim/bert-base-NER\\",rel:\\"nofollow\\",children:\\"HuggingFace\\"}),\\" and automatically deploying it into your cluster!\\"]}),`\\n`,(0,n.jsx)(e.p,{children:(0,n.jsx)(e.img,{src:\\"/assets/images/remove-pii-data/blog-elastic-huggingface.png\\",alt:\\"huggingface code\\",width:\\"889\\",height:\\"229\\"})}),`\\n`,(0,n.jsx)(e.p,{children:\\"Synchronize your newly loaded trained model by clicking on the blue hyperlink via your Machine Learning Overview UI \\\\u201CSynchronize your jobs and trained models.\\\\u201D\\"}),`\\n`,(0,n.jsx)(e.p,{children:(0,n.jsx)(e.img,{src:\\"/assets/images/remove-pii-data/blog-elastic-Machine-Learning-Overview-UI.png\\",alt:\\"Machine Learning Overview UI\\",width:\\"1235\\",height:\\"759\\"})}),`\\n`,(0,n.jsx)(e.p,{children:\\"Now click the Synchronize button.\\"}),`\\n`,(0,n.jsx)(e.p,{children:(0,n.jsx)(e.img,{src:\\"/assets/images/remove-pii-data/blog-elastic-Synchronize-button.png\\",alt:\\"Synchronize button\\",width:\\"1248\\",height:\\"758\\"})}),`\\n`,(0,n.jsx)(e.p,{children:\\"That\\\\u2019s it! Congratulations, you just loaded your first trained model into Elastic!\\"}),`\\n`,(0,n.jsx)(e.h2,{id:\\"create-the-redact-processor-and-ingest-pipeline\\",children:\\"Create the redact processor and ingest pipeline\\"}),`\\n`,(0,n.jsx)(e.p,{children:\\"From DevTools, let\\\\u2019s configure the redact processor along with our inference processor to take advantage of the trained model we just loaded. This will create an ingest pipeline named \\\\u201Credact\\\\u201D that we can then use to remove sensitive data from any field we wish. In this example, I\\\\u2019ll be focusing on the \\\\u201Cmessage\\\\u201D field. 
Note: at the time of this writing, the redact processor is experimental and must be created via DevTools.\\"}),`\\n`,(0,n.jsx)(e.pre,{children:(0,n.jsx)(e.code,{className:\\"language-bash\\",children:`PUT _ingest/pipeline/redact\\n{\\n \\"processors\\": [\\n {\\n \\"set\\": {\\n \\"field\\": \\"redacted\\",\\n \\"value\\": \\"{{{message}}}\\"\\n }\\n },\\n {\\n \\"inference\\": {\\n \\"model_id\\": \\"dslim__bert-base-ner\\",\\n \\"field_map\\": {\\n \\"message\\": \\"text_field\\"\\n }\\n }\\n },\\n {\\n \\"script\\": {\\n \\"lang\\": \\"painless\\",\\n \\"source\\": \\"String msg = ctx[\'message\'];\\\\\\\\r\\\\\\\\n for (item in ctx[\'ml\'][\'inference\'][\'entities\']) {\\\\\\\\r\\\\\\\\n msg = msg.replace(item[\'entity\'], \'<\' + item[\'class_name\'] + \'>\')\\\\\\\\r\\\\\\\\n }\\\\\\\\r\\\\\\\\n ctx[\'redacted\']=msg\\"\\n }\\n },\\n {\\n \\"redact\\": {\\n \\"field\\": \\"redacted\\",\\n \\"patterns\\": [\\n \\"%{EMAILADDRESS:EMAIL}\\",\\n \\"%{IP:IP_ADDRESS}\\",\\n \\"%{CREDIT_CARD:CREDIT_CARD}\\",\\n \\"%{SSN:SSN}\\",\\n \\"%{PHONE:PHONE}\\"\\n ],\\n \\"pattern_definitions\\": {\\n \\"CREDIT_CARD\\": \\"\\\\\\\\d{4}[ -]\\\\\\\\d{4}[ -]\\\\\\\\d{4}[ -]\\\\\\\\d{4}\\",\\n \\"SSN\\": \\"\\\\\\\\d{3}-\\\\\\\\d{2}-\\\\\\\\d{4}\\",\\n \\"PHONE\\": \\"\\\\\\\\d{3}-\\\\\\\\d{3}-\\\\\\\\d{4}\\"\\n }\\n }\\n },\\n {\\n \\"remove\\": {\\n \\"field\\": [\\n \\"ml\\"\\n ],\\n \\"ignore_missing\\": true,\\n \\"ignore_failure\\": true\\n }\\n }\\n ],\\n \\"on_failure\\": [\\n {\\n \\"set\\": {\\n \\"field\\": \\"failure\\",\\n \\"value\\": \\"pii_script-redact\\"\\n }\\n }\\n ]\\n}\\n`})}),`\\n`,(0,n.jsx)(e.p,{children:\\"OK, but what does each processor really do? Let\\\\u2019s walk through each processor in detail here:\\"}),`\\n`,(0,n.jsxs)(e.ol,{children:[`\\n`,(0,n.jsxs)(e.li,{children:[`\\n`,(0,n.jsx)(e.p,{children:\\"The SET processor creates the field \\\\u201Credacted,\\\\u201D which is copied over from the message field and used later on in the pipeline.\\"}),`\\n`]}),`\\n`,(0,n.jsxs)(e.li,{children:[`\\n`,(0,n.jsx)(e.p,{children:\\"The INFERENCE processor calls the NER model we loaded to be used on the message field for identifying names, locations, and organizations.\\"}),`\\n`]}),`\\n`,(0,n.jsxs)(e.li,{children:[`\\n`,(0,n.jsx)(e.p,{children:\\"The SCRIPT processor then replaced the detected entities within the redacted field from the message field.\\"}),`\\n`]}),`\\n`,(0,n.jsxs)(e.li,{children:[`\\n`,(0,n.jsx)(e.p,{children:\\"Our REDACT processor uses Grok patterns to identify any custom set of data we wish to remove from the redacted field (which was copied over from the message field).\\"}),`\\n`]}),`\\n`,(0,n.jsxs)(e.li,{children:[`\\n`,(0,n.jsx)(e.p,{children:\\"The REMOVE processor deletes the extraneous ml.* fields from being indexed; note we\\\\u2019ll add \\\\u201Cmessage\\\\u201D to this processor once we validate data is being redacted properly.\\"}),`\\n`]}),`\\n`,(0,n.jsxs)(e.li,{children:[`\\n`,(0,n.jsx)(e.p,{children:\\"The ON_FAILURE / SET processor captures any errors just in case we have them.\\"}),`\\n`]}),`\\n`]}),`\\n`,(0,n.jsx)(e.h2,{id:\\"slice-your-pii\\",children:\\"Slice your PII\\"}),`\\n`,(0,n.jsx)(e.p,{children:\\"Now that your ingest pipeline with all the necessary steps has been configured, let\\\\u2019s start testing how well we can remove sensitive data from documents. 
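If you prefer to stay in DevTools rather than follow the UI steps described next, the same test can be run with the ingest simulate API. A minimal sketch using the sample document from this walkthrough:

```bash
POST _ingest/pipeline/redact/_simulate
{
  "docs": [
    {
      "_source": {
        "message": "John Smith lives at 123 Main St. Highland Park, CO. His email address is jsmith123@email.com and his phone number is 412-189-9043."
      }
    }
  ]
}
```

The response contains each transformed document, so you can inspect the `redacted` field directly before attaching the pipeline to an index.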
Navigate over to Stack Management, select Ingest Pipelines and search for \\\\u201Credact\\\\u201D, and then click on the result.\\"}),`\\n`,(0,n.jsx)(e.p,{children:(0,n.jsx)(e.img,{src:\\"/assets/images/remove-pii-data/blog-elastic-Ingest-Pipelines.png\\",alt:\\"Ingest Pipelines\\",width:\\"1083\\",height:\\"674\\"})}),`\\n`,(0,n.jsx)(e.p,{children:\\"Click on the Manage button, and then click Edit.\\"}),`\\n`,(0,n.jsx)(e.p,{children:(0,n.jsx)(e.img,{src:\\"/assets/images/remove-pii-data/elastic-blog-Manage-button.png\\",alt:\\"Manage button\\",width:\\"1078\\",height:\\"765\\"})}),`\\n`,(0,n.jsx)(e.p,{children:\\"Here we are going to test our pipeline by adding some documents. Below is a sample you can copy and paste to make sure everything is working correctly.\\"}),`\\n`,(0,n.jsx)(e.p,{children:(0,n.jsx)(e.img,{src:\\"/assets/images/remove-pii-data/elastic-blog-test-pipeline.png\\",alt:\\"test pipeline\\",width:\\"530\\",height:\\"641\\"})}),`\\n`,(0,n.jsx)(e.pre,{children:(0,n.jsx)(e.code,{className:\\"language-yaml\\",children:`{\\n \\"_source\\":\\n {\\n \\"message\\": \\"John Smith lives at 123 Main St. Highland Park, CO. His email address is jsmith123@email.com and his phone number is 412-189-9043. I found his social security number, it is 942-00-1243. Oh btw, his credit card is 1324-8374-0978-2819 and his gateway IP is 192.168.1.2\\",\\n },\\n}\\n`})}),`\\n`,(0,n.jsx)(e.p,{children:\\"Simply press the Run the pipeline button, and you will then see the following output:\\"}),`\\n`,(0,n.jsx)(e.p,{children:(0,n.jsx)(e.img,{src:\\"/assets/images/remove-pii-data/elastic-blog-pii-output-2.png\\",alt:\\"pii output code\\",width:\\"461\\",height:\\"457\\"})}),`\\n`,(0,n.jsx)(e.h2,{id:\\"whats-next\\",children:\\"What\\\\u2019s next?\\"}),`\\n`,(0,n.jsx)(e.p,{children:\\"After you\\\\u2019ve added this ingest pipeline to a data set you\\\\u2019re indexing and validated that it is meeting expectations, you can add the message field to be removed so that no PII data is indexed. Simply update your REMOVE processor to include the message field and simulate again to only see the redacted field.\\"}),`\\n`,(0,n.jsx)(e.p,{children:(0,n.jsx)(e.img,{src:\\"/assets/images/remove-pii-data/elastic-blog-manage-processor.png\\",alt:\\"\\",width:\\"533\\",height:\\"709\\"})}),`\\n`,(0,n.jsx)(e.p,{children:(0,n.jsx)(e.img,{src:\\"/assets/images/remove-pii-data/elastic-blog-pii-output.png\\",alt:\\"pii output code 2\\",width:\\"436\\",height:\\"357\\"})}),`\\n`,(0,n.jsx)(e.h2,{id:\\"conclusion\\",children:\\"Conclusion\\"}),`\\n`,(0,n.jsx)(e.p,{children:\\"With this step-by-step approach, you are now ready and able to detect and redact any sensitive data throughout your indices.\\"}),`\\n`,(0,n.jsx)(e.p,{children:\\"Here\\\\u2019s a quick recap of what we covered:\\"}),`\\n`,(0,n.jsxs)(e.ul,{children:[`\\n`,(0,n.jsx)(e.li,{children:\\"Loading a pre-trained named entity recognition model into an Elastic cluster\\"}),`\\n`,(0,n.jsx)(e.li,{children:\\"Configuring the Redact processor, along with the inference processor, to use the trained model during data ingestion\\"}),`\\n`,(0,n.jsx)(e.li,{children:\\"Testing sample data and modifying the ingest pipeline to safely remove personally identifiable information\\"}),`\\n`]}),`\\n`,(0,n.jsxs)(e.p,{children:[\\"Ready to get started? 
Sign up \\",(0,n.jsx)(e.a,{href:\\"https://cloud.elastic.co/registration\\",rel:\\"nofollow\\",children:\\"for Elastic Cloud\\"}),\\" and try out the features and capabilities I\\\\u2019ve outlined above to get the most value and visibility out of your OpenTelemetry data.\\"]}),`\\n`,(0,n.jsx)(e.p,{children:(0,n.jsx)(e.em,{children:\\"The release and timing of any features or functionality described in this post remain at Elastic\'s sole discretion. Any features or functionality not currently available may not be delivered on time or at all.\\"})}),`\\n`,(0,n.jsx)(e.p,{children:(0,n.jsx)(e.em,{children:\\"In this blog post, we may have used third party generative AI tools, which are owned and operated by their respective owners. Elastic does not have any control over the third party tools and we have no responsibility or liability for their content, operation or use, nor for any loss or damage that may arise from your use of such tools. Please exercise caution when using AI tools with personal, sensitive or confidential information. Any data you submit may be used for AI training or other purposes. There is no guarantee that information you provide will be kept secure or confidential. You should familiarize yourself with the privacy practices and terms of use of any generative AI tools prior to use.\\"})}),`\\n`,(0,n.jsx)(e.p,{children:(0,n.jsx)(e.em,{children:\\"Elastic, Elasticsearch and associated marks are trademarks, logos or registered trademarks of Elasticsearch N.V. in the United States and other countries. All other company and product names are trademarks, logos or registered trademarks of their respective owners.\\"})})]})}function h(i={}){let{wrapper:e}=i.components||{};return e?(0,n.jsx)(e,{...i,children:(0,n.jsx)(c,{...i})}):c(i)}return v(I);})();\\n;return Component;"},"_id":"articles/how-to-remove-pii-elastic-data.mdx","_raw":{"sourceFilePath":"articles/how-to-remove-pii-elastic-data.mdx","sourceFileName":"how-to-remove-pii-elastic-data.mdx","sourceFileDir":"articles","contentType":"mdx","flattenedPath":"articles/how-to-remove-pii-elastic-data"},"type":"Article","imageUrl":"/assets/images/remove-pii-data/blog-post4-ai-search-B.jpg","readingTime":"10 min read","url":"/remove-pii-data","headings":[{"level":2,"title":"Loading the trained model","href":"#loading-the-trained-model"},{"level":2,"title":"Create the redact processor and ingest pipeline","href":"#create-the-redact-processor-and-ingest-pipeline"},{"level":2,"title":"Slice your PII","href":"#slice-your-pii"},{"level":2,"title":"What’s next?","href":"#whats-next"},{"level":2,"title":"Conclusion","href":"#conclusion"}]},{"title":"Improving the Elastic APM UI performance with continuous rollups and service metrics","slug":"apm-ui-performance-continuous-rollups-service-metrics","date":"2023-06-29","description":"We made significant improvements to the UI performance in Elastic APM to make it scale with even the most demanding workloads, by pre-aggregating metrics at the service level, and storing the metrics at different levels of granularity.","image":"elastic-blog-header-ui.png","author":[{"slug":"felix-barnsteiner","type":"Author","_raw":{}},{"slug":"yngrid-coello","type":"Author","_raw":{}},{"slug":"dario-gieselaar","type":"Author","_raw":{}},{"slug":"carson-ip","type":"Author","_raw":{}}],"tags":[{"slug":"apm","type":"Tag","_raw":{}},{"slug":"metrics","type":"Tag","_raw":{}},{"slug":"elastic-architecture-enhancements","type":"Tag","_raw":{}}],"body":{"raw":"\\nIn today\'s fast-paced digital landscape, the ability to 
monitor and optimize application performance is crucial for organizations striving to deliver exceptional user experiences. At Elastic, we recognize the significance of providing our user base with a reliable [observability platform](https://www.elastic.co/observability) that scales with you as you’re onboarding thousands of services that produce terabytes of data each day. We have been diligently working behind the scenes to enhance our solution to meet the demands of even the largest deployments.\\n\\nIn this blog post, we are excited to share the significant strides we have made in improving the UI performance of Elastic APM. Maintaining a snappy user interface can be a challenge when interactively summarizing the massive amounts of data needed to provide an overview of the performance for an entire enterprise-scale service inventory. We want to assure our customers that we have listened, taken action, and made notable architectural changes to elevate the scalability and maturity of our solution.\\n\\n## Architectural enhancements\\n\\nOur journey began back in the 7.x series where we noticed that doing ad-hoc aggregations on raw [transaction](https://www.elastic.co/guide/en/apm/guide/current/data-model-transactions.html) data put Elasticsearch\xae under a lot of pressure in large-scale environments. Since then, we’ve begun to pre-aggregate the transactions into transaction metrics during ingestion. This has helped to keep the performance of the UI relatively stable. Regardless of how busy the monitored application is and how many transaction events it is creating, we’re just querying pre-aggregated metrics that are stored at a constant rate. We’ve enabled the metrics-powered UI by default in [7.15](https://github.com/elastic/kibana/issues/92024).\\n\\nHowever, when showing an inventory of a large number of services over large time ranges, the number of metric data points that need to be aggregated can still be large enough to cause performance issues. We also create a time series for each distinct set of dimensions. The dimensions include metadata, such as the transaction name and the host name. Our [documentation](https://www.elastic.co/guide/en/apm/guide/current/data-model-metrics.html#_transaction_metrics) includes a full list of all available dimensions. If there’s a very high number of unique transaction names, which could be a result of improper instrumentation (see [docs](https://www.elastic.co/guide/en/kibana/current/troubleshooting.html#troubleshooting-too-many-transactions) for more details), this will create a lot of individual time series that will need to be aggregated when requesting a summary of the service’s overall performance. Global labels that are added to the APM Agent configuration are also added as dimensions to these metrics, and therefore they can also impact the number of time series. Refer to the FAQs section below for more details.\\n\\nWithin the 8.7 and 8.8 releases, we’ve addressed these challenges with the following architectural enhancements that aim to reduce the number of documents Elasticsearch needs to search and aggregate on-the-fly, resulting in faster response times:\\n\\n- **Pre-aggregation of transaction metrics into service metrics.** Instead of aggregating all distinct time series that are created for each individual transaction name on-the-fly for every user request, we’re already pre-aggregating a summary time series for each service during data ingestion. 
Depending on how many unique transaction names the services have, this reduces the number of documents Elasticsearch needs to look up and aggregate by a factor of typically 10–100. This is particularly useful for the [service inventory](https://www.elastic.co/guide/en/kibana/master/services.html) and the [service overview](https://www.elastic.co/guide/en/kibana/master/service-overview.html) pages.\\n- **Pre-aggregation of all metrics into different levels of granularity.** The APM UI chooses the most appropriate level of granularity, depending on the selected time range. In addition to the metrics that are stored at a 1-minute granularity, we’re also summarizing and storing metrics at a 10-minute and 60-minute granularity level. For example, when looking at a 7-day period, the 60-minute data stream is queried instead of the 1-minute one, resulting in 60x fewer documents for Elasticsearch to examine. This makes sure that all graphs are rendered quickly, even when looking at larger time ranges.\\n- **Safeguards on the number of unique transactions per service for which we are aggregating metrics.** Our agents are designed to keep the cardinality of the transaction name low. But in the wild, we’ve seen some services that have a huge number of unique transaction names. This used to cause performance problems in the UI because APM Server would create many time series that the UI needed to aggregate at query time. In order to protect APM Server from running out of memory when aggregating a large number of time series for each unique transaction name, metrics were published without aggregating when limits for the number of time series were reached. This resulted in a lot of individual metric documents that needed to be aggregated at query time. To address the problem, we\'ve introduced a system where we aggregate metrics in a dedicated overflow bucket for each service when limits are reached. Refer to our [documentation](https://www.elastic.co/guide/en/kibana/8.8/troubleshooting.html#troubleshooting-too-many-transactions) for more details.\\n\\nThe exact factor of the document count reduction depends on various conditions. But to get a feeling for a typical scenario, if your services, on average, have 10 instances, no instance-specific global labels, 100 unique transaction names each, and you’re looking at time ranges that can leverage the 60m granularity, you’d see a reduction of documents that Elasticsearch needs to aggregate by a factor of 180,000 (10 instances x 100 transaction names x 60m x 3 because we’re also collapsing the event.outcome dimension). While the response times of Elasticsearch aggregations don’t scale exactly linearly with the number of documents, there is a strong correlation.\\n\\n## FAQs\\n\\n### When upgrading to the latest version, will my old data also load faster?\\n\\nUpdating to 8.8 doesn’t immediately make the UI faster. Because the improvements are powered by pre-aggregations that APM Server is doing during ingestion, only new data will benefit from it. For that reason, you should make sure to update APM Server as well. 
The UI can still display data that was ingested using an older version of the stack.\\n\\n### If the UI is based on metrics, can I still slice and dice using custom labels?\\n\\nHigh cardinality analysis is a big strength of Elastic Observability, and this focus on pre-aggregated metrics does not compromise that in any way.\\n\\nThe UI implements a sophisticated fallback mechanism that uses service metrics, transaction metrics, or raw transaction events, depending on which filters are applied. We’re not creating metrics for each user.id, for example. But you can still filter the data by user.id and the UI will then use raw transaction events. Chances are that you’re looking at a narrow slice of data when filtering by a dimension that is not available on the pre-aggregated metrics, so aggregations on the raw data are typically very fast.\\n\\nNote that all global labels that are added to the APM agent configuration are part of the dimensions of the pre-aggregated metrics, with the exception of RUM (see more details in [this issue](https://github.com/elastic/apm-server/issues/11037)).\\n\\n### Can I use the pre-aggregated metrics in custom dashboards?\\n\\nYes! If you use [Lens](https://www.elastic.co/guide/en/kibana/current/lens.html) and select the \\"APM\\" data view, you can filter on either metricset.name:service_transaction or metricset.name:transaction, depending on the level of detail you need. Transaction latency is captured in transaction.duration.histogram, and successful outcomes and failed outcomes are stored in event.success_count. If you don\'t need a distribution of values, you can also select the transaction.duration.summary field for your metric aggregations, which should be faster. If you want to calculate the failure rate, here\'s a [Lens formula](https://www.elastic.co/guide/en/kibana/current/lens.html#lens-formulas): 1 - (sum(event.success_count) / count(event.success_count)). Note that the only granularity supported here is 1m.\\n\\n### Do the additional metrics have an impact on the storage?\\n\\nWhile we’re storing more metrics than before, and we’re storing all metrics in different levels of granularity, we were able to offset that by enabling [synthetic source](https://www.elastic.co/guide/en/elasticsearch/reference/current/mapping-source-field.html#synthetic-source) for all metric data streams. We’ve even increased the default retention for the metrics in the coarse-grained granularity levels, so that the 60m rollup data streams are now stored for 390 days. Please consult our [documentation](https://www.elastic.co/guide/en/apm/guide/current/apm-data-streams.html) for more information about the different metric data streams.\\n\\n### Are there limits on the amount of time series that APM Server can aggregate?\\n\\nAPM Server performs pre-aggregations in memory, which is fast, but consumes a considerable amount of memory. There are limits in place to protect APM Server from running out of memory, and from 8.7, most of them scale with available memory by default, meaning that allocating more memory to APM Server will allow it to handle more unique pre-aggregation groups like services and transactions. These limits are described in [APM Server Data Model docs](https://www.elastic.co/guide/en/apm/guide/current/data-model-metrics.html#_aggregated_metrics_limits_and_overflows).\\n\\nOn the APM Server roadmap, we have plans to move to an LSM-based approach where pre-aggregations are performed with the help of disks in order to reduce memory usage. 
This will enable APM Server to scale better with the input size and cardinality.\\n\\nA common pitfall when working with pre-aggregations is to add instance-specific global labels to APM agents. This may exhaust the aggregation limits and cause metrics to be aggregated under the overflow bucket instead of the corresponding service. Therefore, make sure to follow the best practice of only adding a limited set of global labels to a particular service.\\n\\n## Validation\\n\\nTo validate the effectiveness of the new architecture, and to ensure that the accuracy of the data is not negatively affected, we prepared a test environment where we generated 35K+ transactions per minute over a timespan of 14 days, resulting in approximately 850 million documents.\\n\\nWe’ve tested the queries that power our service inventory, the service overview, and the transaction details using different time ranges (1d, 7d, 14d). Across the board, we’ve seen orders of magnitude improvements. Particularly, queries across larger time ranges that benefit from using the coarse-grained metrics in addition to the pre-aggregated service metrics saw dramatic reductions in response time.\\n\\nWe’ve also validated that there’s no loss in accuracy when using the more coarse-grained metrics for larger time ranges.\\n\\nEvery environment will behave a bit differently, but we’re confident that the impressive improvements in response time will translate well to setups of even bigger scale.\\n\\n## Planned improvements\\n\\nAs mentioned in the FAQs section, the number of time series for transaction metrics can grow quickly, as it is the product of multiple dimensions. For example, given a service that runs on 100 hosts and has 100 transaction names that each have 4 transaction results, APM Server needs to track 40,000 (100 x 100 x 4) different time series for that service. This would even exceed the maximum per-service limit of 32,000 for APM Servers with 64GB of main memory.\\n\\nAs a result, the UI will show an entry for “Remaining Transactions” in the Service overview page. This tracks the transaction metrics for a service once it hits the limit. Consequently, you may not see all transaction names of your service. It may also be that all distinct transaction names are listed, but that the transaction metrics for some of the instances of that service are combined in the “Remaining Transactions” category.\\n\\nWe’re currently considering restructuring the dimensions for the metrics to prevent the combination of the transaction name dimension and service instance-specific dimensions (such as the host name) from leading to an explosion of time series. Stay tuned for more details.\\n\\n## Conclusion\\n\\nThe architectural improvements we’ve delivered in the past releases provide a step-function improvement in the scalability and responsiveness of our UI. Instead of having to aggregate massive amounts of data on-the-fly as users are navigating through the user interface, we pre-aggregate the results for the most common queries as data is coming in. This ensures we have the answers ready before users have even asked their most frequently asked questions, while still being able to answer ad-hoc questions.\\n\\nWe are excited to continue supporting our community members as they push boundaries on their growth journey, providing them with a powerful and mature platform that can effortlessly handle the demands of the largest workloads. Elastic is committed to its mission to enable everyone to find the answers that matter. From all data. 
In real time. At scale.\\n\\n_The release and timing of any features or functionality described in this post remain at Elastic\'s sole discretion. Any features or functionality not currently available may not be delivered on time or at all._\\n","code":"var Component=(()=>{var m=Object.create;var i=Object.defineProperty;var u=Object.getOwnPropertyDescriptor;var g=Object.getOwnPropertyNames;var f=Object.getPrototypeOf,p=Object.prototype.hasOwnProperty;var w=(a,e)=>()=>(e||a((e={exports:{}}).exports,e),e.exports),v=(a,e)=>{for(var n in e)i(a,n,{get:e[n],enumerable:!0})},o=(a,e,n,s)=>{if(e&&typeof e==\\"object\\"||typeof e==\\"function\\")for(let r of g(e))!p.call(a,r)&&r!==n&&i(a,r,{get:()=>e[r],enumerable:!(s=u(e,r))||s.enumerable});return a};var y=(a,e,n)=>(n=a!=null?m(f(a)):{},o(e||!a||!a.__esModule?i(n,\\"default\\",{value:a,enumerable:!0}):n,a)),b=a=>o(i({},\\"__esModule\\",{value:!0}),a);var c=w((I,l)=>{l.exports=_jsx_runtime});var A={};v(A,{default:()=>d,frontmatter:()=>k});var t=y(c()),k={title:\\"Improving the Elastic APM UI performance with continuous rollups and service metrics\\",slug:\\"apm-ui-performance-continuous-rollups-service-metrics\\",date:\\"2023-06-29\\",description:\\"We made significant improvements to the UI performance in Elastic APM to make it scale with even the most demanding workloads, by pre-aggregating metrics at the service level, and storing the metrics at different levels of granularity.\\",author:[{slug:\\"felix-barnsteiner\\"},{slug:\\"yngrid-coello\\"},{slug:\\"dario-gieselaar\\"},{slug:\\"carson-ip\\"}],image:\\"elastic-blog-header-ui.png\\",tags:[{slug:\\"apm\\"},{slug:\\"metrics\\"},{slug:\\"elastic-architecture-enhancements\\"}]};function h(a){let e={a:\\"a\\",em:\\"em\\",h2:\\"h2\\",h3:\\"h3\\",li:\\"li\\",p:\\"p\\",strong:\\"strong\\",ul:\\"ul\\",...a.components};return(0,t.jsxs)(t.Fragment,{children:[(0,t.jsxs)(e.p,{children:[\\"In today\'s fast-paced digital landscape, the ability to monitor and optimize application performance is crucial for organizations striving to deliver exceptional user experiences. At Elastic, we recognize the significance of providing our user base with a reliable \\",(0,t.jsx)(e.a,{href:\\"https://www.elastic.co/observability\\",rel:\\"nofollow\\",children:\\"observability platform\\"}),\\" that scales with you as you\\\\u2019re onboarding thousands of services that produce terabytes of data each day. We have been diligently working behind the scenes to enhance our solution to meet the demands of even the largest deployments.\\"]}),`\\n`,(0,t.jsx)(e.p,{children:\\"In this blog post, we are excited to share the significant strides we have made in improving the UI performance of Elastic APM. Maintaining a snappy user interface can be a challenge when interactively summarizing the massive amounts of data needed to provide an overview of the performance for an entire enterprise-scale service inventory. 
We want to assure our customers that we have listened, taken action, and made notable architectural changes to elevate the scalability and maturity of our solution.\\"}),`\\n`,(0,t.jsx)(e.h2,{id:\\"architectural-enhancements\\",children:\\"Architectural enhancements\\"}),`\\n`,(0,t.jsxs)(e.p,{children:[\\"Our journey began back in the 7.x series where we noticed that doing ad-hoc aggregations on raw \\",(0,t.jsx)(e.a,{href:\\"https://www.elastic.co/guide/en/apm/guide/current/data-model-transactions.html\\",rel:\\"nofollow\\",children:\\"transaction\\"}),\\" data put Elasticsearch\\",(0,t.jsx)(\\"sup\\",{children:\\"\\\\xAE\\"}),\\" under a lot of pressure in large-scale environments. Since then, we\\\\u2019ve begun to pre-aggregate the transactions into transaction metrics during ingestion. This has helped to keep the performance of the UI relatively stable. Regardless of how busy the monitored application is and how many transaction events it is creating, we\\\\u2019re just querying pre-aggregated metrics that are stored at a constant rate. We\\\\u2019ve enabled the metrics-powered UI by default in \\",(0,t.jsx)(e.a,{href:\\"https://github.com/elastic/kibana/issues/92024\\",rel:\\"nofollow\\",children:\\"7.15\\"}),\\".\\"]}),`\\n`,(0,t.jsxs)(e.p,{children:[\\"However, when showing an inventory of a large number of services over large time ranges, the number of metric data points that need to be aggregated can still be large enough to cause performance issues. We also create a time series for each distinct set of dimensions. The dimensions include metadata, such as the transaction name and the host name. Our \\",(0,t.jsx)(e.a,{href:\\"https://www.elastic.co/guide/en/apm/guide/current/data-model-metrics.html#_transaction_metrics\\",rel:\\"nofollow\\",children:\\"documentation\\"}),\\" includes a full list of all available dimensions. If there\\\\u2019s a very high number of unique transaction names, which could be a result of improper instrumentation (see \\",(0,t.jsx)(e.a,{href:\\"https://www.elastic.co/guide/en/kibana/current/troubleshooting.html#troubleshooting-too-many-transactions\\",rel:\\"nofollow\\",children:\\"docs\\"}),\\" for more details), this will create a lot of individual time series that will need to be aggregated when requesting a summary of the service\\\\u2019s overall performance. Global labels that are added to the APM Agent configuration are also added as dimensions to these metrics, and therefore they can also impact the number of time series. Refer to the FAQs section below for more details.\\"]}),`\\n`,(0,t.jsx)(e.p,{children:\\"Within the 8.7 and 8.8 releases, we\\\\u2019ve addressed these challenges with the following architectural enhancements that aim to reduce the number of documents Elasticsearch needs to search and aggregate on-the-fly, resulting in faster response times:\\"}),`\\n`,(0,t.jsxs)(e.ul,{children:[`\\n`,(0,t.jsxs)(e.li,{children:[(0,t.jsx)(e.strong,{children:\\"Pre-aggregation of transaction metrics into service metrics.\\"}),\\" Instead of aggregating all distinct time series that are created for each individual transaction name on-the-fly for every user request, we\\\\u2019re already pre-aggregating a summary time series for each service during data ingestion. Depending on how many unique transaction names the services have, this reduces the number of documents Elasticsearch needs to look up and aggregate by a factor of typically 10\\\\u2013100. 
This is particularly useful for the \\",(0,t.jsx)(e.a,{href:\\"https://www.elastic.co/guide/en/kibana/master/services.html\\",rel:\\"nofollow\\",children:\\"service inventory\\"}),\\" and the \\",(0,t.jsx)(e.a,{href:\\"https://www.elastic.co/guide/en/kibana/master/service-overview.html\\",rel:\\"nofollow\\",children:\\"service overview\\"}),\\" pages.\\"]}),`\\n`,(0,t.jsxs)(e.li,{children:[(0,t.jsx)(e.strong,{children:\\"Pre-aggregation of all metrics into different levels of granularity.\\"}),\\" The APM UI chooses the most appropriate level of granularity, depending on the selected time range. In addition to the metrics that are stored at a 1-minute granularity, we\\\\u2019re also summarizing and storing metrics at a 10-minute and 60-minute granularity level. For example, when looking at a 7-day period, the 60-minute data stream is queried instead of the 1-minute one, resulting in 60x fewer documents for Elasticsearch to examine. This makes sure that all graphs are rendered quickly, even when looking at larger time ranges.\\"]}),`\\n`,(0,t.jsxs)(e.li,{children:[(0,t.jsx)(e.strong,{children:\\"Safeguards on the number of unique transactions per service for which we are aggregating metrics.\\"}),\\" Our agents are designed to keep the cardinality of the transaction name low. But in the wild, we\\\\u2019ve seen some services that have a huge amount of unique transaction names. This used to cause performance problems in the UI because APM Server would create many time series that the UI needed to aggregate at query time. In order to protect APM Server from running out of memory when aggregating a large number of time series for each unique transaction name, metrics were published without aggregating when limits for the number of time series were reached. This resulted in a lot of individual metric documents that needed to be aggregated at query time. To address the problem, we\'ve introduced a system where we aggregate metrics in a dedicated overflow bucket for each service when limits are reached. Refer to our \\",(0,t.jsx)(e.a,{href:\\"https://www.elastic.co/guide/en/kibana/8.8/troubleshooting.html#troubleshooting-too-many-transactions\\",rel:\\"nofollow\\",children:\\"documentation\\"}),\\" for more details.\\"]}),`\\n`]}),`\\n`,(0,t.jsx)(e.p,{children:\\"The exact factor of the document count reduction depends on various conditions. But to get a feeling for a typical scenario, if your services, on average, have 10 instances, no instance-specific global labels, 100 unique transaction names each, and you\\\\u2019re looking at time ranges that can leverage the 60m granularity, you\\\\u2019d see a reduction of documents that Elasticsearch needs to aggregate by a factor of 180,000 (10 instances x 100 transaction names x 60m x 3 because we\\\\u2019re also collapsing the event.outcome dimension). While the response times of Elasticsearch aggregations isn\\\\u2019t exactly scaling linearly with the number of documents, there is a strong correlation.\\"}),`\\n`,(0,t.jsx)(e.h2,{id:\\"faqs\\",children:\\"FAQs\\"}),`\\n`,(0,t.jsx)(e.h3,{id:\\"when-upgrading-to-the-latest-version-will-my-old-data-also-load-faster\\",children:\\"When upgrading to the latest version, will my old data also load faster?\\"}),`\\n`,(0,t.jsx)(e.p,{children:\\"Updating to 8.8 doesn\\\\u2019t immediately make the UI faster. Because the improvements are powered by pre-aggregations that APM Server is doing during ingestion, only new data will benefit from it. 
For that reason, you should also make sure to update APM Server as well. The UI can still display data that was ingested using an older version of the stack.\\"}),`\\n`,(0,t.jsx)(e.h3,{id:\\"if-the-ui-is-based-on-metrics-can-i-still-slice-and-dice-using-custom-labels\\",children:\\"If the UI is based on metrics, can I still slice and dice using custom labels?\\"}),`\\n`,(0,t.jsx)(e.p,{children:\\"High cardinality analysis is a big strength of Elastic Observability, and this focus on pre-aggregated metrics does not compromise that in any way.\\"}),`\\n`,(0,t.jsx)(e.p,{children:\\"The UI implements a sophisticated fallback mechanism that uses service metrics, transaction metrics, or raw transaction events, depending on which filters are applied. We\\\\u2019re not creating metrics for each user.id, for example. But you can still filter the data by user.id and the UI will then use raw transaction events. Chances are that you\\\\u2019re looking at a narrow slice of data when filtering by a dimension that is not available on the pre-aggregated metrics, therefore aggregations on the raw data are typically very fast.\\"}),`\\n`,(0,t.jsxs)(e.p,{children:[\\"Note that all global labels that are added to the APM agent configuration are part of the dimension of the pre-aggregated metrics, with the exception of RUM (see more details in \\",(0,t.jsx)(e.a,{href:\\"https://github.com/elastic/apm-server/issues/11037\\",rel:\\"nofollow\\",children:\\"this issue\\"}),\\").\\"]}),`\\n`,(0,t.jsx)(e.h3,{id:\\"can-i-use-the-pre-aggregated-metrics-in-custom-dashboards\\",children:\\"Can I use the pre-aggregated metrics in custom dashboards?\\"}),`\\n`,(0,t.jsxs)(e.p,{children:[\\"Yes! If you use \\",(0,t.jsx)(e.a,{href:\\"https://www.elastic.co/guide/en/kibana/current/lens.html\\",rel:\\"nofollow\\",children:\\"Lens\\"}),` and select the \\"APM\\" data view, you can filter on either metricset.name:service_transaction or metricset.name:transaction, depending on the level of detail you need. Transaction latency is captured in transaction.duration.histogram, and successful outcomes and failed outcomes are stored in event.success_count. If you don\'t need a distribution of values, you can also select the transaction.duration.summary field for your metric aggregations, which should be faster. If you want to calculate the failure rate, here\'s a `,(0,t.jsx)(e.a,{href:\\"https://www.elastic.co/guide/en/kibana/current/lens.html#lens-formulas\\",rel:\\"nofollow\\",children:\\"Lens formula\\"}),\\": 1 - (sum(event.success_count) / count(event.success_count)). Note that the only granularity supported here is 1m.\\"]}),`\\n`,(0,t.jsx)(e.h3,{id:\\"do-the-additional-metrics-have-an-impact-on-the-storage\\",children:\\"Do the additional metrics have an impact on the storage?\\"}),`\\n`,(0,t.jsxs)(e.p,{children:[\\"While we\\\\u2019re storing more metrics than before, and we\\\\u2019re storing all metrics in different levels of granularity, we were able to offset that by enabling \\",(0,t.jsx)(e.a,{href:\\"https://www.elastic.co/guide/en/elasticsearch/reference/current/mapping-source-field.html#synthetic-source\\",rel:\\"nofollow\\",children:\\"synthetic source\\"}),\\" for all metric data streams. We\\\\u2019ve even increased the default retention for the metrics in the coarse-grained granularity levels, so that the 60m rollup data streams are now stored for 390 days. 
Please consult our \\",(0,t.jsx)(e.a,{href:\\"https://www.elastic.co/guide/en/apm/guide/current/apm-data-streams.html\\",rel:\\"nofollow\\",children:\\"documentation\\"}),\\" for more information about the different metric data streams.\\"]}),`\\n`,(0,t.jsx)(e.h3,{id:\\"are-there-limits-on-the-amount-of-time-series-that-apm-server-can-aggregate\\",children:\\"Are there limits on the amount of time series that APM Server can aggregate?\\"}),`\\n`,(0,t.jsxs)(e.p,{children:[\\"APM Server performs pre-aggregations in memory, which is fast, but consumes a considerable amount of memory. There are limits in place to protect APM Server from running out of memory, and from 8.7, most of them scale with available memory by default, meaning that allocating more memory to APM Server will allow it to handle more unique pre-aggregation groups like services and transactions. These limits are described in \\",(0,t.jsx)(e.a,{href:\\"https://www.elastic.co/guide/en/apm/guide/current/data-model-metrics.html#_aggregated_metrics_limits_and_overflows\\",rel:\\"nofollow\\",children:\\"APM Server Data Model docs\\"}),\\".\\"]}),`\\n`,(0,t.jsx)(e.p,{children:\\"On the APM Server roadmap, we have plans to move to a LSM-based approach where pre-aggregations are performed with the help of disks in order to reduce memory usage. This will enable APM Server to scale better with the input size and cardinality.\\"}),`\\n`,(0,t.jsx)(e.p,{children:\\"A common pitfall when working with pre-aggregations is to add instance-specific global labels to APM agents. This may exhaust the aggregation limits and cause metrics to be aggregated under the overflow bucket instead of the corresponding service. Therefore, make sure to follow the best practice of only adding a limited set of global labels to a particular service.\\"}),`\\n`,(0,t.jsx)(e.h2,{id:\\"validation\\",children:\\"Validation\\"}),`\\n`,(0,t.jsx)(e.p,{children:\\"To validate the effectiveness of the new architecture, and to ensure that the accuracy of the data is not negatively affected, we prepared a test environment where we generated 35K+ transactions per minute in a timespan of 14 days resulting in approximately 850 million documents.\\"}),`\\n`,(0,t.jsx)(e.p,{children:\\"We\\\\u2019ve tested the queries that power our service inventory, the service overview, and the transaction details using different time ranges (1d, 7d, 14d). Across the board, we\\\\u2019ve seen orders of magnitude improvements. Particularly, queries across larger time ranges that benefit from using the coarse-grained metrics in addition to the pre-aggregated service metrics saw incredible reductions of the response time.\\"}),`\\n`,(0,t.jsx)(e.p,{children:\\"We\\\\u2019ve also validated that there\\\\u2019s no loss in accuracy when using the more coarse-grained metrics for larger time ranges.\\"}),`\\n`,(0,t.jsx)(e.p,{children:\\"Every environment will behave a bit differently, but we\\\\u2019re confident that the impressive improvements in response time will translate well to setups of even bigger scale.\\"}),`\\n`,(0,t.jsx)(e.h2,{id:\\"planned-improvements\\",children:\\"Planned improvements\\"}),`\\n`,(0,t.jsx)(e.p,{children:\\"As mentioned in the FAQs section, the number of time series for transaction metrics can grow quickly, as it is the product of multiple dimensions. For example, given a service that runs on 100 hosts and has 100 transaction names that each have 4 transaction results, APM Server needs to track 40,000 (100 x 100 x 4) different time series for that service. 
This would even exceed the maximum per-service limit of 32,000 for APM Servers with 64GB of main memory.\\"}),`\\n`,(0,t.jsx)(e.p,{children:\\"As a result, the UI will show an entry for \\\\u201CRemaining Transactions\\\\u201D in the Service overview page. This tracks the transaction metrics for a service once it hits the limit. As a result, you may not see all transaction names of your service. It may also be that all distinct transaction names are listed, but that the transaction metrics for some of the instances of that service are combined in the \\\\u201CRemaining Transactions\\\\u201D category.\\"}),`\\n`,(0,t.jsx)(e.p,{children:\\"We\\\\u2019re currently considering restructuring the dimensions for the metrics to avoid that the combination of the dimensions for transaction name and service instance-specific dimensions (such as the host name) lead to an explosion of time series. Stay tuned for more details.\\"}),`\\n`,(0,t.jsx)(e.h2,{id:\\"conclusion\\",children:\\"Conclusion\\"}),`\\n`,(0,t.jsx)(e.p,{children:\\"The architectural improvements we\\\\u2019ve delivered in the past releases provide a step-function in terms of the scalability and responsiveness of our UI. Instead of having to aggregate massive amounts of data on-the-fly as users are navigating through the user interface, we pre-aggregate the results for the most common queries as data is coming in. This ensures we have the answers ready before users have even asked their most frequently asked questions, while still being able to answer ad-hoc questions.\\"}),`\\n`,(0,t.jsx)(e.p,{children:\\"We are excited to continue supporting our community members as they push boundaries on their growth journey, providing them with a powerful and mature platform that can effortlessly handle the demands of the largest workloads. Elastic is committed to its mission to enable everyone to find the answers that matter. From all data. In real time. At scale.\\"}),`\\n`,(0,t.jsx)(e.p,{children:(0,t.jsx)(e.em,{children:\\"The release and timing of any features or functionality described in this post remain at Elastic\'s sole discretion. 
Any features or functionality not currently available may not be delivered on time or at all.\\"})})]})}function d(a={}){let{wrapper:e}=a.components||{};return e?(0,t.jsx)(e,{...a,children:(0,t.jsx)(h,{...a})}):h(a)}return b(A);})();\\n;return Component;"},"_id":"articles/improving-elastic-apm-ui-performance-continuous-rollups-service-metrics.mdx","_raw":{"sourceFilePath":"articles/improving-elastic-apm-ui-performance-continuous-rollups-service-metrics.mdx","sourceFileName":"improving-elastic-apm-ui-performance-continuous-rollups-service-metrics.mdx","sourceFileDir":"articles","contentType":"mdx","flattenedPath":"articles/improving-elastic-apm-ui-performance-continuous-rollups-service-metrics"},"type":"Article","imageUrl":"/assets/images/apm-ui-performance-continuous-rollups-service-metrics/elastic-blog-header-ui.png","readingTime":"11 min read","url":"/apm-ui-performance-continuous-rollups-service-metrics","headings":[{"level":2,"title":"Architectural enhancements","href":"#architectural-enhancements"},{"level":2,"title":"FAQs","href":"#faqs"},{"level":3,"title":"When upgrading to the latest version, will my old data also load faster?","href":"#when-upgrading-to-the-latest-version-will-my-old-data-also-load-faster"},{"level":3,"title":"If the UI is based on metrics, can I still slice and dice using custom labels?","href":"#if-the-ui-is-based-on-metrics-can-i-still-slice-and-dice-using-custom-labels"},{"level":3,"title":"Can I use the pre-aggregated metrics in custom dashboards?","href":"#can-i-use-the-pre-aggregated-metrics-in-custom-dashboards"},{"level":3,"title":"Do the additional metrics have an impact on the storage?","href":"#do-the-additional-metrics-have-an-impact-on-the-storage"},{"level":3,"title":"Are there limits on the amount of time series that APM Server can aggregate?","href":"#are-there-limits-on-the-amount-of-time-series-that-apm-server-can-aggregate"},{"level":2,"title":"Validation","href":"#validation"},{"level":2,"title":"Planned improvements","href":"#planned-improvements"},{"level":2,"title":"Conclusion","href":"#conclusion"}]},{"title":"Infrastructure monitoring with OpenTelemetry in Elastic Observability","slug":"infrastructure-monitoring-with-opentelemetry-in-elastic-observability","date":"2024-07-24","description":"Integrating OpenTelemetry with Elastic Observability for Application and Infrastructure Monitoring Solutions.","image":"Monitoring-infra-with-Otel.png","author":[{"slug":"ishleen-kaur","type":"Author","_raw":{}}],"tags":[{"slug":"opentelemetry","type":"Tag","_raw":{}},{"slug":"metrics","type":"Tag","_raw":{}},{"slug":"cloud-monitoring","type":"Tag","_raw":{}}],"body":{"raw":"\\nAt Elastic, we recently made a decision to fully embrace OpenTelemetry as the premier data collection framework. As an Observability engineer, I firmly believe that vendor agnosticism is essential for delivering the greatest value to our customers. By committing to OpenTelemetry, we are not only staying current with technological advancements but also driving them forward. This investment positions us at the forefront of the industry, championing a more open and flexible approach to observability.\\n\\nElastic donated [Elastic Common Schema (ECS)](https://www.elastic.co/guide/en/ecs/current/index.html) to OpenTelemetry and is actively working to [converge](https://opentelemetry.io/blog/2023/ecs-otel-semconv-convergence/) it with semantic conventions. In the meantime, we are dedicated to support our users by ensuring they don’t have to navigate different standards. 
Our goal is to provide a seamless end-to-end experience while using OpenTelemetry with our application and infrastructure monitoring solutions. This commitment allows users to benefit from the best of both worlds without any friction.\\n\\nIn this blog, we explore how to use the OpenTelemetry (OTel) collector to capture core system metrics from various sources such as AWS EC2, Google Compute, Kubernetes clusters, and individual systems running Linux or macOS.\\n\\n## Powering Infrastructure UIs with Two Ingest Paths\\n\\nElastic users who wish to have OpenTelemetry as their data collection mechanism can now monitor the health of the hosts where the OpenTelemetry collector is deployed using the Hosts and Inventory UIs available in Elastic Observability.\\n\\nElastic offers two distinct ingest paths to power Infrastructure UIs: the ElasticsearchExporter Ingest Path and the OTLP Exporter Ingest Path.\\n\\n![IngestPath](/assets/images/infrastructure-monitoring-with-opentelemetry-in-elastic-observability/IngestPath.png)\\n\\n### ElasticsearchExporter Ingest Path: \\n\\nThe [hostmetrics receiver](https://github.com/open-telemetry/opentelemetry-collector-contrib/blob/main/receiver/hostmetricsreceiver/README.md#host-metrics-receiver) in OpenTelemetry collects system-level metrics such as CPU, memory, and disk usage from the host machine in the OTel schema. \\nThe ElasticsearchExporter ingest path leverages this receiver to generate host metrics. We\'ve developed the [ElasticInfraMetricsProcessor](https://github.com/elastic/opentelemetry-collector-components/tree/main/processor/elasticinframetricsprocessor#elastic-infra-metrics-processor), which utilizes the [opentelemetry-lib](https://github.com/elastic/opentelemetry-lib/tree/main?tab=readme-ov-file#opentelemetry-lib) to convert these metrics into a format that Elastic UIs understand.\\n\\nFor example, the `system.network.io` OTel metric includes a `direction` attribute with values `receive` or `transmit`. These correspond to `system.network.in.bytes` and `system.network.out.bytes`, respectively, within Elastic. \\n\\nThe [processor](https://github.com/elastic/opentelemetry-collector-components/tree/main/processor/elasticinframetricsprocessor#elastic-infra-metrics-processor) then forwards these metrics to the [Elasticsearch Exporter](https://github.com/open-telemetry/opentelemetry-collector-contrib/tree/main/exporter/elasticsearchexporter#elasticsearch-exporter), now enhanced to support exporting metrics in ECS mode. The exporter sends the metrics to an Elasticsearch endpoint, lighting up the Infrastructure UIs with insightful data.\\n\\nTo utilize this path, you can deploy the collector from the Elastic Collector Distro, available [here](https://github.com/elastic/elastic-agent/blob/main/internal/pkg/otel/README.md). 
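To make the schema translation concrete, here is a simplified before/after of the network metric discussed above. The document shapes are an illustrative sketch, not the exact wire or index format:

```yaml
# Illustrative only: simplified shapes, not exact document formats.
# OTel data point emitted by the hostmetrics receiver:
otel_datapoint:
  metric: system.network.io
  value: 1024
  attributes:
    direction: receive

# The equivalent ECS fields after the elasticinframetrics processor:
ecs_fields:
  system.network.in.bytes: 1024
```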
\\n\\nAn example collector config for this Ingest Path:\\n```yaml\\nreceivers:\\n  hostmetrics:\\n    collection_interval: 10s\\n    scrapers:\\n      cpu:\\n        metrics:\\n          system.cpu.utilization:\\n            enabled: true\\n          system.cpu.logical.count:\\n            enabled: true\\n      memory:\\n        metrics:\\n          system.memory.utilization:\\n            enabled: true\\n      process:\\n        metrics:\\n          process.open_file_descriptors:\\n            enabled: true\\n          process.memory.utilization:\\n            enabled: true\\n          process.disk.operations:\\n            enabled: true\\n      network:\\n      processes:\\n      load:\\n      disk:\\n      filesystem:\\n\\nprocessors:\\n  resourcedetection/system:\\n    detectors: [\\"system\\", \\"ec2\\"]\\n  elasticinframetrics:\\n\\nexporters:\\n  logging:\\n    verbosity: detailed\\n  elasticsearch/metrics:\\n    endpoints: # <your Elasticsearch endpoint(s)>\\n    api_key: # <your API key>\\n    mapping:\\n      mode: ecs\\n\\nservice:\\n  pipelines:\\n    metrics/host:\\n      receivers: [hostmetrics]\\n      processors: [resourcedetection/system, elasticinframetrics]\\n      exporters: [logging, elasticsearch/metrics]\\n```\\n\\nThe Elastic exporter path is ideal for users who would prefer using the custom Elastic Collector [Distro](https://github.com/elastic/elastic-agent/blob/main/internal/pkg/otel/README.md). This path includes the ElasticInfraMetricsProcessor, which sends data to Elasticsearch via the Elasticsearch exporter.\\n\\n### OTLP Exporter Ingest Path:\\n\\nIn the OTLP Exporter Ingest path, the [hostmetrics receiver](https://github.com/open-telemetry/opentelemetry-collector-contrib/blob/main/receiver/hostmetricsreceiver/README.md#host-metrics-receiver) collects system-level metrics such as CPU, memory, and disk usage from the host machine in the OTel schema. These metrics are sent to the [OTLP Exporter](https://github.com/open-telemetry/opentelemetry-collector/tree/main/exporter/otlpexporter#otlp-grpc-exporter), which forwards them to the [APM Server endpoint](https://www.elastic.co/guide/en/observability/current/apm-open-telemetry-direct.html#apm-connect-open-telemetry-collector). The APM Server, using the same [opentelemetry-lib](https://github.com/elastic/opentelemetry-lib/tree/main?tab=readme-ov-file#opentelemetry-lib), converts these metrics into a format compatible with Elastic UIs. Subsequently, the APM Server pushes the metrics to Elasticsearch, powering the Infrastructure UIs.\\n\\nAn example collector configuration for the APM Ingest Path:\\n\\n```yaml\\nreceivers:\\n  hostmetrics:\\n    collection_interval: 10s\\n    scrapers:\\n      cpu:\\n        metrics:\\n          system.cpu.utilization:\\n            enabled: true\\n          system.cpu.logical.count:\\n            enabled: true\\n      memory:\\n        metrics:\\n          system.memory.utilization:\\n            enabled: true\\n      process:\\n        metrics:\\n          process.open_file_descriptors:\\n            enabled: true\\n          process.memory.utilization:\\n            enabled: true\\n          process.disk.operations:\\n            enabled: true\\n      network:\\n      processes:\\n      load:\\n      disk:\\n      filesystem:\\n\\nprocessors:\\n  resourcedetection/system:\\n    detectors: [\\"system\\"]\\n    system:\\n      hostname_sources: [\\"os\\"]\\n\\nexporters:\\n  otlphttp:\\n    endpoint: # <your APM Server OTLP endpoint>\\n    tls:\\n      insecure: false\\n    headers:\\n      Authorization: # <bearer token or API key header value>\\n  logging:\\n    verbosity: detailed\\n\\nservice:\\n  pipelines:\\n    metrics/host:\\n      receivers: [hostmetrics]\\n      processors: [resourcedetection/system]\\n      exporters: [logging, otlphttp]\\n```\\n\\nThe OTLP Exporter Ingest path can help existing users who are already using Elastic APM and want to see the Infrastructure UIs populated as well. 
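Whichever ingest path you pick, launching the collector looks the same. A minimal sketch, assuming one of the YAML files above is saved as `otel-config.yaml` (the binary name below is the upstream contrib build's; the Elastic Collector Distro wraps the same mechanism, so its entry point may differ):

```bash
# Start the collector with the config above. The binary name is an
# assumption: otelcol-contrib is the upstream contrib distribution's binary.
otelcol-contrib --config otel-config.yaml
```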
These users can use the default [OpenTelemetry Collector](https://github.com/open-telemetry/opentelemetry-collector-contrib?tab=readme-ov-file#opentelemetry-collector-contrib).\\n\\n\\n\\n## A glimpse of the Infrastructure UIs\\nThe Infrastructure UIs showcase both Host and Kubernetes level views. Below are some of the glimpses of the UIs\\n\\nThe Hosts Overview UI\\n\\n![HostUI](/assets/images/infrastructure-monitoring-with-opentelemetry-in-elastic-observability/HostUI.png)\\n\\nThe Hosts Inventory UI\\n![InventoryUI](/assets/images/infrastructure-monitoring-with-opentelemetry-in-elastic-observability/Inventory.png)\\n\\nThe Process-related Details of the Host\\n\\n\\n![Processes](/assets/images/infrastructure-monitoring-with-opentelemetry-in-elastic-observability/Processes.png)\\n\\nThe Kubernetes Inventory UI\\n\\n\\n![K8s](/assets/images/infrastructure-monitoring-with-opentelemetry-in-elastic-observability/K8s.png)\\n\\n\\nPod level Metrics\\n\\n![Pod Metrics](/assets/images/infrastructure-monitoring-with-opentelemetry-in-elastic-observability/Pod_Metrics.png)\\n\\n\\nOur next step is to create Infrastructure UIs powered by native OTel data, with dedicated OTel dashboards that run on this native data.\\n\\n## Conclusion\\nElastic\'s integration with OpenTelemetry simplifies the observability landscape and while we are diligently working to align ECS with OpenTelemetry’s semantic conventions, our immediate priority is to support our users by simplifying their experience. With this added support, we aim to deliver a seamless, end-to-end experience for those using OpenTelemetry with our application and infrastructure monitoring solutions. We are excited to see how our users will leverage these capabilities to gain deeper insights into their systems.\\n","code":"var Component=(()=>{var p=Object.create;var s=Object.defineProperty;var d=Object.getOwnPropertyDescriptor;var u=Object.getOwnPropertyNames;var g=Object.getPrototypeOf,y=Object.prototype.hasOwnProperty;var f=(r,e)=>()=>(e||r((e={exports:{}}).exports,e),e.exports),b=(r,e)=>{for(var n in e)s(r,n,{get:e[n],enumerable:!0})},l=(r,e,n,o)=>{if(e&&typeof e==\\"object\\"||typeof e==\\"function\\")for(let i of u(e))!y.call(r,i)&&i!==n&&s(r,i,{get:()=>e[i],enumerable:!(o=d(e,i))||o.enumerable});return r};var w=(r,e,n)=>(n=r!=null?p(g(r)):{},l(e||!r||!r.__esModule?s(n,\\"default\\",{value:r,enumerable:!0}):n,r)),v=r=>l(s({},\\"__esModule\\",{value:!0}),r);var a=f((T,c)=>{c.exports=_jsx_runtime});var E={};b(E,{default:()=>m,frontmatter:()=>I});var t=w(a()),I={title:\\"Infrastructure monitoring with OpenTelemetry in Elastic Observability\\",slug:\\"infrastructure-monitoring-with-opentelemetry-in-elastic-observability\\",date:\\"2024-07-24\\",description:\\"Integrating OpenTelemetry with Elastic Observability for Application and Infrastructure Monitoring Solutions.\\",author:[{slug:\\"ishleen-kaur\\"}],image:\\"Monitoring-infra-with-Otel.png\\",tags:[{slug:\\"opentelemetry\\"},{slug:\\"metrics\\"},{slug:\\"cloud-monitoring\\"}]};function h(r){let e={a:\\"a\\",code:\\"code\\",h2:\\"h2\\",h3:\\"h3\\",img:\\"img\\",p:\\"p\\",pre:\\"pre\\",...r.components};return(0,t.jsxs)(t.Fragment,{children:[(0,t.jsx)(e.p,{children:\\"At Elastic, we recently made a decision to fully embrace OpenTelemetry as the premier data collection framework. As an Observability engineer, I firmly believe that vendor agnosticism is essential for delivering the greatest value to our customers. 
By committing to OpenTelemetry, we are not only staying current with technological advancements but also driving them forward. This investment positions us at the forefront of the industry, championing a more open and flexible approach to observability.\\"}),`\\n`,(0,t.jsxs)(e.p,{children:[\\"Elastic donated \\",(0,t.jsx)(e.a,{href:\\"https://www.elastic.co/guide/en/ecs/current/index.html\\",rel:\\"nofollow\\",children:\\"Elastic Common Schema (ECS)\\"}),\\" to OpenTelemetry and is actively working to \\",(0,t.jsx)(e.a,{href:\\"https://opentelemetry.io/blog/2023/ecs-otel-semconv-convergence/\\",rel:\\"nofollow\\",children:\\"converge\\"}),\\" it with semantic conventions. In the meantime, we are dedicated to support our users by ensuring they don\\\\u2019t have to navigate different standards. Our goal is to provide a seamless end-to-end experience while using OpenTelemetry with our application and infrastructure monitoring solutions. This commitment allows users to benefit from the best of both worlds without any friction.\\"]}),`\\n`,(0,t.jsx)(e.p,{children:\\"In this blog, we explore how to use the OpenTelemetry (OTel) collector to capture core system metrics from various sources such as AWS EC2, Google Compute, Kubernetes clusters, and individual systems running Linux or MacOS.\\"}),`\\n`,(0,t.jsx)(e.h2,{id:\\"powering-infrastructure-uis-with-two-ingest-paths\\",children:\\"Powering Infrastructure UIs with Two Ingest Paths\\"}),`\\n`,(0,t.jsx)(e.p,{children:\\"Elastic users who wish to have OpenTelemetry as their data collection mechanism can now monitor the health of the hosts where the OpenTelemetry collector is deployed using the Hosts and Inventory UIs available in Elastic Observability.\\"}),`\\n`,(0,t.jsx)(e.p,{children:\\"Elastic offers two distinct ingest paths to power Infrastructure UIs: the ElasticsearchExporter Ingest Path and the OTLP Exporter Ingest Path.\\"}),`\\n`,(0,t.jsx)(e.p,{children:(0,t.jsx)(e.img,{src:\\"/assets/images/infrastructure-monitoring-with-opentelemetry-in-elastic-observability/IngestPath.png\\",alt:\\"IngestPath\\",width:\\"15584\\",height:\\"6080\\"})}),`\\n`,(0,t.jsx)(e.h3,{id:\\"elasticsearchexporter-ingest-path\\",children:\\"ElasticsearchExporter Ingest Path:\\"}),`\\n`,(0,t.jsxs)(e.p,{children:[\\"The \\",(0,t.jsx)(e.a,{href:\\"https://github.com/open-telemetry/opentelemetry-collector-contrib/blob/main/receiver/hostmetricsreceiver/README.md#host-metrics-receiver\\",rel:\\"nofollow\\",children:\\"hostmetrics receiver\\"}),` in OpenTelemetry collects system-level metrics such as CPU, memory, and disk usage from the host machine in OTel Schema.\\nThe ElasticsearchExporter ingest path leverages the `,(0,t.jsx)(e.a,{href:\\"https://github.com/open-telemetry/opentelemetry-collector-contrib/blob/main/receiver/hostmetricsreceiver/README.md#host-metrics-receiver\\",rel:\\"nofollow\\",children:\\"Hostmetrics Receiver\\"}),\\" to generate host metrics in the OTel schema. 
We\'ve developed the \\",(0,t.jsx)(e.a,{href:\\"https://github.com/elastic/opentelemetry-collector-components/tree/main/processor/elasticinframetricsprocessor#elastic-infra-metrics-processor\\",rel:\\"nofollow\\",children:\\"ElasticInfraMetricsProcessor\\"}),\\", which utilizes the \\",(0,t.jsx)(e.a,{href:\\"https://github.com/elastic/opentelemetry-lib/tree/main?tab=readme-ov-file#opentelemetry-lib\\",rel:\\"nofollow\\",children:\\"opentelemetry-lib\\"}),\\" to convert these metrics into a format that Elastic UIs understand.\\"]}),`\\n`,(0,t.jsxs)(e.p,{children:[\\"For example, the \\",(0,t.jsx)(e.code,{children:\\"system.network.io\\"}),\\" OTel metric includes a \\",(0,t.jsx)(e.code,{children:\\"direction\\"}),\\" attribute with values \\",(0,t.jsx)(e.code,{children:\\"receive\\"}),\\" or \\",(0,t.jsx)(e.code,{children:\\"transmit\\"}),\\". These correspond to \\",(0,t.jsx)(e.code,{children:\\"system.network.in.bytes\\"}),\\" and \\",(0,t.jsx)(e.code,{children:\\"system.network.out.bytes\\"}),\\", respectively, within Elastic.\\"]}),`\\n`,(0,t.jsxs)(e.p,{children:[\\"The \\",(0,t.jsx)(e.a,{href:\\"https://github.com/elastic/opentelemetry-collector-components/tree/main/processor/elasticinframetricsprocessor#elastic-infra-metrics-processor\\",rel:\\"nofollow\\",children:\\"processor\\"}),\\" then forwards these metrics to the \\",(0,t.jsx)(e.a,{href:\\"https://github.com/open-telemetry/opentelemetry-collector-contrib/tree/main/exporter/elasticsearchexporter#elasticsearch-exporter\\",rel:\\"nofollow\\",children:\\"Elasticsearch Exporter\\"}),\\", now enhanced to support exporting metrics in ECS mode. The exporter sends the metrics to an Elasticsearch endpoint, lighting up the Infrastructure UIs with insightful data.\\"]}),`\\n`,(0,t.jsxs)(e.p,{children:[\\"To utilize this path, you can deploy the collector from the Elastic Collector Distro, available \\",(0,t.jsx)(e.a,{href:\\"https://github.com/elastic/elastic-agent/blob/main/internal/pkg/otel/README.md\\",rel:\\"nofollow\\",children:\\"here\\"}),\\".\\"]}),`\\n`,(0,t.jsx)(e.p,{children:\\"An example collector config for this Ingest Path:\\"}),`\\n`,(0,t.jsx)(e.pre,{children:(0,t.jsx)(e.code,{className:\\"language-yaml\\",children:`receivers:\\n hostmetrics:\\n collection_interval: 10s\\n scrapers:\\n cpu:\\n metrics:\\n system.cpu.utilization:\\n enabled: true\\n system.cpu.logical.count:\\n enabled: true\\n memory:\\n metrics:\\n system.memory.utilization:\\n enabled: true\\n process:\\n metrics:\\n process.open_file_descriptors:\\n enabled: true\\n process.memory.utilization:\\n enabled: true\\n process.disk.operations:\\n enabled: true\\n network:\\n processes:\\n load:\\n disk:\\n filesystem:\\n\\nprocessors:\\n resourcedetection/system:\\n detectors: [\\"system\\", \\"ec2\\"]\\n elasticinframetrics:\\n\\nexporters: \\n logging:\\n verbosity: detailed\\n elasticsearch/metrics: \\n endpoints: \\n api_key: \\n mapping:\\n mode: ecs\\n\\nservice:\\n pipelines:\\n metrics/host:\\n receivers: [hostmetrics]\\n processors: [resourcedetection/system, elasticinframetrics]\\n exporters: [logging, elasticsearch/ metrics]\\n\\n`})}),`\\n`,(0,t.jsxs)(e.p,{children:[\\"The Elastic exporter path is ideal for users who would prefer using the custom Elastic Collector \\",(0,t.jsx)(e.a,{href:\\"https://github.com/elastic/elastic-agent/blob/main/internal/pkg/otel/README.md\\",rel:\\"nofollow\\",children:\\"Distro\\"}),\\". 
This path includes the ElasticInfraMetricsProcessor, which sends data to Elasticsearch via Elasticsearch exporter.\\"]}),`\\n`,(0,t.jsx)(e.h3,{id:\\"otlp-exporter-ingest-path\\",children:\\"OTLP Exporter Ingest Path:\\"}),`\\n`,(0,t.jsxs)(e.p,{children:[\\"In the OTLP Exporter Ingest path, the \\",(0,t.jsx)(e.a,{href:\\"https://github.com/open-telemetry/opentelemetry-collector-contrib/blob/main/receiver/hostmetricsreceiver/README.md#host-metrics-receiver\\",rel:\\"nofollow\\",children:\\"hostmetrics receiver\\"}),\\" collects system-level metrics such as CPU, memory, and disk usage from the host machine in OTel Schema. These metrics are sent to the \\",(0,t.jsx)(e.a,{href:\\"https://github.com/open-telemetry/opentelemetry-collector/tree/main/exporter/otlpexporter#otlp-grpc-exporter\\",rel:\\"nofollow\\",children:\\"OTLP Exporter\\"}),\\", which forwards them to the \\",(0,t.jsx)(e.a,{href:\\"https://www.elastic.co/guide/en/observability/current/apm-open-telemetry-direct.html#apm-connect-open-telemetry-collector\\",rel:\\"nofollow\\",children:\\"APM Server endpoint\\"}),\\". The APM Server, using the same \\",(0,t.jsx)(e.a,{href:\\"https://github.com/elastic/opentelemetry-lib/tree/main?tab=readme-ov-file#opentelemetry-lib\\",rel:\\"nofollow\\",children:\\"opentelemetry-lib\\"}),\\", converts these metrics into a format compatible with Elastic UIs. Subsequently, the APM Server pushes the metrics to Elasticsearch, powering the Infrastructure UIs.\\"]}),`\\n`,(0,t.jsx)(e.p,{children:\\"An example collector configuration for the APM Ingest Path\\"}),`\\n`,(0,t.jsx)(e.pre,{children:(0,t.jsx)(e.code,{className:\\"language-yaml\\",children:`receivers:\\n hostmetrics:\\n collection_interval: 10s\\n scrapers:\\n cpu:\\n metrics:\\n system.cpu.utilization:\\n enabled: true\\n system.cpu.logical.count:\\n enabled: true\\n memory:\\n metrics:\\n system.memory.utilization:\\n enabled: true\\n process:\\n metrics:\\n process.open_file_descriptors:\\n enabled: true\\n process.memory.utilization:\\n enabled: true\\n process.disk.operations:\\n enabled: true\\n network:\\n processes:\\n load:\\n disk:\\n filesystem:\\n\\nprocessors:\\n resourcedetection/system:\\n detectors: [\\"system\\"]\\n system:\\n hostname_sources: [\\"os\\"]\\n\\nexporters:\\n otlphttp:\\n endpoint: \\n tls:\\n insecure: false\\n headers:\\n Authorization: \\n logging:\\n verbosity: detailed\\n\\nservice:\\n pipelines:\\n metrics/host:\\n receivers: [hostmetrics]\\n processors: [resourcedetection/system]\\n exporters: [logging, otlphttp]\\n\\n\\n`})}),`\\n`,(0,t.jsxs)(e.p,{children:[\\"The OTLP Exporter Ingest path can help existing users who are already using Elastic APM and want to see the Infrastructure UIs populated as well. These users can use the default \\",(0,t.jsx)(e.a,{href:\\"https://github.com/open-telemetry/opentelemetry-collector-contrib?tab=readme-ov-file#opentelemetry-collector-contrib\\",rel:\\"nofollow\\",children:\\"OpenTelemetry Collector\\"}),\\".\\"]}),`\\n`,(0,t.jsx)(e.h2,{id:\\"a-glimpse-of-the-infrastructure-uis\\",children:\\"A glimpse of the Infrastructure UIs\\"}),`\\n`,(0,t.jsx)(e.p,{children:\\"The Infrastructure UIs showcase both Host and Kubernetes level views. 
Below are some of the glimpses of the UIs\\"}),`\\n`,(0,t.jsx)(e.p,{children:\\"The Hosts Overview UI\\"}),`\\n`,(0,t.jsx)(e.p,{children:(0,t.jsx)(e.img,{src:\\"/assets/images/infrastructure-monitoring-with-opentelemetry-in-elastic-observability/HostUI.png\\",alt:\\"HostUI\\",width:\\"3370\\",height:\\"1870\\"})}),`\\n`,(0,t.jsxs)(e.p,{children:[`The Hosts Inventory UI\\n`,(0,t.jsx)(e.img,{src:\\"/assets/images/infrastructure-monitoring-with-opentelemetry-in-elastic-observability/Inventory.png\\",alt:\\"InventoryUI\\",width:\\"2674\\",height:\\"1570\\"})]}),`\\n`,(0,t.jsx)(e.p,{children:\\"The Process-related Details of the Host\\"}),`\\n`,(0,t.jsx)(e.p,{children:(0,t.jsx)(e.img,{src:\\"/assets/images/infrastructure-monitoring-with-opentelemetry-in-elastic-observability/Processes.png\\",alt:\\"Processes\\",width:\\"3380\\",height:\\"1652\\"})}),`\\n`,(0,t.jsx)(e.p,{children:\\"The Kubernetes Inventory UI\\"}),`\\n`,(0,t.jsx)(e.p,{children:(0,t.jsx)(e.img,{src:\\"/assets/images/infrastructure-monitoring-with-opentelemetry-in-elastic-observability/K8s.png\\",alt:\\"K8s\\",width:\\"3634\\",height:\\"1428\\"})}),`\\n`,(0,t.jsx)(e.p,{children:\\"Pod level Metrics\\"}),`\\n`,(0,t.jsx)(e.p,{children:(0,t.jsx)(e.img,{src:\\"/assets/images/infrastructure-monitoring-with-opentelemetry-in-elastic-observability/Pod_Metrics.png\\",alt:\\"Pod Metrics\\",width:\\"2964\\",height:\\"1418\\"})}),`\\n`,(0,t.jsx)(e.p,{children:\\"Our next step is to create Infrastructure UIs powered by native OTel data, with dedicated OTel dashboards that run on this native data.\\"}),`\\n`,(0,t.jsx)(e.h2,{id:\\"conclusion\\",children:\\"Conclusion\\"}),`\\n`,(0,t.jsx)(e.p,{children:\\"Elastic\'s integration with OpenTelemetry simplifies the observability landscape and while we are diligently working to align ECS with OpenTelemetry\\\\u2019s semantic conventions, our immediate priority is to support our users by simplifying their experience. With this added support, we aim to deliver a seamless, end-to-end experience for those using OpenTelemetry with our application and infrastructure monitoring solutions. 
We are excited to see how our users will leverage these capabilities to gain deeper insights into their systems.\\"})]})}function m(r={}){let{wrapper:e}=r.components||{};return e?(0,t.jsx)(e,{...r,children:(0,t.jsx)(h,{...r})}):h(r)}return v(E);})();\\n;return Component;"},"_id":"articles/infrastructure-monitoring-with-opentelemetry-in-elastic-observability.mdx","_raw":{"sourceFilePath":"articles/infrastructure-monitoring-with-opentelemetry-in-elastic-observability.mdx","sourceFileName":"infrastructure-monitoring-with-opentelemetry-in-elastic-observability.mdx","sourceFileDir":"articles","contentType":"mdx","flattenedPath":"articles/infrastructure-monitoring-with-opentelemetry-in-elastic-observability"},"type":"Article","imageUrl":"/assets/images/infrastructure-monitoring-with-opentelemetry-in-elastic-observability/Monitoring-infra-with-Otel.png","readingTime":"8 min read","url":"/infrastructure-monitoring-with-opentelemetry-in-elastic-observability","headings":[{"level":2,"title":"Powering Infrastructure UIs with Two Ingest Paths","href":"#powering-infrastructure-uis-with-two-ingest-paths"},{"level":3,"title":"ElasticsearchExporter Ingest Path: ","href":"#elasticsearchexporter-ingest-path-"},{"level":3,"title":"OTLP Exporter Ingest Path:","href":"#otlp-exporter-ingest-path"},{"level":2,"title":"A glimpse of the Infrastructure UIs","href":"#a-glimpse-of-the-infrastructure-uis"},{"level":2,"title":"Conclusion","href":"#conclusion"}]},{"title":"Ingesting and analyzing Prometheus metrics with Elastic Observability","slug":"ingesting-analyzing-prometheus-metrics-observability","date":"2023-10-09","description":"In this blog post, we will showcase the integration of Prometheus with Elastic, emphasizing how Elastic elevates metrics monitoring through extensive historical analytics, anomaly detection, and forecasting, all in a cost-effective manner.","image":"illustration-machine-learning-anomaly-v2.png","author":[{"slug":"jenny-morris","type":"Author","_raw":{}}],"tags":[{"slug":"metrics","type":"Tag","_raw":{}},{"slug":"prometheus","type":"Tag","_raw":{}},{"slug":"kubernetes","type":"Tag","_raw":{}}],"body":{"raw":"\\nIn the world of monitoring and observability, [Prometheus](https://prometheus.io/) has grown into the de-facto standard for monitoring in cloud-native environments because of its robust data collection mechanism, flexible querying capabilities, and integration with other tools for rich dashboarding and visualization.\\n\\nPrometheus is primarily built for short-term metric storage, typically retaining data in-memory or on local disk storage, with a focus on real-time monitoring and alerting rather than historical analysis. While it offers valuable insights into current metric values and trends, it may pose economic challenges and fall short of the robust functionalities and capabilities necessary for in-depth historical analysis, long-term trend detection, and forecasting. This is particularly evident in large environments with a substantial number of targets or high data ingestion rates, where metric data accumulates rapidly.\\n\\nNumerous organizations assess their unique needs and explore avenues to augment their Prometheus monitoring and observability capabilities. One effective approach is integrating Prometheus with Elastic\xae. 
In this blog post, we will showcase the integration of Prometheus with Elastic, emphasizing how Elastic elevates metrics monitoring through extensive historical analytics, anomaly detection, and forecasting, all in a cost-effective manner.\\n\\n## Integrate Prometheus with Elastic seamlessly\\n\\nOrganizations that have configured their cloud-native applications to expose metrics in Prometheus format can seamlessly transmit the metrics to Elastic by using [Prometheus integration](https://www.elastic.co/guide/en/beats/metricbeat/current/metricbeat-module-prometheus.html). Elastic enables organizations to monitor their metrics in conjunction with all other data gathered through [Elastic\'s extensive integrations](https://www.elastic.co/integrations/data-integrations).\\n\\nGo to Integrations and find the Prometheus integration.\\n\\n![1 - integrations](/assets/images/ingesting-analyzing-prometheus-metrics-observability/elastic-blog-1-integrations.png)\\n\\nTo gather metrics from Prometheus servers, the Elastic Agent is employed, with central management of Elastic agents handled through the [Fleet server](https://www.elastic.co/guide/en/fleet/current/fleet-overview.html).\\n\\n![2 - set up integration](/assets/images/ingesting-analyzing-prometheus-metrics-observability/elastic-blog-2-set-up-prometheus-integration.png)\\n\\nAfter enrolling the Elastic Agent in the Fleet, users can choose from the following methods to ingest Prometheus metrics into Elastic.\\n\\n### 1. Prometheus collectors\\n\\n[The Prometheus collectors](https://docs.elastic.co/integrations/prometheus#prometheus-exporters-collectors) connect to the Prometheus server and pull metrics or scrape metrics from a Prometheus exporter.\\n\\n![3 - Prometheus collectors](/assets/images/ingesting-analyzing-prometheus-metrics-observability/elastic-blog-3-prometheus-collectors.png)\\n\\n### 2. Prometheus queries\\n\\n[The Prometheus queries](https://docs.elastic.co/integrations/prometheus#prometheus-queries-promql) execute specific Prometheus queries against [Prometheus Query API](https://prometheus.io/docs/prometheus/latest/querying/api/#expression-queries).\\n\\n![4 - Prometheus queries](/assets/images/ingesting-analyzing-prometheus-metrics-observability/elastic-blog-4-promtheus-queries.png)\\n\\n### 3. 
Prometheus remote-write\\n\\n[The Prometheus remote_write](https://docs.elastic.co/integrations/prometheus#prometheus-server-remote-write) can receive metrics from a Prometheus server that has configured the [remote_write](https://prometheus.io/docs/prometheus/latest/configuration/configuration/#remote_write) setting.\\n\\n![5 - Prometheus remote-write](/assets/images/ingesting-analyzing-prometheus-metrics-observability/elastic-blog-5-prometheus-remote-write.png)\\n\\nAfter your Prometheus metrics are ingested, you have the option to visualize your data graphically within the [Metrics Explorer](https://www.elastic.co/guide/en/observability/current/explore-metrics.html) and further segment it based on labels, such as hosts, containers, and more.\\n\\n![10 - metrics explorer](/assets/images/ingesting-analyzing-prometheus-metrics-observability/elastic-blog-10-metrics-explorer.png)\\n\\nYou can also query your metrics data in [Discover](https://www.elastic.co/guide/en/kibana/current/discover.html) and explore the fields of your individual documents within the details panel.\\n\\n![7 - expanded document](/assets/images/ingesting-analyzing-prometheus-metrics-observability/elastic-blog-7-expanded-doc.png)\\n\\n## Storing historical metrics with Elastic’s data tiering mechanism\\n\\nBy exporting Prometheus metrics to Elasticsearch, organizations can extend the retention period and gain the ability to analyze metrics historically. Elastic optimizes data storage and access based on the frequency of data usage and the performance requirements of different data sets. The goal is to efficiently manage and store data, ensuring that it remains accessible when needed while keeping storage costs in check.\\n\\n![8 - hot to frozen flow chart](/assets/images/ingesting-analyzing-prometheus-metrics-observability/elastic-blog-8-hot-to-frozen.png)\\n\\nAfter ingesting Prometheus metrics data, you have various retention options. You can set the duration for data to reside in the hot tier, which utilizes high IO hardware (SSD) and is more expensive. Alternatively, you can move the Prometheus metrics to the warm tier, employing cost-effective hardware like spinning disks (HDD) while maintaining consistent and efficient search performance. The cold tier mirrors the infrastructure of the warm tier for primary data but utilizes S3 for replica storage. Elastic automatically recovers replica indices from S3 in case of node or disk failure, ensuring search performance comparable to the warm tier while reducing disk cost.\\n\\nThe [frozen tier](https://www.elastic.co/blog/introducing-elasticsearch-frozen-tier-searchbox-on-s3) allows direct searching of data stored in S3 or an object store, without the need for rehydration. The purpose is to further reduce storage costs for Prometheus metrics data that is less frequently accessed. By moving historical data into the frozen tier, organizations can optimize their storage infrastructure, ensuring that the recent, critical data remains in higher-performance tiers while less frequently accessed data is stored economically in the frozen tier. This way, organizations can perform historical analysis and trend detection, identify patterns and make informed decisions, and maintain compliance with regulatory standards in a cost-effective manner.\\n\\nAn alternative way to store your cloud-native metrics more efficiently is to use [Elastic Time Series Data Stream](https://www.elastic.co/guide/en/elasticsearch/reference/current/tsds.html) (TSDS). 
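Circling back to method 3 for a moment: on the Prometheus server side, shipping metrics to the integration is a single stanza in `prometheus.yml`. The host, port, and path below are assumptions based on the integration's documented defaults, so verify them against your deployment:

```yaml
# prometheus.yml -- minimal remote_write sketch for method 3 above.
# The 9201 port and /write path are assumed defaults; check the
# Prometheus integration docs for your version.
remote_write:
  - url: "http://<elastic-agent-host>:9201/write"
```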
TSDS can store your metrics data more efficiently with [~70% less disk space](https://www.elastic.co/blog/70-percent-storage-savings-for-metrics-with-elastic-observability) than a regular data stream. The [downsampling](https://www.elastic.co/guide/en/elasticsearch/reference/current/downsampling.html) functionality will further reduce the storage required by rolling up metrics within a fixed time interval into a single summary metric. This not only assists organizations in cutting down on storage expenses for metric data but also simplifies the metric infrastructure, making it easier for users to correlate metrics with logs and traces through a unified interface.\\n\\n## Advanced analytics\\n\\nBesides [Metrics Explorer](https://www.elastic.co/guide/en/observability/current/explore-metrics.html) and [Discover](https://www.elastic.co/guide/en/kibana/current/discover.html), Elasticsearch\xae provides more advanced analytics capabilities and empowers organizations to gain deeper, more valuable insights into their Prometheus metrics data.\\n\\nOut of the box, Prometheus integration provides a default overview dashboard.\\n\\n![9 - adv analytics](/assets/images/ingesting-analyzing-prometheus-metrics-observability/elastic-blog-9-advacned-analytics.png)\\n\\nFrom Metrics Explorer or Discover, users can also easily edit their Prometheus metrics visualization in [Elastic Lens](https://www.elastic.co/kibana/kibana-lens) or create new visualizations from Lens.\\n\\n![6 - metrics explorer](/assets/images/ingesting-analyzing-prometheus-metrics-observability/elastic-blog-6-metrics-explorer.png)\\n\\n![11 - green bars](/assets/images/ingesting-analyzing-prometheus-metrics-observability/elastic-blog-11-green-bars.png)\\n\\nElastic Lens enables users to explore and visualize data intuitively through dynamic visualizations. This user-friendly interface eliminates the need for complex query languages, making data analysis accessible to a broader audience. Elasticsearch also offers other powerful visualization methods with [aggregations](https://www.elastic.co/guide/en/kibana/current/add-aggregation-based-visualization-panels.html) and [filters](https://www.youtube.com/watch?v=I8NtctS33F0), enabling users to perform advanced analytics on their Prometheus metrics data, including short-term and historical data. To learn more, check out the [how-to series: Kibana](https://www.elastic.co/videos/training-how-to-series-stack).\\n\\n## Anomaly detection and forecasting\\n\\nWhen analyzing data, maintaining a constant watch on the screen is simply not feasible, especially when dealing with millions of time series of Prometheus metrics. Engineers frequently encounter the challenge of differentiating normal from abnormal data points, which involves analyzing historical data patterns — a process that can be exceedingly time consuming and often exceeds human capabilities. Thus, there is a pressing need for a more intelligent approach to detect anomalies efficiently.\\n\\nSetting up alerts may seem like an obvious solution, but relying solely on rule-based alerts with static thresholds can be problematic. What\'s normal on a Wednesday at 9:00 a.m. might be entirely different from a Sunday at 2:00 a.m. This often leads to complex and hard-to-maintain rules or wide alert ranges that end up missing crucial issues. 
Moreover, as your business, infrastructure, users, and products evolve, these fixed rules don\'t keep up, resulting in lots of false positives or, even worse, important issues slipping through the cracks without detection. A more intelligent and adaptable approach is needed to ensure accurate and timely anomaly detection.\\n\\nElastic\'s machine learning anomaly detection excels in such scenarios. It automatically models the normal behavior of your Prometheus data, learning trends, and identifying anomalies, thereby reducing false positives and improving mean time to resolution (MTTR). With over 13 years of development experience in this field, Elastic has emerged as a trusted industry leader.\\n\\nThe key advantage of Elastic\'s machine learning anomaly detection lies in its unsupervised learning approach. By continuously observing real-time data, it acquires an understanding of the data\'s behavior over time. This includes grasping daily and weekly patterns, enabling it to establish a normalcy range of expected behavior. Behind the scenes, it constructs statistical models that allow accurate predictions, promptly identifying any unexpected variations. In cases where emerging data exhibits unusual trends, you can seamlessly integrate with alerting systems, operationalizing this valuable insight.\\n\\n![12 - LPO](/assets/images/ingesting-analyzing-prometheus-metrics-observability/elastic-blog-12-LPO.png)\\n\\nMachine learning\'s ability to project into the future, forecasting data trends one day, a week, or even a month ahead, equips engineers not only with reporting capabilities but also with pattern recognition and failure prediction based on historical Prometheus data. This plays a crucial role in maintaining mission-critical workloads, offering organizations a proactive monitoring approach. By foreseeing and addressing issues before they escalate, organizations can avert downtime, cut costs, optimize resource utilization, and ensure uninterrupted availability of their vital applications and services.\\n\\n[Creating a machine learning job](https://www.elastic.co/guide/en/machine-learning/current/ml-ad-run-jobs.html#ml-ad-create-job) for your Prometheus data is a straightforward task with a few simple steps. Simply specify the data index and set the desired time range in the single metric view. The machine learning job will then automatically process the historical data, building statistical models behind the scenes. These models will enable the system to predict trends and identify anomalies effectively, providing valuable and actionable insights for your monitoring needs.\\n\\n![13 - create ML job](/assets/images/ingesting-analyzing-prometheus-metrics-observability/elastic-blog-13-creating-ML-job.png)\\n\\nIn essence, Elastic machine learning empowers us to harness the capabilities of data scientists and effectively apply them in monitoring Prometheus metrics. By seamlessly detecting anomalies and predicting potential issues in advance, Elastic machine learning bridges the gap and enables IT professionals to benefit from the insights derived from advanced data analysis. This practical and accessible approach to anomaly detection equips organizations with a proactive stance toward maintaining the reliability of their systems.\\n\\n## Try it out\\n\\n[Start a free trial](https://www.elastic.co/cloud/cloud-trial-overview) on Elastic Cloud and [ingest your Prometheus metrics into Elastic](https://www.elastic.co/guide/en/beats/metricbeat/current/metricbeat-module-prometheus.html). 
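As a companion to the machine learning section above, here is a hedged sketch of creating a similar anomaly detection job through Elasticsearch's ML API rather than the UI. The environment variables, index pattern, and field name are assumptions, not guaranteed names from the Prometheus integration:

```bash
# Sketch only: create an anomaly detection job for one Prometheus metric.
# ES_URL, ES_API_KEY, the index pattern, and the field name are assumptions.
curl -X PUT "$ES_URL/_ml/anomaly_detectors/prometheus-cpu-mean" \
  -H "Content-Type: application/json" \
  -H "Authorization: ApiKey $ES_API_KEY" \
  -d '{
    "analysis_config": {
      "bucket_span": "15m",
      "detectors": [
        { "function": "mean", "field_name": "prometheus.metrics.node_cpu_seconds_total" }
      ]
    },
    "data_description": { "time_field": "@timestamp" },
    "datafeed_config": { "indices": ["metrics-prometheus.collector-default"] }
  }'
```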
Enhance your Prometheus monitoring with Elastic Observability. Stay ahead of potential issues with advanced AI/ML anomaly detection and prediction capabilities. Eliminate data silos, reduce costs, and enhance overall response efficiency.\\n\\nElevate your monitoring capabilities with Elastic today!\\n\\n_The release and timing of any features or functionality described in this post remain at Elastic\'s sole discretion. Any features or functionality not currently available may not be delivered on time or at all._\\n","code":"var Component=(()=>{var m=Object.create;var r=Object.defineProperty;var g=Object.getOwnPropertyDescriptor;var u=Object.getOwnPropertyNames;var p=Object.getPrototypeOf,f=Object.prototype.hasOwnProperty;var w=(i,e)=>()=>(e||i((e={exports:{}}).exports,e),e.exports),y=(i,e)=>{for(var a in e)r(i,a,{get:e[a],enumerable:!0})},o=(i,e,a,s)=>{if(e&&typeof e==\\"object\\"||typeof e==\\"function\\")for(let n of u(e))!f.call(i,n)&&n!==a&&r(i,n,{get:()=>e[n],enumerable:!(s=g(e,n))||s.enumerable});return i};var b=(i,e,a)=>(a=i!=null?m(p(i)):{},o(e||!i||!i.__esModule?r(a,\\"default\\",{value:i,enumerable:!0}):a,i)),v=i=>o(r({},\\"__esModule\\",{value:!0}),i);var c=w((E,l)=>{l.exports=_jsx_runtime});var x={};y(x,{default:()=>d,frontmatter:()=>z});var t=b(c()),z={title:\\"Ingesting and analyzing Prometheus metrics with Elastic Observability\\",slug:\\"ingesting-analyzing-prometheus-metrics-observability\\",date:\\"2023-10-09\\",description:\\"In this blog post, we will showcase the integration of Prometheus with Elastic, emphasizing how Elastic elevates metrics monitoring through extensive historical analytics, anomaly detection, and forecasting, all in a cost-effective manner.\\",author:[{slug:\\"jenny-morris\\"}],image:\\"illustration-machine-learning-anomaly-v2.png\\",tags:[{slug:\\"metrics\\"},{slug:\\"prometheus\\"},{slug:\\"kubernetes\\"}]};function h(i){let e={a:\\"a\\",em:\\"em\\",h2:\\"h2\\",h3:\\"h3\\",img:\\"img\\",p:\\"p\\",...i.components};return(0,t.jsxs)(t.Fragment,{children:[(0,t.jsxs)(e.p,{children:[\\"In the world of monitoring and observability, \\",(0,t.jsx)(e.a,{href:\\"https://prometheus.io/\\",rel:\\"nofollow\\",children:\\"Prometheus\\"}),\\" has grown into the de-facto standard for monitoring in cloud-native environments because of its robust data collection mechanism, flexible querying capabilities, and integration with other tools for rich dashboarding and visualization.\\"]}),`\\n`,(0,t.jsx)(e.p,{children:\\"Prometheus is primarily built for short-term metric storage, typically retaining data in-memory or on local disk storage, with a focus on real-time monitoring and alerting rather than historical analysis. While it offers valuable insights into current metric values and trends, it may pose economic challenges and fall short of the robust functionalities and capabilities necessary for in-depth historical analysis, long-term trend detection, and forecasting. This is particularly evident in large environments with a substantial number of targets or high data ingestion rates, where metric data accumulates rapidly.\\"}),`\\n`,(0,t.jsx)(e.p,{children:\\"Numerous organizations assess their unique needs and explore avenues to augment their Prometheus monitoring and observability capabilities. One effective approach is integrating Prometheus with Elastic\\\\xAE. 
In this blog post, we will showcase the integration of Prometheus with Elastic, emphasizing how Elastic elevates metrics monitoring through extensive historical analytics, anomaly detection, and forecasting, all in a cost-effective manner.\\"}),`\\n`,(0,t.jsx)(e.h2,{id:\\"integrate-prometheus-with-elastic-seamlessly\\",children:\\"Integrate Prometheus with Elastic seamlessly\\"}),`\\n`,(0,t.jsxs)(e.p,{children:[\\"Organizations that have configured their cloud-native applications to expose metrics in Prometheus format can seamlessly transmit the metrics to Elastic by using \\",(0,t.jsx)(e.a,{href:\\"https://www.elastic.co/guide/en/beats/metricbeat/current/metricbeat-module-prometheus.html\\",rel:\\"nofollow\\",children:\\"Prometheus integration\\"}),\\". Elastic enables organizations to monitor their metrics in conjunction with all other data gathered through \\",(0,t.jsx)(e.a,{href:\\"https://www.elastic.co/integrations/data-integrations\\",rel:\\"nofollow\\",children:\\"Elastic\'s extensive integrations\\"}),\\".\\"]}),`\\n`,(0,t.jsx)(e.p,{children:\\"Go to Integrations and find the Prometheus integration.\\"}),`\\n`,(0,t.jsx)(e.p,{children:(0,t.jsx)(e.img,{src:\\"/assets/images/ingesting-analyzing-prometheus-metrics-observability/elastic-blog-1-integrations.png\\",alt:\\"1 - integrations\\",width:\\"1999\\",height:\\"1232\\"})}),`\\n`,(0,t.jsxs)(e.p,{children:[\\"To gather metrics from Prometheus servers, the Elastic Agent is employed, with central management of Elastic agents handled through the \\",(0,t.jsx)(e.a,{href:\\"https://www.elastic.co/guide/en/fleet/current/fleet-overview.html\\",rel:\\"nofollow\\",children:\\"Fleet server\\"}),\\".\\"]}),`\\n`,(0,t.jsx)(e.p,{children:(0,t.jsx)(e.img,{src:\\"/assets/images/ingesting-analyzing-prometheus-metrics-observability/elastic-blog-2-set-up-prometheus-integration.png\\",alt:\\"2 - set up integration\\",width:\\"1999\\",height:\\"1119\\"})}),`\\n`,(0,t.jsx)(e.p,{children:\\"After enrolling the Elastic Agent in the Fleet, users can choose from the following methods to ingest Prometheus metrics into Elastic.\\"}),`\\n`,(0,t.jsx)(e.h3,{id:\\"1-prometheus-collectors\\",children:\\"1. Prometheus collectors\\"}),`\\n`,(0,t.jsxs)(e.p,{children:[(0,t.jsx)(e.a,{href:\\"https://docs.elastic.co/integrations/prometheus#prometheus-exporters-collectors\\",rel:\\"nofollow\\",children:\\"The Prometheus collectors\\"}),\\" connect to the Prometheus server and pull metrics or scrape metrics from a Prometheus exporter.\\"]}),`\\n`,(0,t.jsx)(e.p,{children:(0,t.jsx)(e.img,{src:\\"/assets/images/ingesting-analyzing-prometheus-metrics-observability/elastic-blog-3-prometheus-collectors.png\\",alt:\\"3 - Prometheus collectors\\",width:\\"1786\\",height:\\"1540\\"})}),`\\n`,(0,t.jsx)(e.h3,{id:\\"2-prometheus-queries\\",children:\\"2. 
Prometheus queries\\"}),`\\n`,(0,t.jsxs)(e.p,{children:[(0,t.jsx)(e.a,{href:\\"https://docs.elastic.co/integrations/prometheus#prometheus-queries-promql\\",rel:\\"nofollow\\",children:\\"The Prometheus queries\\"}),\\" execute specific Prometheus queries against \\",(0,t.jsx)(e.a,{href:\\"https://prometheus.io/docs/prometheus/latest/querying/api/#expression-queries\\",rel:\\"nofollow\\",children:\\"Prometheus Query API\\"}),\\".\\"]}),`\\n`,(0,t.jsx)(e.p,{children:(0,t.jsx)(e.img,{src:\\"/assets/images/ingesting-analyzing-prometheus-metrics-observability/elastic-blog-4-promtheus-queries.png\\",alt:\\"4 - Prometheus queries\\",width:\\"1626\\",height:\\"1524\\"})}),`\\n`,(0,t.jsx)(e.h3,{id:\\"3-prometheus-remote-write\\",children:\\"3. Prometheus remote-write\\"}),`\\n`,(0,t.jsxs)(e.p,{children:[(0,t.jsx)(e.a,{href:\\"https://docs.elastic.co/integrations/prometheus#prometheus-server-remote-write\\",rel:\\"nofollow\\",children:\\"The Prometheus remote_write\\"}),\\" can receive metrics from a Prometheus server that has configured the \\",(0,t.jsx)(e.a,{href:\\"https://prometheus.io/docs/prometheus/latest/configuration/configuration/#remote_write\\",rel:\\"nofollow\\",children:\\"remote_write\\"}),\\" setting.\\"]}),`\\n`,(0,t.jsx)(e.p,{children:(0,t.jsx)(e.img,{src:\\"/assets/images/ingesting-analyzing-prometheus-metrics-observability/elastic-blog-5-prometheus-remote-write.png\\",alt:\\"5 - Prometheus remote-write\\",width:\\"1664\\",height:\\"832\\"})}),`\\n`,(0,t.jsxs)(e.p,{children:[\\"After your Prometheus metrics are ingested, you have the option to visualize your data graphically within the \\",(0,t.jsx)(e.a,{href:\\"https://www.elastic.co/guide/en/observability/current/explore-metrics.html\\",rel:\\"nofollow\\",children:\\"Metrics Explorer\\"}),\\" and further segment it based on labels, such as hosts, containers, and more.\\"]}),`\\n`,(0,t.jsx)(e.p,{children:(0,t.jsx)(e.img,{src:\\"/assets/images/ingesting-analyzing-prometheus-metrics-observability/elastic-blog-10-metrics-explorer.png\\",alt:\\"10 - metrics explorer\\",width:\\"1999\\",height:\\"1020\\"})}),`\\n`,(0,t.jsxs)(e.p,{children:[\\"You can also query your metrics data in \\",(0,t.jsx)(e.a,{href:\\"https://www.elastic.co/guide/en/kibana/current/discover.html\\",rel:\\"nofollow\\",children:\\"Discover\\"}),\\" and explore the fields of your individual documents within the details panel.\\"]}),`\\n`,(0,t.jsx)(e.p,{children:(0,t.jsx)(e.img,{src:\\"/assets/images/ingesting-analyzing-prometheus-metrics-observability/elastic-blog-7-expanded-doc.png\\",alt:\\"7 - expanded document\\",width:\\"1999\\",height:\\"1063\\"})}),`\\n`,(0,t.jsx)(e.h2,{id:\\"storing-historical-metrics-with-elastics-data-tiering-mechanism\\",children:\\"Storing historical metrics with Elastic\\\\u2019s data tiering mechanism\\"}),`\\n`,(0,t.jsx)(e.p,{children:\\"By exporting Prometheus metrics to Elasticsearch, organizations can extend the retention period and gain the ability to analyze metrics historically. Elastic optimizes data storage and access based on the frequency of data usage and the performance requirements of different data sets. 
The goal is to efficiently manage and store data, ensuring that it remains accessible when needed while keeping storage costs in check.\\"}),`\\n`,(0,t.jsx)(e.p,{children:(0,t.jsx)(e.img,{src:\\"/assets/images/ingesting-analyzing-prometheus-metrics-observability/elastic-blog-8-hot-to-frozen.png\\",alt:\\"8 - hot to frozen flow chart\\",width:\\"1966\\",height:\\"1218\\"})}),`\\n`,(0,t.jsx)(e.p,{children:\\"After ingesting Prometheus metrics data, you have various retention options. You can set the duration for data to reside in the hot tier, which utilizes high IO hardware (SSD) and is more expensive. Alternatively, you can move the Prometheus metrics to the warm tier, employing cost-effective hardware like spinning disks (HDD) while maintaining consistent and efficient search performance. The cold tier mirrors the infrastructure of the warm tier for primary data but utilizes S3 for replica storage. Elastic automatically recovers replica indices from S3 in case of node or disk failure, ensuring search performance comparable to the warm tier while reducing disk cost.\\"}),`\\n`,(0,t.jsxs)(e.p,{children:[\\"The \\",(0,t.jsx)(e.a,{href:\\"https://www.elastic.co/blog/introducing-elasticsearch-frozen-tier-searchbox-on-s3\\",rel:\\"nofollow\\",children:\\"frozen tier\\"}),\\" allows direct searching of data stored in S3 or an object store, without the need for rehydration. The purpose is to further reduce storage costs for Prometheus metrics data that is less frequently accessed. By moving historical data into the frozen tier, organizations can optimize their storage infrastructure, ensuring that the recent, critical data remains in higher-performance tiers while less frequently accessed data is stored economically in the frozen tier. This way, organizations can perform historical analysis and trend detection, identify patterns and make informed decisions, and maintain compliance with regulatory standards in a cost-effective manner.\\"]}),`\\n`,(0,t.jsxs)(e.p,{children:[\\"An alternative way to store your cloud-native metrics more efficiently is to use \\",(0,t.jsx)(e.a,{href:\\"https://www.elastic.co/guide/en/elasticsearch/reference/current/tsds.html\\",rel:\\"nofollow\\",children:\\"Elastic Time Series Data Stream\\"}),\\" (TSDS). TSDS can store your metrics data more efficiently with \\",(0,t.jsx)(e.a,{href:\\"https://www.elastic.co/blog/70-percent-storage-savings-for-metrics-with-elastic-observability\\",rel:\\"nofollow\\",children:\\"~70% less disk space\\"}),\\" than a regular data stream. The \\",(0,t.jsx)(e.a,{href:\\"https://www.elastic.co/guide/en/elasticsearch/reference/current/downsampling.html\\",rel:\\"nofollow\\",children:\\"downsampling\\"}),\\" functionality will further reduce the storage required by rolling up metrics within a fixed time interval into a single summary metric. 
This not only assists organizations in cutting down on storage expenses for metric data but also simplifies the metric infrastructure, making it easier for users to correlate metrics with logs and traces through a unified interface.\\"]}),`\\n`,(0,t.jsx)(e.h2,{id:\\"advanced-analytics\\",children:\\"Advanced analytics\\"}),`\\n`,(0,t.jsxs)(e.p,{children:[\\"Besides \\",(0,t.jsx)(e.a,{href:\\"https://www.elastic.co/guide/en/observability/current/explore-metrics.html\\",rel:\\"nofollow\\",children:\\"Metrics Explorer\\"}),\\" and \\",(0,t.jsx)(e.a,{href:\\"https://www.elastic.co/guide/en/kibana/current/discover.html\\",rel:\\"nofollow\\",children:\\"Discover\\"}),\\", Elasticsearch\\\\xAE provides more advanced analytics capabilities and empowers organizations to gain deeper, more valuable insights into their Prometheus metrics data.\\"]}),`\\n`,(0,t.jsx)(e.p,{children:\\"Out of the box, Prometheus integration provides a default overview dashboard.\\"}),`\\n`,(0,t.jsx)(e.p,{children:(0,t.jsx)(e.img,{src:\\"/assets/images/ingesting-analyzing-prometheus-metrics-observability/elastic-blog-9-advacned-analytics.png\\",alt:\\"9 - adv analytics\\",width:\\"1999\\",height:\\"1074\\"})}),`\\n`,(0,t.jsxs)(e.p,{children:[\\"From Metrics Explorer or Discover, users can also easily edit their Prometheus metrics visualization in \\",(0,t.jsx)(e.a,{href:\\"https://www.elastic.co/kibana/kibana-lens\\",rel:\\"nofollow\\",children:\\"Elastic Lens\\"}),\\" or create new visualizations from Lens.\\"]}),`\\n`,(0,t.jsx)(e.p,{children:(0,t.jsx)(e.img,{src:\\"/assets/images/ingesting-analyzing-prometheus-metrics-observability/elastic-blog-6-metrics-explorer.png\\",alt:\\"6 - metrics explorer\\",width:\\"1999\\",height:\\"872\\"})}),`\\n`,(0,t.jsx)(e.p,{children:(0,t.jsx)(e.img,{src:\\"/assets/images/ingesting-analyzing-prometheus-metrics-observability/elastic-blog-11-green-bars.png\\",alt:\\"11 - green bars\\",width:\\"1470\\",height:\\"660\\"})}),`\\n`,(0,t.jsxs)(e.p,{children:[\\"Elastic Lens enables users to explore and visualize data intuitively through dynamic visualizations. This user-friendly interface eliminates the need for complex query languages, making data analysis accessible to a broader audience. Elasticsearch also offers other powerful visualization methods with \\",(0,t.jsx)(e.a,{href:\\"https://www.elastic.co/guide/en/kibana/current/add-aggregation-based-visualization-panels.html\\",rel:\\"nofollow\\",children:\\"aggregations\\"}),\\" and \\",(0,t.jsx)(e.a,{href:\\"https://www.youtube.com/watch?v=I8NtctS33F0\\",rel:\\"nofollow\\",children:\\"filters\\"}),\\", enabling users to perform advanced analytics on their Prometheus metrics data, including short-term and historical data. To learn more, check out the \\",(0,t.jsx)(e.a,{href:\\"https://www.elastic.co/videos/training-how-to-series-stack\\",rel:\\"nofollow\\",children:\\"how-to series: Kibana\\"}),\\".\\"]}),`\\n`,(0,t.jsx)(e.h2,{id:\\"anomaly-detection-and-forecasting\\",children:\\"Anomaly detection and forecasting\\"}),`\\n`,(0,t.jsx)(e.p,{children:\\"When analyzing data, maintaining a constant watch on the screen is simply not feasible, especially when dealing with millions of time series of Prometheus metrics. Engineers frequently encounter the challenge of differentiating normal from abnormal data points, which involves analyzing historical data patterns \\\\u2014 a process that can be exceedingly time consuming and often exceeds human capabilities. 
Thus, there is a pressing need for a more intelligent approach to detect anomalies efficiently.\\"}),`\\n`,(0,t.jsx)(e.p,{children:\\"Setting up alerts may seem like an obvious solution, but relying solely on rule-based alerts with static thresholds can be problematic. What\'s normal on a Wednesday at 9:00 a.m. might be entirely different from a Sunday at 2:00 a.m. This often leads to complex and hard-to-maintain rules or wide alert ranges that end up missing crucial issues. Moreover, as your business, infrastructure, users, and products evolve, these fixed rules don\'t keep up, resulting in lots of false positives or, even worse, important issues slipping through the cracks without detection. A more intelligent and adaptable approach is needed to ensure accurate and timely anomaly detection.\\"}),`\\n`,(0,t.jsx)(e.p,{children:\\"Elastic\'s machine learning anomaly detection excels in such scenarios. It automatically models the normal behavior of your Prometheus data, learning trends, and identifying anomalies, thereby reducing false positives and improving mean time to resolution (MTTR). With over 13 years of development experience in this field, Elastic has emerged as a trusted industry leader.\\"}),`\\n`,(0,t.jsx)(e.p,{children:\\"The key advantage of Elastic\'s machine learning anomaly detection lies in its unsupervised learning approach. By continuously observing real-time data, it acquires an understanding of the data\'s behavior over time. This includes grasping daily and weekly patterns, enabling it to establish a normalcy range of expected behavior. Behind the scenes, it constructs statistical models that allow accurate predictions, promptly identifying any unexpected variations. In cases where emerging data exhibits unusual trends, you can seamlessly integrate with alerting systems, operationalizing this valuable insight.\\"}),`\\n`,(0,t.jsx)(e.p,{children:(0,t.jsx)(e.img,{src:\\"/assets/images/ingesting-analyzing-prometheus-metrics-observability/elastic-blog-12-LPO.png\\",alt:\\"12 - LPO\\",width:\\"1999\\",height:\\"898\\"})}),`\\n`,(0,t.jsx)(e.p,{children:\\"Machine learning\'s ability to project into the future, forecasting data trends one day, a week, or even a month ahead, equips engineers not only with reporting capabilities but also with pattern recognition and failure prediction based on historical Prometheus data. This plays a crucial role in maintaining mission-critical workloads, offering organizations a proactive monitoring approach. By foreseeing and addressing issues before they escalate, organizations can avert downtime, cut costs, optimize resource utilization, and ensure uninterrupted availability of their vital applications and services.\\"}),`\\n`,(0,t.jsxs)(e.p,{children:[(0,t.jsx)(e.a,{href:\\"https://www.elastic.co/guide/en/machine-learning/current/ml-ad-run-jobs.html#ml-ad-create-job\\",rel:\\"nofollow\\",children:\\"Creating a machine learning job\\"}),\\" for your Prometheus data is a straightforward task with a few simple steps. Simply specify the data index and set the desired time range in the single metric view. The machine learning job will then automatically process the historical data, building statistical models behind the scenes. 
These models will enable the system to predict trends and identify anomalies effectively, providing valuable and actionable insights for your monitoring needs.\\"]}),`\\n`,(0,t.jsx)(e.p,{children:(0,t.jsx)(e.img,{src:\\"/assets/images/ingesting-analyzing-prometheus-metrics-observability/elastic-blog-13-creating-ML-job.png\\",alt:\\"13 - create ML job\\",width:\\"1999\\",height:\\"1113\\"})}),`\\n`,(0,t.jsx)(e.p,{children:\\"In essence, Elastic machine learning empowers us to harness the capabilities of data scientists and effectively apply them in monitoring Prometheus metrics. By seamlessly detecting anomalies and predicting potential issues in advance, Elastic machine learning bridges the gap and enables IT professionals to benefit from the insights derived from advanced data analysis. This practical and accessible approach to anomaly detection equips organizations with a proactive stance toward maintaining the reliability of their systems.\\"}),`\\n`,(0,t.jsx)(e.h2,{id:\\"try-it-out\\",children:\\"Try it out\\"}),`\\n`,(0,t.jsxs)(e.p,{children:[(0,t.jsx)(e.a,{href:\\"https://www.elastic.co/cloud/cloud-trial-overview\\",rel:\\"nofollow\\",children:\\"Start a free trial\\"}),\\" on Elastic Cloud and \\",(0,t.jsx)(e.a,{href:\\"https://www.elastic.co/guide/en/beats/metricbeat/current/metricbeat-module-prometheus.html\\",rel:\\"nofollow\\",children:\\"ingest your Prometheus metrics into Elastic\\"}),\\". Enhance your Prometheus monitoring with Elastic Observability. Stay ahead of potential issues with advanced AI/ML anomaly detection and prediction capabilities. Eliminate data silos, reduce costs, and enhance overall response efficiency.\\"]}),`\\n`,(0,t.jsx)(e.p,{children:\\"Elevate your monitoring capabilities with Elastic today!\\"}),`\\n`,(0,t.jsx)(e.p,{children:(0,t.jsx)(e.em,{children:\\"The release and timing of any features or functionality described in this post remain at Elastic\'s sole discretion. Any features or functionality not currently available may not be delivered on time or at all.\\"})})]})}function d(i={}){let{wrapper:e}=i.components||{};return e?(0,t.jsx)(e,{...i,children:(0,t.jsx)(h,{...i})}):h(i)}return v(x);})();\\n;return Component;"},"_id":"articles/ingesting-analyzing-prometheus-metrics-elastic-observability.mdx","_raw":{"sourceFilePath":"articles/ingesting-analyzing-prometheus-metrics-elastic-observability.mdx","sourceFileName":"ingesting-analyzing-prometheus-metrics-elastic-observability.mdx","sourceFileDir":"articles","contentType":"mdx","flattenedPath":"articles/ingesting-analyzing-prometheus-metrics-elastic-observability"},"type":"Article","imageUrl":"/assets/images/ingesting-analyzing-prometheus-metrics-observability/illustration-machine-learning-anomaly-v2.png","readingTime":"9 min read","url":"/ingesting-analyzing-prometheus-metrics-observability","headings":[{"level":2,"title":"Integrate Prometheus with Elastic seamlessly","href":"#integrate-prometheus-with-elastic-seamlessly"},{"level":3,"title":"1. Prometheus collectors","href":"#1-prometheus-collectors"},{"level":3,"title":"2. Prometheus queries","href":"#2-prometheus-queries"},{"level":3,"title":"3. 
Prometheus remote-write","href":"#3-prometheus-remote-write"},{"level":2,"title":"Storing historical metrics with Elastic’s data tiering mechanism","href":"#storing-historical-metrics-with-elastics-data-tiering-mechanism"},{"level":2,"title":"Advanced analytics","href":"#advanced-analytics"},{"level":2,"title":"Anomaly detection and forecasting","href":"#anomaly-detection-and-forecasting"},{"level":2,"title":"Try it out","href":"#try-it-out"}]},{"title":"Introducing Elastic Distribution for OpenTelemetry Python","slug":"elastic-opentelemetry-distribution-python","date":"2024-07-07","description":"Announcing the first alpha release of the Elastic Distribution for OpenTelemetry Python. See how easy it is to instrument your Python applications with OpenTelemetry in this blog post.","image":"python.jpg","author":[{"slug":"riccardo-magliocchetti","type":"Author","_raw":{}}],"tags":[{"slug":"opentelemetry","type":"Tag","_raw":{}},{"slug":"apm","type":"Tag","_raw":{}},{"slug":"python","type":"Tag","_raw":{}},{"slug":"instrumentation","type":"Tag","_raw":{}}],"body":{"raw":"\\nWe are delighted to announce the alpha release of the [Elastic Distribution for OpenTelemetry Python](https://github.com/elastic/elastic-otel-python#readme). This project is a customized OpenTelemetry distribution that allows us to configure better defaults for using OpenTelemetry with the Elastic cloud offering.\\n\\n## Background\\n\\nElastic is standardizing on OpenTelemetry (OTel) for observability and security data collection. As part of that effort, we are [providing distributions of the OpenTelemetry Language SDKs](https://www.elastic.co/blog/elastic-opentelemetry-sdk-distributions). We have recently released alpha distributions for [Java](https://github.com/elastic/elastic-otel-java#readme), [.NET](https://github.com/elastic/elastic-otel-dotnet#readme) and [Node.js](https://github.com/elastic/elastic-otel-node#readme). Our [Android](https://github.com/elastic/apm-agent-android#readme) and [iOS](https://github.com/elastic/apm-agent-ios#readme) SDKs have been OpenTelemetry-based from the start. The Elastic Distribution for OpenTelemetry Python is the latest addition.\\n\\n## Design choices\\n\\nWe have chosen to provide a lean distribution that does not install all the instrumentations by default but that instead provides tools\\nto do so. We leverage the `opentelemetry-bootstrap` tool provided by OpenTelemetry Python project to scan the packages installed in your\\nenvironment and recognizes libraries we are able to instrument. This tool can just report the instrumentations available and optionally\\nis able to install them as well.\\nThis allows you to avoid installing packages you are not going to need or instrument libraries you are not interested in tracing.\\n\\n## Getting started\\n\\nTo get started with Elastic Distribution for OpenTelemetry Python you need to install the package `elastic-opentelemetry` in your project\\nenvironment. We\'ll use `pip` in our examples but you are free to use any python package and environment manager of your choice.\\n\\n```bash\\npip install elastic-opentelemetry\\n```\\n\\nOnce you have installed our distro you\'ll have also the `opentelemetry-bootstrap` command available. Running it:\\n\\n```bash\\nopentelemetry-bootstrap\\n```\\n\\nwill list all available packages for your instrumentation, e.g. 
you can expect something like the following:\\n\\n```\\nopentelemetry-instrumentation-asyncio==0.46b0\\nopentelemetry-instrumentation-dbapi==0.46b0\\nopentelemetry-instrumentation-logging==0.46b0\\nopentelemetry-instrumentation-sqlite3==0.46b0\\nopentelemetry-instrumentation-threading==0.46b0\\nopentelemetry-instrumentation-urllib==0.46b0\\nopentelemetry-instrumentation-wsgi==0.46b0\\nopentelemetry-instrumentation-grpc==0.46b0\\nopentelemetry-instrumentation-requests==0.46b0\\nopentelemetry-instrumentation-system-metrics==0.46b0\\nopentelemetry-instrumentation-urllib3==0.46b0\\n```\\n\\nIt also provides a command option to install the packages automatically:\\n\\n```bash\\nopentelemetry-bootstrap --action=install\\n```\\n\\nIt is advised to run this command every time you release a new version of your application so that you can install or update any\\ninstrumentation packages your code needs.\\n\\nSome environment variables are needed to provide the configuration for instrumenting your services. These mostly\\nconcern the destination of your traces, but they also identify your service.\\nA *service name* is required to make your service distinguishable from the others. Then you need to provide\\nthe *authorization* headers for authentication with Elastic Observability cloud and the Elastic cloud endpoint where the data is sent.\\n\\nThe API Key you get from your Elastic cloud serverless project must be *URL-encoded*; you can do that with the following Python snippet (the `<your-api-key>` placeholder stands for your own key):\\n\\n```python\\nfrom urllib.parse import quote\\nquote(\\"ApiKey <your-api-key>\\")\\n```\\n\\nOnce you have all your configuration values, you can export them via environment variables as below:\\n\\n```bash\\nexport OTEL_RESOURCE_ATTRIBUTES=service.name=<your-service-name>\\nexport OTEL_EXPORTER_OTLP_HEADERS=\\"Authorization=ApiKey%20<your-url-encoded-api-key>\\"\\nexport OTEL_EXPORTER_OTLP_ENDPOINT=<your-elastic-otlp-endpoint>\\n```\\n\\nWe are done with the configuration and the last piece of the puzzle is wrapping your service invocation with\\n`opentelemetry-instrument`, the wrapper that provides *zero-code instrumentation*. *Zero-code* (or Automatic) instrumentation means\\nthat the distribution will set up the OpenTelemetry SDK and enable all the previously installed instrumentations for you.\\nUnfortunately *Zero-code* instrumentation does not cover all libraries and some — web frameworks in particular — will require minimal manual\\nconfiguration.\\n\\nFor a web service running with gunicorn it may look like:\\n\\n```bash\\nopentelemetry-instrument gunicorn main:app\\n```\\n\\nThe result is an observable application using the industry-standard [OpenTelemetry](https://opentelemetry.io/) — offering high-quality instrumentation of many popular Python libraries, a portable API to avoid vendor lock-in and an active community.\\n\\nUsing Elastic Observability, some out-of-the-box benefits you can expect are: rich trace viewing, Service maps, integrated metrics and log analysis, and more.\\n\\n![trace sample screenshot](/assets/images/elastic-opentelemetry-distribution-python/traces-original.png)\\n\\n## What\'s next?\\n\\nElastic is committed to helping OpenTelemetry succeed and to helping our customers use OpenTelemetry effectively in their systems. Last year, we [donated ECS](https://opentelemetry.io/blog/2023/ecs-otel-semconv-convergence/) and continue to work on integrating it with OpenTelemetry Semantic Conventions. More recently, we are working on [donating our eBPF-based profiler](https://www.elastic.co/observability-labs/blog/elastic-profiling-agent-acceptance-opentelemetry) to OpenTelemetry. 
We contribute to many of the language SDKs and other OpenTelemetry projects.\\n\\nIn the Python ecosystem we are active reviewers and contributors of both the [opentelemetry-python](https://github.com/open-telemetry/opentelemetry-python/) and [opentelemetry-python-contrib](https://github.com/open-telemetry/opentelemetry-python-contrib/) repositories.\\n\\nThe Elastic Distribution for OpenTelemetry Python is currently an alpha. Please [try it out](https://github.com/elastic/elastic-otel-python/) and let us know if it might work for you. Watch for the [latest releases here](https://github.com/elastic/elastic-otel-python/releases). You can engage with us on [the project issue tracker](https://github.com/elastic/elastic-otel-python/issues).\\n\\nWe are eager to know your use cases to help you succeed in your Observability journey.\\n\\n_The release and timing of any features or functionality described in this post remain at Elastic\'s sole discretion. Any features or functionality not currently available may not be delivered on time or at all._\\n\\n## Resources\\n\\n- https://www.elastic.co/blog/elastic-opentelemetry-sdk-distributions\\n- https://www.elastic.co/observability-labs/blog/elastic-distribution-opentelemetry-java-agent\\n- https://www.elastic.co/observability-labs/blog/elastic-opentelemetry-distribution-dotnet-applications\\n- https://www.elastic.co/observability-labs/blog/elastic-opentelemetry-distribution-node-js\\n- https://www.elastic.co/observability-labs/blog/manual-instrumentation-python-apps-opentelemetry\\n- https://www.elastic.co/observability-labs/blog/auto-instrumentation-python-applications-opentelemetry\\n- https://www.elastic.co/observability-labs/blog/opentelemetry-observability\\n","code":"var Component=(()=>{var p=Object.create;var r=Object.defineProperty;var u=Object.getOwnPropertyDescriptor;var m=Object.getOwnPropertyNames;var y=Object.getPrototypeOf,b=Object.prototype.hasOwnProperty;var g=(n,e)=>()=>(e||n((e={exports:{}}).exports,e),e.exports),f=(n,e)=>{for(var o in e)r(n,o,{get:e[o],enumerable:!0})},a=(n,e,o,l)=>{if(e&&typeof e==\\"object\\"||typeof e==\\"function\\")for(let i of m(e))!b.call(n,i)&&i!==o&&r(n,i,{get:()=>e[i],enumerable:!(l=u(e,i))||l.enumerable});return n};var w=(n,e,o)=>(o=n!=null?p(y(n)):{},a(e||!n||!n.__esModule?r(o,\\"default\\",{value:n,enumerable:!0}):o,n)),v=n=>a(r({},\\"__esModule\\",{value:!0}),n);var c=g((k,s)=>{s.exports=_jsx_runtime});var O={};f(O,{default:()=>d,frontmatter:()=>T});var t=w(c()),T={title:\\"Introducing Elastic Distribution for OpenTelemetry Python\\",slug:\\"elastic-opentelemetry-distribution-python\\",date:\\"2024-07-07\\",description:\\"Announcing the first alpha release of the Elastic Distribution for OpenTelemetry Python. See how easy it is to instrument your Python applications with OpenTelemetry in this blog post.\\",author:[{slug:\\"riccardo-magliocchetti\\"}],image:\\"python.jpg\\",tags:[{slug:\\"opentelemetry\\"},{slug:\\"apm\\"},{slug:\\"python\\"},{slug:\\"instrumentation\\"}]};function h(n){let e={a:\\"a\\",code:\\"code\\",em:\\"em\\",h2:\\"h2\\",img:\\"img\\",li:\\"li\\",p:\\"p\\",pre:\\"pre\\",ul:\\"ul\\",...n.components};return(0,t.jsxs)(t.Fragment,{children:[(0,t.jsxs)(e.p,{children:[\\"We are delighted to announce the alpha release of the \\",(0,t.jsx)(e.a,{href:\\"https://github.com/elastic/elastic-otel-python#readme\\",rel:\\"nofollow\\",children:\\"Elastic Distribution for OpenTelemetry Python\\"}),\\". 
This project is a customized OpenTelemetry distribution that allows us to configure better defaults for using OpenTelemetry with the Elastic cloud offering.\\"]}),`\\n`,(0,t.jsx)(e.h2,{id:\\"background\\",children:\\"Background\\"}),`\\n`,(0,t.jsxs)(e.p,{children:[\\"Elastic is standardizing on OpenTelemetry (OTel) for observability and security data collection. As part of that effort, we are \\",(0,t.jsx)(e.a,{href:\\"https://www.elastic.co/blog/elastic-opentelemetry-sdk-distributions\\",rel:\\"nofollow\\",children:\\"providing distributions of the OpenTelemetry Language SDKs\\"}),\\". We have recently released alpha distributions for \\",(0,t.jsx)(e.a,{href:\\"https://github.com/elastic/elastic-otel-java#readme\\",rel:\\"nofollow\\",children:\\"Java\\"}),\\", \\",(0,t.jsx)(e.a,{href:\\"https://github.com/elastic/elastic-otel-dotnet#readme\\",rel:\\"nofollow\\",children:\\".NET\\"}),\\" and \\",(0,t.jsx)(e.a,{href:\\"https://github.com/elastic/elastic-otel-node#readme\\",rel:\\"nofollow\\",children:\\"Node.js\\"}),\\". Our \\",(0,t.jsx)(e.a,{href:\\"https://github.com/elastic/apm-agent-android#readme\\",rel:\\"nofollow\\",children:\\"Android\\"}),\\" and \\",(0,t.jsx)(e.a,{href:\\"https://github.com/elastic/apm-agent-ios#readme\\",rel:\\"nofollow\\",children:\\"iOS\\"}),\\" SDKs have been OpenTelemetry-based from the start. The Elastic Distribution for OpenTelemetry Python is the latest addition.\\"]}),`\\n`,(0,t.jsx)(e.h2,{id:\\"design-choices\\",children:\\"Design choices\\"}),`\\n`,(0,t.jsxs)(e.p,{children:[`We have chosen to provide a lean distribution that does not install all the instrumentations by default but that instead provides tools\\nto do so. We leverage the `,(0,t.jsx)(e.code,{children:\\"opentelemetry-bootstrap\\"}),` tool provided by the OpenTelemetry Python project to scan the packages installed in your\\nenvironment and recognize the libraries we are able to instrument. The tool can simply report the available instrumentations, and optionally\\nit can install them as well.\\nThis allows you to avoid installing packages you are not going to need or instrumenting libraries you are not interested in tracing.`]}),`\\n`,(0,t.jsx)(e.h2,{id:\\"getting-started\\",children:\\"Getting started\\"}),`\\n`,(0,t.jsxs)(e.p,{children:[\\"To get started with the Elastic Distribution for OpenTelemetry Python, you need to install the package \\",(0,t.jsx)(e.code,{children:\\"elastic-opentelemetry\\"}),` in your project\\nenvironment. We\'ll use `,(0,t.jsx)(e.code,{children:\\"pip\\"}),\\" in our examples, but you are free to use any Python package and environment manager of your choice.\\"]}),`\\n`,(0,t.jsx)(e.pre,{children:(0,t.jsx)(e.code,{className:\\"language-bash\\",children:`pip install elastic-opentelemetry\\n`})}),`\\n`,(0,t.jsxs)(e.p,{children:[\\"Once you have installed our distro, you\'ll also have the \\",(0,t.jsx)(e.code,{children:\\"opentelemetry-bootstrap\\"}),\\" command available. Running it:\\"]}),`\\n`,(0,t.jsx)(e.pre,{children:(0,t.jsx)(e.code,{className:\\"language-bash\\",children:`opentelemetry-bootstrap\\n`})}),`\\n`,(0,t.jsx)(e.p,{children:\\"will list all available packages for your instrumentation, e.g. 
you can expect something like the following:\\"}),`\\n`,(0,t.jsx)(e.pre,{children:(0,t.jsx)(e.code,{children:`opentelemetry-instrumentation-asyncio==0.46b0\\nopentelemetry-instrumentation-dbapi==0.46b0\\nopentelemetry-instrumentation-logging==0.46b0\\nopentelemetry-instrumentation-sqlite3==0.46b0\\nopentelemetry-instrumentation-threading==0.46b0\\nopentelemetry-instrumentation-urllib==0.46b0\\nopentelemetry-instrumentation-wsgi==0.46b0\\nopentelemetry-instrumentation-grpc==0.46b0\\nopentelemetry-instrumentation-requests==0.46b0\\nopentelemetry-instrumentation-system-metrics==0.46b0\\nopentelemetry-instrumentation-urllib3==0.46b0\\n`})}),`\\n`,(0,t.jsx)(e.p,{children:\\"It also provides a command option to install the packages automatically:\\"}),`\\n`,(0,t.jsx)(e.pre,{children:(0,t.jsx)(e.code,{className:\\"language-bash\\",children:`opentelemetry-bootstrap --action=install\\n`})}),`\\n`,(0,t.jsx)(e.p,{children:`It is advised to run this command every time you release a new version of your application so that you can install or update any\\ninstrumentation packages your code needs.`}),`\\n`,(0,t.jsxs)(e.p,{children:[`Some environment variables are needed to provide the configuration for instrumenting your services. These mostly\\nconcern the destination of your traces, but they also identify your service.\\nA `,(0,t.jsx)(e.em,{children:\\"service name\\"}),` is required to make your service distinguishable from the others. Then you need to provide\\nthe `,(0,t.jsx)(e.em,{children:\\"authorization\\"}),\\" headers for authentication with Elastic Observability cloud and the Elastic cloud endpoint where the data is sent.\\"]}),`\\n`,(0,t.jsxs)(e.p,{children:[\\"The API Key you get from your Elastic cloud serverless project must be \\",(0,t.jsx)(e.em,{children:\\"URL-encoded\\"}),\\"; you can do that with the following Python snippet (the <your-api-key> placeholder stands for your own key):\\"]}),`\\n`,(0,t.jsx)(e.pre,{children:(0,t.jsx)(e.code,{className:\\"language-python\\",children:`from urllib.parse import quote\\nquote(\\"ApiKey <your-api-key>\\")\\n`})}),`\\n`,(0,t.jsx)(e.p,{children:\\"Once you have all your configuration values, you can export them via environment variables as below:\\"}),`\\n`,(0,t.jsx)(e.pre,{children:(0,t.jsx)(e.code,{className:\\"language-bash\\",children:`export OTEL_RESOURCE_ATTRIBUTES=service.name=<your-service-name>\\nexport OTEL_EXPORTER_OTLP_HEADERS=\\"Authorization=ApiKey%20<your-url-encoded-api-key>\\"\\nexport OTEL_EXPORTER_OTLP_ENDPOINT=<your-elastic-otlp-endpoint>\\n`})}),`\\n`,(0,t.jsxs)(e.p,{children:[`We are done with the configuration and the last piece of the puzzle is wrapping your service invocation with\\n`,(0,t.jsx)(e.code,{children:\\"opentelemetry-instrument\\"}),\\", the wrapper that provides \\",(0,t.jsx)(e.em,{children:\\"zero-code instrumentation\\"}),\\". 
\\",(0,t.jsx)(e.em,{children:\\"Zero-code\\"}),` (or Automatic) instrumentation means\\nthat the distribution will set up the OpenTelemetry SDK and enable all the previously installed instrumentations for you.\\nUnfortunately `,(0,t.jsx)(e.em,{children:\\"Zero-code\\"}),` instrumentation does not cover all libraries and some \\\\u2014 web frameworks in particular \\\\u2014 will require minimal manual\\nconfiguration.`]}),`\\n`,(0,t.jsx)(e.p,{children:\\"For a web service running with gunicorn it may look like:\\"}),`\\n`,(0,t.jsx)(e.pre,{children:(0,t.jsx)(e.code,{className:\\"language-bash\\",children:`opentelemetry-instrument gunicorn main:app\\n`})}),`\\n`,(0,t.jsxs)(e.p,{children:[\\"The result is an observable application using the industry-standard \\",(0,t.jsx)(e.a,{href:\\"https://opentelemetry.io/\\",rel:\\"nofollow\\",children:\\"OpenTelemetry\\"}),\\" \\\\u2014 offering high-quality instrumentation of many popular Python libraries, a portable API to avoid vendor lock-in and an active community.\\"]}),`\\n`,(0,t.jsx)(e.p,{children:\\"Using Elastic Observability, some out-of-the-box benefits you can expect are: rich trace viewing, Service maps, integrated metrics and log analysis, and more.\\"}),`\\n`,(0,t.jsx)(e.p,{children:(0,t.jsx)(e.img,{src:\\"/assets/images/elastic-opentelemetry-distribution-python/traces-original.png\\",alt:\\"trace sample screenshot\\",width:\\"1278\\",height:\\"1151\\"})}),`\\n`,(0,t.jsx)(e.h2,{id:\\"whats-next\\",children:\\"What\'s next?\\"}),`\\n`,(0,t.jsxs)(e.p,{children:[\\"Elastic is committed to helping OpenTelemetry succeed and to helping our customers use OpenTelemetry effectively in their systems. Last year, we \\",(0,t.jsx)(e.a,{href:\\"https://opentelemetry.io/blog/2023/ecs-otel-semconv-convergence/\\",rel:\\"nofollow\\",children:\\"donated ECS\\"}),\\" and continue to work on integrating it with OpenTelemetry Semantic Conventions. More recently, we are working on \\",(0,t.jsx)(e.a,{href:\\"https://www.elastic.co/observability-labs/blog/elastic-profiling-agent-acceptance-opentelemetry\\",rel:\\"nofollow\\",children:\\"donating our eBPF-based profiler\\"}),\\" to OpenTelemetry. We contribute to many of the language SDKs and other OpenTelemetry projects.\\"]}),`\\n`,(0,t.jsxs)(e.p,{children:[\\"In the Python ecosystem we are active reviewers and contributors of both the \\",(0,t.jsx)(e.a,{href:\\"https://github.com/open-telemetry/opentelemetry-python/\\",rel:\\"nofollow\\",children:\\"opentelemetry-python\\"}),\\" and \\",(0,t.jsx)(e.a,{href:\\"https://github.com/open-telemetry/opentelemetry-python-contrib/\\",rel:\\"nofollow\\",children:\\"opentelemetry-python-contrib\\"}),\\" repositories.\\"]}),`\\n`,(0,t.jsxs)(e.p,{children:[\\"The Elastic Distribution for OpenTelemetry Python is currently an alpha. Please \\",(0,t.jsx)(e.a,{href:\\"https://github.com/elastic/elastic-otel-python/\\",rel:\\"nofollow\\",children:\\"try it out\\"}),\\" and let us know if it might work for you. Watch for the \\",(0,t.jsx)(e.a,{href:\\"https://github.com/elastic/elastic-otel-python/releases\\",rel:\\"nofollow\\",children:\\"latest releases here\\"}),\\". 
You can engage with us on \\",(0,t.jsx)(e.a,{href:\\"https://github.com/elastic/elastic-otel-python/issues\\",rel:\\"nofollow\\",children:\\"the project issue tracker\\"}),\\".\\"]}),`\\n`,(0,t.jsx)(e.p,{children:\\"We are eager to know your use cases to help you succeed in your Observability journey.\\"}),`\\n`,(0,t.jsx)(e.p,{children:(0,t.jsx)(e.em,{children:\\"The release and timing of any features or functionality described in this post remain at Elastic\'s sole discretion. Any features or functionality not currently available may not be delivered on time or at all.\\"})}),`\\n`,(0,t.jsx)(e.h2,{id:\\"resources\\",children:\\"Resources\\"}),`\\n`,(0,t.jsxs)(e.ul,{children:[`\\n`,(0,t.jsx)(e.li,{children:(0,t.jsx)(e.a,{href:\\"https://www.elastic.co/blog/elastic-opentelemetry-sdk-distributions\\",rel:\\"nofollow\\",children:\\"https://www.elastic.co/blog/elastic-opentelemetry-sdk-distributions\\"})}),`\\n`,(0,t.jsx)(e.li,{children:(0,t.jsx)(e.a,{href:\\"https://www.elastic.co/observability-labs/blog/elastic-distribution-opentelemetry-java-agent\\",rel:\\"nofollow\\",children:\\"https://www.elastic.co/observability-labs/blog/elastic-distribution-opentelemetry-java-agent\\"})}),`\\n`,(0,t.jsx)(e.li,{children:(0,t.jsx)(e.a,{href:\\"https://www.elastic.co/observability-labs/blog/elastic-opentelemetry-distribution-dotnet-applications\\",rel:\\"nofollow\\",children:\\"https://www.elastic.co/observability-labs/blog/elastic-opentelemetry-distribution-dotnet-applications\\"})}),`\\n`,(0,t.jsx)(e.li,{children:(0,t.jsx)(e.a,{href:\\"https://www.elastic.co/observability-labs/blog/elastic-opentelemetry-distribution-node-js\\",rel:\\"nofollow\\",children:\\"https://www.elastic.co/observability-labs/blog/elastic-opentelemetry-distribution-node-js\\"})}),`\\n`,(0,t.jsx)(e.li,{children:(0,t.jsx)(e.a,{href:\\"https://www.elastic.co/observability-labs/blog/manual-instrumentation-python-apps-opentelemetry\\",rel:\\"nofollow\\",children:\\"https://www.elastic.co/observability-labs/blog/manual-instrumentation-python-apps-opentelemetry\\"})}),`\\n`,(0,t.jsx)(e.li,{children:(0,t.jsx)(e.a,{href:\\"https://www.elastic.co/observability-labs/blog/auto-instrumentation-python-applications-opentelemetry\\",rel:\\"nofollow\\",children:\\"https://www.elastic.co/observability-labs/blog/auto-instrumentation-python-applications-opentelemetry\\"})}),`\\n`,(0,t.jsx)(e.li,{children:(0,t.jsx)(e.a,{href:\\"https://www.elastic.co/observability-labs/blog/opentelemetry-observability\\",rel:\\"nofollow\\",children:\\"https://www.elastic.co/observability-labs/blog/opentelemetry-observability\\"})}),`\\n`]})]})}function d(n={}){let{wrapper:e}=n.components||{};return e?(0,t.jsx)(e,{...n,children:(0,t.jsx)(h,{...n})}):h(n)}return v(O);})();\\n;return Component;"},"_id":"articles/introducing-the-elastic-distribution-for-opentelemetry-python.mdx","_raw":{"sourceFilePath":"articles/introducing-the-elastic-distribution-for-opentelemetry-python.mdx","sourceFileName":"introducing-the-elastic-distribution-for-opentelemetry-python.mdx","sourceFileDir":"articles","contentType":"mdx","flattenedPath":"articles/introducing-the-elastic-distribution-for-opentelemetry-python"},"type":"Article","imageUrl":"/assets/images/elastic-opentelemetry-distribution-python/python.jpg","readingTime":"5 min read","url":"/elastic-opentelemetry-distribution-python","headings":[{"level":2,"title":"Background","href":"#background"},{"level":2,"title":"Design choices","href":"#design-choices"},{"level":2,"title":"Getting 
started","href":"#getting-started"},{"level":2,"title":"What\'s next?","href":"#whats-next"},{"level":2,"title":"Resources","href":"#resources"}]},{"title":"Gaining new perspectives beyond logging: An introduction to application performance monitoring","slug":"introduction-apm-tracing-logging","date":"2023-05-30","description":"Change is on the horizon for the world of logging. In this post, we’ll outline a recommended journey for moving from just logging to a fully integrated solution with logs, traces, and APM.","image":"log-management-720x420_(2).jpeg","author":[{"slug":"david-hope","type":"Author","_raw":{}}],"tags":[{"slug":"java","type":"Tag","_raw":{}},{"slug":"log-analytics","type":"Tag","_raw":{}},{"slug":"apm","type":"Tag","_raw":{}},{"slug":"opentelemetry","type":"Tag","_raw":{}}],"body":{"raw":"\\n## Prioritize customer experience with APM and tracing\\n\\nEnterprise software development and operations has become an interesting space. We have some incredibly powerful tools at our disposal, yet as an industry, we have failed to adopt many of these tools that can make our lives easier. One such tool that is currently underutilized is [application performance monitoring](https://www.elastic.co/blog/adding-free-and-open-elastic-apm-as-part-of-your-elastic-observability-deployment) (APM) and tracing, despite the fact that OpenTelemetry has made it possible to adopt at low friction.\\n\\nLogging, however, is ubiquitous. Every software application has logs of some kind, and the default workflow for troubleshooting (even today) is to go from exceptions experienced by customers and systems to the logs and start from there to find a solution.\\n\\nThere are various challenges with this, one of the main ones being that logs often do not give enough information to solve the problem. Many services today return ambiguous 500 errors with little or nothing to go on. What if there isn’t an error or log file at all or the problem is that the system is very slow? Logging alone cannot help solve these problems. This leaves users with half broken systems and poor user experiences. We’ve all been on the wrong side of this, and it can be incredibly frustrating.\\n\\nThe question I find myself asking is why does the customer experience often come second to errors? If the customer experience is a top priority, then a strategy should be in place to adopt tracing and APM and make this as important as logging. Users should stop going to logs by default and thinking primarily in logs, as many are doing today. This will also come with some required changes to mental models.\\n\\nWhat’s the path to get there? That’s exactly what we will explore in this blog post. We will start by talking about supporting organizational changes, and then we’ll outline a recommended journey for moving from just logging to a fully integrated solution with logs, traces, and APM.\\n\\n## Cultivating a new monitoring mindset: How to drive APM and tracing adoption\\n\\nTo get teams to shift their troubleshooting mindset, what organizational changes need to be made?\\n\\nInitially, businesses should consider strategic priorities and goals that need to be shared broadly among the teams. 
One thing that can help drive this in a very large organization is to consider an entire product team devoted to Observability or a CoE (Center of Excellence) with its own roadmap and priorities.\\n\\nThis team (either virtual or permanent) should start with the customer in mind and work backward, starting with key questions like: What do I need to collect? What do I need to observe? How do I act? Once team members understand the answers to these questions, they can start to think about the technology decisions needed to drive those outcomes.\\n\\nFrom a tracing and APM perspective, the areas of greatest concern are the customer experience, service level objectives, and service level outcomes. From here, organizations can start to implement programs of work to continuously improve and share knowledge across teams. This will help to align teams around a common framework with shared goals.\\n\\nIn the next few sections, we will go through a four step journey to help you maximize your success with APM and tracing. This journey will take you through the following key steps on your path to successful APM adoption:\\n\\n1. **Ingest:** What choices do you have to make to get tracing activated and start ingesting trace data into your observability tools?\\n2. **Integrate:** How does tracing integrate with logs to enable full end-to-end observability, and what else beyond simple tracing can you utilize to get even better resolution on your data?\\n3. **Analytics and AIOps:** Improve the customer experience and reduce the noise through machine learning.\\n4. **Scale and total cost of ownership:** Roll out enterprise-wide tracing and adopt strategies to deal with data volume.\\n\\n## 1. Ingest\\n\\nIngesting data for APM purposes generally involves “instrumenting” the application. In this section, we will explore methods for instrumenting applications, talk a little bit about sampling, and finally wrap up with a note on using common schemas for data representation.\\n\\n### Getting started with instrumentation\\n\\nWhat options do we have for ingesting APM and trace data? There are many, many options we will discuss to help guide you, but first let\'s take a step back. APM has a deep history — in the very first implementations of APM, people were concerned mainly with timing methods, like the example below:\\n\\n![timing methods](/assets/images/introduction-apm-tracing-logging/blog-elastic-timing-methods.png)\\n\\nUsually you had a configuration file to specify which methods you wanted to time, and the APM implementation would instrument the specified code with method timings.\\n\\nFrom here things started to evolve, and one of the first additions to APM was to add in tracing.\\n\\nFor Java, it’s fairly trivial to implement a system to do this by using what\'s known as a Java agent. You just specify the -javaagent command line argument, and the agent code gets access to the dynamic compilation routines within Java so it can modify the code before it is compiled into machine code, allowing you to “wrap” specific methods with timing or tracing routines. 
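The same idea is easy to sketch in Python: a decorator that wraps a function with a timing routine (a purely illustrative sketch; real agents hook into the runtime and report to a backend rather than printing):\\n\\n```python\\nimport functools\\nimport time\\n\\ndef timed(func):\\n    \\"\\"\\"Wrap a function with a simple timing routine, in the spirit of early APM tools.\\"\\"\\"\\n    @functools.wraps(func)\\n    def wrapper(*args, **kwargs):\\n        start = time.perf_counter()\\n        try:\\n            return func(*args, **kwargs)\\n        finally:\\n            elapsed_ms = (time.perf_counter() - start) * 1000\\n            print(f\\"{func.__qualname__} took {elapsed_ms:.2f} ms\\")\\n    return wrapper\\n\\n@timed\\ndef handle_request():\\n    time.sleep(0.05)  # stand-in for real work\\n```\\n\\n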
So, auto instrumenting Java was one of the first things that the original APM vendors did.\\n\\n[OpenTelemetry has agents like this](https://opentelemetry.io/docs/instrumentation/java/automatic/), and most observability vendors that offer APM solutions have their own proprietary ways of doing this, often with more advanced and differing features from the open source tooling.\\n\\nThings have moved on since then, and Node.JS and Python are now popular.\\n\\nAs a result, ways of auto instrumenting these language runtimes have appeared, which mostly work by injecting the libraries into the code before starting them up. OpenTelemetry has a way of doing this on Kubernetes with an Operator and sidecar [here](https://github.com/open-telemetry/opentelemetry-operator/blob/main/README.md), which supports Python, Node.JS, Java, and DotNet.\\n\\nThe other alternative is to start adding APM and tracing API calls into your own code, which is not dissimilar to adding logging functionality. You may even wish to create an abstraction in your code to deal with this cross-cutting concern, although this is less of a problem now that there are open standards with which you can implement this.\\n\\nYou can see an example of how to add OpenTelemetry spans and attributes to your code for manual instrumentation below and [here](https://github.com/davidgeorgehope/ChatGPTMonitoringWithOtel/blob/main/monitor.py).\\n\\n```python\\nfrom flask import Flask\\nimport monitor  # Import the module\\nfrom opentelemetry.exporter.otlp.proto.grpc.trace_exporter import OTLPSpanExporter\\nimport urllib\\nimport os\\n\\nfrom opentelemetry import trace\\nfrom opentelemetry.sdk.resources import SERVICE_NAME, Resource\\nfrom opentelemetry.sdk.trace import TracerProvider\\nfrom opentelemetry.sdk.trace.export import BatchSpanProcessor\\nfrom opentelemetry.instrumentation.requests import RequestsInstrumentor\\n\\n\\n# Service name is required for most backends\\nresource = Resource(attributes={\\n    SERVICE_NAME: \\"your-service-name\\"\\n})\\n\\nprovider = TracerProvider(resource=resource)\\nprocessor = BatchSpanProcessor(OTLPSpanExporter(endpoint=os.getenv(\'OTEL_EXPORTER_OTLP_ENDPOINT\'),\\n                                                headers=\\"Authorization=Bearer%20\\" + os.getenv(\'OTEL_EXPORTER_OTLP_AUTH_HEADER\')))\\n\\nprovider.add_span_processor(processor)\\ntrace.set_tracer_provider(provider)\\ntracer = trace.get_tracer(__name__)\\nRequestsInstrumentor().instrument()\\n\\n# Initialize Flask app and instrument it\\napp = Flask(__name__)\\n\\n@app.route(\\"/completion\\")\\n@tracer.start_as_current_span(\\"do_work\\")\\ndef completion():\\n    span = trace.get_current_span()\\n    if span:\\n        span.set_attribute(\\"completion_count\\", 1)\\n```\\n\\nBy implementing APM in this way, you could even eliminate the need to do any logging by storing all your required logging information within span attributes, exceptions, and metrics. The downside is that you can only do this with code that you own, so you will not be able to remove all logs this way.\\n\\n### Sampling\\n\\nMany people don’t realize that APM is an expensive process. It adds a lot of CPU and memory overhead to your applications, and although there is a lot of value to be had, there are certainly trade-offs to be made.\\n\\nShould you sample everything 100% and eat the cost? Or should you think about an intelligent trade-off with fewer samples or even tail-based sampling, which many products commonly support? 
Here, we will talk about the two most common sampling techniques — head-based sampling and tail-based sampling — to help you decide.\\n\\n**Head-based sampling** \\nIn this approach, sampling decisions are made at the beginning of a trace, typically at the entry point of a service or application. A fixed rate of traces is sampled, and this decision propagates through all the services involved in a distributed trace.\\n\\nWith head-based sampling, you can control the rate using a configuration, allowing you to control the percentage of requests that are sampled and reported to the APM server. For instance, a sampling rate of 0.5 means that only 50% of requests are sampled and sent to the server. This is useful for reducing the amount of collected data while still maintaining a representative sample of your application\'s performance.\\n\\n**Tail-based sampling** \\nUnlike head-based sampling, tail-based sampling makes sampling decisions after the entire trace has been completed. This allows for more intelligent sampling decisions based on the actual trace data, such as only reporting traces with errors or traces that exceed a certain latency threshold.\\n\\nWe recommend tail-based sampling because it has the highest likelihood of reducing the noise and helping you focus on the most important issues. It also helps keep costs down on the data store side. A downside of tail-based sampling, however, is that it results in more data being generated from APM agents. This could use more CPU and memory on your application.\\n\\n### OpenTelemetry Semantic Conventions and Elastic Common Schema\\n\\nOpenTelemetry prescribes Semantic Conventions, or Semantic Attributes, to establish uniform names for various operations and data types. Adhering to these conventions fosters standardization across codebases, libraries, and platforms, ultimately streamlining the monitoring process.\\n\\nCreating OpenTelemetry spans for tracing is flexible, allowing implementers to annotate them with operation-specific attributes. These spans represent particular operations within and between systems, often involving widely recognized protocols like HTTP or database calls. To effectively represent and analyze a span in monitoring systems, supplementary information is necessary, contingent upon the protocol and operation type.\\n\\nUnifying attribution methods across different languages is essential for operators to easily correlate and cross-analyze telemetry from polyglot microservices without needing to grasp language-specific nuances.\\n\\nElastic\'s recent contribution of the Elastic Common Schema to OpenTelemetry enhances Semantic Conventions to encompass logs and security.\\n\\nAbiding by a shared schema yields considerable benefits, enabling operators to rapidly identify intricate interactions and correlate logs, metrics, and traces, thereby expediting root cause analysis and reducing time spent searching for logs and pinpointing specific time frames.\\n\\nWe advocate for adhering to established schemas such as ECS when defining trace, metrics, and log data in your applications, particularly when developing new code. This practice will conserve time and effort when addressing issues.\\n\\n## 2. Integrate\\n\\nIntegrations are very important for APM. How well your solution can integrate with other tools and technologies such as cloud, as well as its ability to integrate logs and metrics into your tracing data, is critical to fully understand the customer experience. 
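To make the logs integration concrete before we dive in, here is a minimal Python sketch that uses only the OpenTelemetry API to stamp each log line with the active trace id; the agent-based setup described below does the equivalent for you automatically:\\n\\n```python\\nimport logging\\n\\nfrom opentelemetry import trace\\n\\nclass TraceContextFilter(logging.Filter):\\n    \\"\\"\\"Attach the current trace id to each log record so logs and traces correlate.\\"\\"\\"\\n    def filter(self, record):\\n        ctx = trace.get_current_span().get_span_context()\\n        record.trace_id = trace.format_trace_id(ctx.trace_id) if ctx.is_valid else \\"-\\"\\n        return True\\n\\nlogging.basicConfig(format=\\"%(asctime)s trace.id=%(trace_id)s %(message)s\\")\\nlogger = logging.getLogger(__name__)\\nlogger.addFilter(TraceContextFilter())\\n\\ntracer = trace.get_tracer(__name__)\\nwith tracer.start_as_current_span(\\"checkout\\"):\\n    logger.warning(\\"payment retried\\")  # carries the surrounding span\'s trace id\\n```\\n\\n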
In addition, most APM vendors have adjacent solutions for [synthetic monitoring](https://www.elastic.co/observability/synthetic-monitoring) and profiling to gain deeper perspectives to supercharge your APM. We will explore these topics in the following section.\\n\\n### APM + logs = superpowers!\\n\\nBecause APM agents can instrument code, they can also instrument code that is being used for logging. This way, you can capture log lines directly within APM. [This is normally simple to enable](https://www.elastic.co/guide/en/observability/master/logs-send-application.html).\\n\\nWith this enabled, you will also get automated injection of useful fields like these:\\n\\n- service.name, service.version, service.environment\\n- trace.id, transaction.id, error.id\\n\\nThis means log messages will be automatically correlated with transactions as shown below, making it far easier to reduce mean time to resolution (MTTR) and find the needle in the haystack:\\n\\n![latency distribution](/assets/images/introduction-apm-tracing-logging/blog-elastic-latency-distribution.png)\\n\\nIf this is available to you, we highly recommend turning it on.\\n\\n### Deploying APM inside Kubernetes\\n\\nIt is common for people to want to deploy APM inside a Kubernetes environment, and tracing is critical for monitoring applications in cloud-native environments. There are three different ways you can tackle this.\\n\\n**1. Auto instrumentation using sidecars** \\nWith Kubernetes, it is possible to use an init container and something that will modify Kubernetes manifests on the fly to auto instrument your applications.\\n\\nThe init container is used simply to copy the required library or jar file that you need into the main Kubernetes pod\'s container at startup. Then, you can use [Kustomize](https://kustomize.io/) to add the required command line arguments to bootstrap your agents.\\n\\nIf you are not familiar with it, Kustomize adds, removes, or modifies Kubernetes manifests on the fly. It is even available as a flag to the Kubernetes CLI — simply execute kubectl apply -k.\\n\\nOpenTelemetry has an [operator](https://github.com/open-telemetry/opentelemetry-operator/blob/main/README.md) that does all this for you automatically (without the need for Kustomize) for Java, DotNet, Python, and Node.JS, and many vendors also have their own operator or [helm charts](https://www.elastic.co/guide/en/apm/attacher/current/apm-attacher.html) that can achieve the same result.\\n\\n**2. Baking APM into containers or code** \\nA second option for rolling out APM in Kubernetes — and indeed any containerized environment — is using Docker to bake the APM agents and configuration into a dockerfile.\\n\\nHave a look at an example here using the OpenTelemetry Java Agent:\\n\\n```dockerfile\\n# Use the official OpenJDK image as the base image\\nFROM openjdk:11-jre-slim\\n\\n# Set up environment variables\\nENV APP_HOME /app\\nENV OTEL_VERSION 1.7.0-alpha\\nENV OTEL_JAVAAGENT_URL https://github.com/open-telemetry/opentelemetry-java-instrumentation/releases/download/v${OTEL_VERSION}/opentelemetry-javaagent-${OTEL_VERSION}-all.jar\\n\\n# Create the application directory\\nRUN mkdir $APP_HOME\\nWORKDIR $APP_HOME\\n\\n# Download the OpenTelemetry Java agent\\nADD ${OTEL_JAVAAGENT_URL} /otel-javaagent.jar\\n\\n# Add your Java application JAR file\\nCOPY your-java-app.jar $APP_HOME/your-java-app.jar\\n\\n# Expose the application port (e.g. 
8080)\\nEXPOSE 8080\\n\\n# Configure the OpenTelemetry Java agent and run the application\\nCMD java -javaagent:/otel-javaagent.jar \\\\\\n -Dotel.resource.attributes=service.name=your-service-name \\\\\\n -Dotel.exporter.otlp.endpoint=your-otlp-endpoint:4317 \\\\\\n -Dotel.exporter.otlp.insecure=true \\\\\\n -jar your-java-app.jar\\n```\\n\\n**3. Tracing using a service mesh (Envoy/Istio)** \\nThe final option applies if you are using a service mesh. A service mesh is a dedicated infrastructure layer for handling service-to-service communication in a microservices architecture. It provides a transparent, scalable, and efficient way to manage and control the communication between services, enabling developers to focus on building application features without worrying about inter-service communication complexities.\\n\\nThe great thing about this is that we can activate tracing within the proxy and therefore get visibility into requests between services. We don’t have to change any code or even run APM agents for this; we simply turn on the OpenTelemetry collector that exists within the proxy — therefore this is likely the lowest overhead solution. [Learn more about this option](https://www.envoyproxy.io/docs/envoy/latest/start/sandboxes/opentelemetry).\\n\\n### Synthetics and universal profiling\\n\\nMost APM vendors have add-ons to the primary APM use cases. Typically, we see synthetics and [continuous profiling](https://www.elastic.co/observability/universal-profiling) being added to APM solutions. APM can integrate with both, and there is some good value in bringing these technologies together to give even more insights into issues.\\n\\n**Synthetics** \\nSynthetic monitoring is a method used to measure the performance, availability, and reliability of web applications, websites, and APIs by simulating user interactions and traffic. It involves creating scripts or automated tests that mimic real user behavior, such as navigating through pages, filling out forms, or clicking buttons, and then running these tests periodically from different locations and devices.\\n\\nThis gives Development and Operations teams the ability to spot problems far earlier than they might otherwise, catching issues before real users do in many cases.\\n\\nSynthetics can be integrated with APM — inject an APM agent into the website when the script runs, so even if you didn’t put end user monitoring into your website initially, it can be injected at run time. This usually happens without any input from the user. From there, a trace id for each request can be passed down through the various layers of the system, allowing teams to follow the request all the way from the synthetics script to the lowest levels of the application stack such as the database.\\n\\n![observability rainbow sandals](/assets/images/introduction-apm-tracing-logging/blog-elastic-rainbow-sandals.png)\\n\\n**Universal profiling** \\n“Profiling” is a dynamic method of analyzing the complexity of a program, such as CPU utilization or the frequency and duration of function calls. With profiling, you can locate exactly which parts of your application are consuming the most resources. [“Continuous profiling”](https://www.elastic.co/observability/universal-profiling) is a more powerful version of profiling that adds the dimension of time. 
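To get a feel for what basic profiling reveals, you can try Python\'s built-in profiler on any piece of code (a one-shot illustration; continuous profilers gather this kind of data fleet-wide, all the time, at much lower overhead):\\n\\n```python\\nimport cProfile\\nimport pstats\\n\\ndef hot_loop():\\n    return sum(i * i for i in range(1_000_000))\\n\\nprofiler = cProfile.Profile()\\nprofiler.enable()\\nhot_loop()\\nprofiler.disable()\\n\\n# Show the five functions with the most cumulative CPU time\\npstats.Stats(profiler).sort_stats(\\"cumulative\\").print_stats(5)\\n```\\n\\n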
By understanding your system’s resources over time, you can then locate, debug, and fix issues related to performance.\\n\\nUniversal profiling is a further extension of this, which allows you to capture profile information about all of the code running in your system all the time. Using a technology like [eBPF](https://www.elastic.co/blog/ebpf-observability-security-workload-profiling) can allow you to see _all_ the function calls in your systems, including into things like the Kubernetes runtime. Doing this gives you the ability to finally see unknown unknowns — things you didn’t know were problems. This is very different from APM, which is really about tracking individual traces and requests and the overall customer experience. Universal profiling is about overcoming those issues you didn’t even know existed and even answering the question “What is my most expensive line of code?”\\n\\nUniversal profiling can be linked into APM, showing you profiles that occurred during a specific customer issue, for example, or by linking profiles directly to traces by looking at the global state that exists at the thread level. These technologies can work wonders when used together.\\n\\nTypically, profiles are viewed as “flame graphs” shown below. The boxes represent the amount of “on-cpu” time spent executing a particular function.\\n\\n![observability universal profiling](/assets/images/introduction-apm-tracing-logging/blog-elastic-universal-profiling.png)\\n\\n## 3. Analytics and AIOps\\n\\nThe interesting thing about APM is it opens up a whole new world of analytics versus just logs. All of a sudden, you have access to the information flows from _inside_ applications.\\n\\nThis allows you to easily capture things like the amount of money a specific customer is currently spending on your most critical ecommerce store, or look at failed trades in a brokerage app to see how much lost revenue those failures are impacting. You can even then apply machine learning algorithms to project future spend or look at anomalies occurring in this data, giving you a new window into how your business runs.\\n\\nIn this section, we will look at ways to do this and how to get the most out of this new world, as well as how to apply AIOps practices to this new data. We will also discuss getting SLIs and SLOs setup for APM data.\\n\\n### Getting business data into your traces\\n\\nThere are generally two ways of getting business data into your traces. You can modify code and add in Span attributes, an example of which is available [here](https://github.com/davidgeorgehope/ChatGPTMonitoringWithOtel/blob/main/monitor.py) and shown below. Or you can write an extension or a plugin, which has the benefit of avoiding code changes. OpenTelemetry supports [adding extensions in its auto-instrumentation agents](https://opentelemetry.io/docs/instrumentation/java/extensions/). 
Most other APM vendors usually have something similar.\\n\\n```python\\ndef count_completion_requests_and_tokens(func):\\n    @wraps(func)\\n    def wrapper(*args, **kwargs):\\n        counters[\'completion_count\'] += 1\\n        response = func(*args, **kwargs)\\n\\n        token_count = response.usage.total_tokens\\n        prompt_tokens = response.usage.prompt_tokens\\n        completion_tokens = response.usage.completion_tokens\\n        cost = calculate_cost(response)\\n        strResponse = json.dumps(response)\\n\\n        # Set OpenTelemetry attributes\\n        span = trace.get_current_span()\\n        if span:\\n            span.set_attribute(\\"completion_count\\", counters[\'completion_count\'])\\n            span.set_attribute(\\"token_count\\", token_count)\\n            span.set_attribute(\\"prompt_tokens\\", prompt_tokens)\\n            span.set_attribute(\\"completion_tokens\\", completion_tokens)\\n            span.set_attribute(\\"model\\", response.model)\\n            span.set_attribute(\\"cost\\", cost)\\n            span.set_attribute(\\"response\\", strResponse)\\n        return response\\n    return wrapper\\n```\\n\\n### Using business data for fun and profit\\n\\nOnce you have the business data in your traces, you can start to have some fun with it. Take a look at the example below for a financial services fraud team. Here we are tracking transactions — average transaction value for our larger business customers. Crucially, we can see if there are any unusual transactions.\\n\\n![customer count](/assets/images/introduction-apm-tracing-logging/blog-elastic-customer-count.png)\\n\\nA lot of this is powered by machine learning, which can classify transactions or do [anomaly detection](https://www.elastic.co/blog/reduce-mttd-ml-machine-learning-observability). Once you start capturing the data, it is possible to do a lot of useful things like this, and with a flexible platform, integrating machine learning models into this process becomes a breeze.\\n\\n![fraud 12-h](/assets/images/introduction-apm-tracing-logging/blog-elastic-fraud-12h.png)\\n\\n### SLIs and SLOs\\n\\nService level indicators (SLIs) and service level objectives (SLOs) serve as critical components for maintaining and enhancing application performance. SLIs, which represent key performance metrics such as latency, error rate, and throughput, help quantify an application\'s performance, while SLOs establish target performance levels to meet user expectations.\\n\\nBy selecting relevant SLIs and setting achievable SLOs, organizations can better monitor their application\'s performance using APM tools. Continually evaluating and adjusting SLIs and SLOs in response to changes in application requirements, user expectations, or the competitive landscape ensures that the application remains competitive and delivers an exceptional user experience.\\n\\nIn order to define and track SLIs and SLOs, APM becomes a critical perspective that is needed for understanding the user experience. Once APM is implemented, we recommend that organizations perform the following steps:\\n\\n- Define SLOs and SLIs required to track them.\\n- Define SLO budgets and how they are calculated. 
Reflect the business’s perspective and set realistic targets.\\n- Define SLIs to be measured from a user experience perspective.\\n- Define different alerting and paging rules: page only on customer-facing SLO degradations, record symptomatic alerts, and notify on critical symptomatic alerts.\\n\\nSynthetic monitoring and end user monitoring (EUM) can also provide the additional data needed to understand latency, throughput, and error rate from the user’s perspective, which is where good business-focused metrics and data matter most.\\n\\n## 4. Scale and total cost of ownership\\n\\nWith increased perspectives, customers often run into scalability and total cost of ownership issues. All this new data can be overwhelming. Luckily, there are various techniques you can use to deal with this. Tracing itself can actually help with volume challenges because you can decompose unstructured logs and combine them with traces, which leads to additional efficiency. You can also use different sampling methods to deal with scale challenges (i.e., both techniques we previously mentioned).\\n\\nIn addition to this, for large enterprise scale, we can use streaming pipelines like Kafka or Pulsar to manage the data volumes. This has an additional benefit that you get for free: if you take down the systems consuming the data or they face outages, it is less likely you will lose data.\\n\\nWith this configuration in place, your “Observability pipeline” architecture would look like this:\\n\\n![opentelemetry collector](/assets/images/introduction-apm-tracing-logging/blog-elastic-opentelemetry-collector.png)\\n\\nThis completely decouples your sources of data from your chosen observability solution, which will future-proof your observability stack going forward, enable you to reach massive scale, and make you less reliant on specific vendor code for collection of data.\\n\\nAnother thing we recommend doing is being intelligent about instrumentation. This has two benefits: you will get some CPU cycles back in the instrumented application, and your backend data collection systems will have less data to process. If you know, for example, that you have no interest in tracking calls to a specific endpoint, you can exclude those classes and methods from instrumentation.\\n\\nAnd finally, data tiering is a transformative approach for managing data storage that can significantly reduce the total cost of ownership (TCO) for businesses. Primarily, it allows organizations to store data across different types of storage mediums based on their accessibility needs and the value of the data. For instance, frequently accessed, high-value data can be stored in expensive, high-speed storage, while less frequently accessed, lower-value data can be stored in cheaper, slower storage.\\n\\nThis approach, often incorporated in cloud storage solutions, enables cost optimization by ensuring that businesses only pay for the storage they need at any given time. Furthermore, it provides the flexibility to scale up or down based on demand, eliminating the need for large capital expenditures on storage infrastructure. This scalability also reduces the need for costly over-provisioning to handle potential future demand.\\n\\n## Conclusion\\n\\nIn today\'s highly competitive and fast-paced software development landscape, simply relying on logging is no longer sufficient to ensure top-notch customer experiences. 
By adopting APM and distributed tracing, organizations can gain deeper insights into their systems, proactively detect and resolve issues, and maintain a robust user experience.\\n\\nIn this blog, we have explored the journey of moving from a logging-only approach to a comprehensive observability strategy that integrates logs, traces, and APM. We discussed the importance of cultivating a new monitoring mindset that prioritizes customer experience, and the necessary organizational changes required to drive APM and tracing adoption. We also delved into the various stages of the journey, including data ingestion, integration, analytics, and scaling.\\n\\nBy understanding and implementing these concepts, organizations can optimize their monitoring efforts, reduce MTTR, and keep their customers satisfied. Ultimately, prioritizing customer experience through APM and tracing can lead to a more successful and resilient enterprise in today\'s challenging environment.\\n\\n[Learn more about APM at Elastic](https://www.elastic.co/observability/application-performance-monitoring).\\n","code":"var Component=(()=>{var p=Object.create;var a=Object.defineProperty;var u=Object.getOwnPropertyDescriptor;var g=Object.getOwnPropertyNames;var m=Object.getPrototypeOf,f=Object.prototype.hasOwnProperty;var y=(n,e)=>()=>(e||n((e={exports:{}}).exports,e),e.exports),w=(n,e)=>{for(var i in e)a(n,i,{get:e[i],enumerable:!0})},r=(n,e,i,s)=>{if(e&&typeof e==\\"object\\"||typeof e==\\"function\\")for(let o of g(e))!f.call(n,o)&&o!==i&&a(n,o,{get:()=>e[o],enumerable:!(s=u(e,o))||s.enumerable});return n};var b=(n,e,i)=>(i=n!=null?p(m(n)):{},r(e||!n||!n.__esModule?a(i,\\"default\\",{value:n,enumerable:!0}):i,n)),v=n=>r(a({},\\"__esModule\\",{value:!0}),n);var c=y((T,l)=>{l.exports=_jsx_runtime});var A={};w(A,{default:()=>d,frontmatter:()=>k});var t=b(c()),k={title:\\"Gaining new perspectives beyond logging: An introduction to application performance monitoring\\",slug:\\"introduction-apm-tracing-logging\\",date:\\"2023-05-30\\",description:\\"Change is on the horizon for the world of logging. In this post, we\\\\u2019ll outline a recommended journey for moving from just logging to a fully integrated solution with logs, traces, and APM.\\",author:[{slug:\\"david-hope\\"}],image:\\"log-management-720x420_(2).jpeg\\",tags:[{slug:\\"java\\"},{slug:\\"log-analytics\\"},{slug:\\"apm\\"},{slug:\\"opentelemetry\\"}]};function h(n){let e={a:\\"a\\",br:\\"br\\",code:\\"code\\",em:\\"em\\",h2:\\"h2\\",h3:\\"h3\\",img:\\"img\\",li:\\"li\\",ol:\\"ol\\",p:\\"p\\",pre:\\"pre\\",strong:\\"strong\\",ul:\\"ul\\",...n.components};return(0,t.jsxs)(t.Fragment,{children:[(0,t.jsx)(e.h2,{id:\\"prioritize-customer-experience-with-apm-and-tracing\\",children:\\"Prioritize customer experience with APM and tracing\\"}),`\\n`,(0,t.jsxs)(e.p,{children:[\\"Enterprise software development and operations has become an interesting space. We have some incredibly powerful tools at our disposal, yet as an industry, we have failed to adopt many of these tools that can make our lives easier. One such tool that is currently underutilized is \\",(0,t.jsx)(e.a,{href:\\"https://www.elastic.co/blog/adding-free-and-open-elastic-apm-as-part-of-your-elastic-observability-deployment\\",rel:\\"nofollow\\",children:\\"application performance monitoring\\"}),\\" (APM) and tracing, despite the fact that OpenTelemetry has made it possible to adopt at low friction.\\"]}),`\\n`,(0,t.jsx)(e.p,{children:\\"Logging, however, is ubiquitous. 
## Kibana: How to create impactful visualizations with magic formulas? (part 1)

### Introduction

In the previous blog post, [Designing Intuitive Kibana Dashboards as a non-designer](https://www.elastic.co/blog/designing-intuitive-kibana-dashboards-as-a-non-designer), we highlighted the importance of creating intuitive dashboards.
It demonstrated how simple changes (grouping themes, changing chart types, and more) can make a difference in understanding your data. When delivering courses like [Data Analysis with Kibana](https://www.elastic.co/training/data-analysis-with-kibana) or [Elastic Observability Engineer](https://www.elastic.co/training/elastic-observability-engineer), we emphasize this blog post and how these changes help bring essential information to the surface. I like a complementary approach for reaching this goal: using two colors to separate the highest data values from the common ones.

To illustrate the idea, we will use the _Sample flight data_ dataset. Let's compare two visualizations ranking the top 10 destination countries by total number of flights. Which visualization has a higher impact?

![Flights: Top 10 destinations](/assets/images/kibana-impactful-visualizations-with-magic-formulas-part1/blog-1-dbg-excalidraw-flights-teaser-intro.png)

If you chose the second one, you may be wondering how this was done with the Kibana Lens editor. While preparing for the certification last year, I found a way to achieve this result. The secret is using two different layers and some magic formulas. This post explains how math in Lens formulas helps create two-color data visualizations.

We start with a first example that emphasizes only the highest value of the dataset we are focusing on. The second example describes how to highlight other high values (as shown in the illustration above).

_[Note: the tips explained in this blog post apply from v7.15 onward.]_

## Only the highest value

To understand how math helps separate high values from common ones, let's start with the first example: emphasizing only the highest value.

![1.1 flights](/assets/images/kibana-impactful-visualizations-with-magic-formulas-part1/blog-1-wbg-flights-1.1-teaser.png)

We start with a horizontal bar chart:

![1.1 flights: Lens bar horizontal chart](/assets/images/kibana-impactful-visualizations-with-magic-formulas-part1/blog-1-wbg-flights-1.1-kibana-bar-horizontal-setup.png)

First, we need to identify the highest value in the scope we are examining. We will use one of the overall functions: **overall_max()**, a pipeline function (equivalent to a pipeline aggregation in Query DSL).

In our example, we group the flights by destination country. This means we count the number of flights for each DestCountry (= 1 bucket). **overall_max()** returns the count of the bucket with the highest value.

The math trick is to divide the number of flights in each bucket by the maximum value found among all buckets. Exactly one bucket returns 1: the bucket matching the max value found by overall_max(). Every other bucket returns a value between 0 and 1. We then use **floor()** to round any 0.xxx value down to 0.

![1.1 flights: explaining floor()](/assets/images/kibana-impactful-visualizations-with-magic-formulas-part1/blog-1-wbg-flights-1.1-explaination-floor.png)

Now we can multiply it by count() and we have our formula for the first layer!

**_Layer 1_**: `count()*floor(count()/overall_max(count()))`
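To see the arithmetic concretely, here is a small Python sketch that mimics what the formula computes for each bucket (the countries and counts are made up for illustration, not taken from the sample dataset):

```python
import math

# Hypothetical per-bucket flight counts (one bucket per destination country)
buckets = {"IT": 2371, "US": 1987, "CN": 1096, "CA": 944}

overall_max = max(buckets.values())  # what overall_max(count()) returns

for country, count in buckets.items():
    indicator = math.floor(count / overall_max)  # 1 for the top bucket, else 0
    layer1 = count * indicator   # only the highest bucket keeps its count
    layer2 = count - layer1      # all the other buckets land here
    print(country, layer1, layer2)
```

Only the top bucket ends up in layer 1, so the stacked chart colors that single bar differently.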
From here, in the Lens editor, we duplicate the layer and adjust the formula of the second layer, which contains the rest of the data. We append another count() followed by the minus operator. This is the other trick: in this layer we just need to ensure the highest value is not represented, which happens exactly once, when count() = overall_max() and the division therefore returns 1.

![1.1 flights: layer 1 + layer 2](/assets/images/kibana-impactful-visualizations-with-magic-formulas-part1/blog-1-wbg-flights-1.1-explaination-layer1-and-layer2.png)

**_Layer 2_**: `count() - count()*floor(count()/overall_max(count()))`

To merge these two layers nicely, make the following adjustments in both:

- select **bar horizontal stacked**

- Vertical axis: change "Rank by" to Custom and ensure the rank function is "Count"

Here is the final setup of the two layers:

![1.1 flights: 2 layers setup](/assets/images/kibana-impactful-visualizations-with-magic-formulas-part1/blog-1-wbg-flights-1.1-kibana-final-2layers-setup.png)

**_Layer 1_**: `count()*floor(count()/overall_max(count()))`

**_Layer 2_**: `count() - count()*floor(count()/overall_max(count()))`

This visualization also works well for time series data where you need to quickly highlight which time period (12 h in the example below) had the highest number of flights:

![1.1 flights: timeseries example](/assets/images/kibana-impactful-visualizations-with-magic-formulas-part1/blog-1-wbg-flights-1.1-timeserie-example.png)

## Above the surface

Building on what we have done so far, we can extend the approach to bring other high values above the surface. Let's look at the formula we used to create the visualization in the introduction:

![2.1 Flights: Top 10 destinations](/assets/images/kibana-impactful-visualizations-with-magic-formulas-part1/blog-1-dbg-excalidraw-flights-teaser-intro-s1.png)

For this visualization, we used a property of the **round()** function: any ratio of 0.5 or more rounds up to 1, so the first layer captures every value that is at least half of the highest value.

![2.1 flights: round() > 50% of max explanation](/assets/images/kibana-impactful-visualizations-with-magic-formulas-part1/blog-1-wbg-flights-2.1-explaination-round.png)

Let's duplicate our first visualization and swap out the floor() function for round().

**_Layer 1_**: `count()*round(count()/overall_max(count()))`

**_Layer 2_**: `count() - count()*round(count()/overall_max(count()))`
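If you want to sanity-check round() outside Lens, note one trap: Python's built-in round() uses banker's rounding (round(0.5) == 0), so a faithful sketch has to round half up explicitly. Same made-up buckets as before:

```python
import math

def lens_round(x):
    # Round half up, as the formula expects; Python's built-in round()
    # would round 0.5 down to 0 (banker's rounding).
    return math.floor(x + 0.5)

buckets = {"IT": 2371, "US": 1987, "CN": 1096, "CA": 944}
overall_max = max(buckets.values())

for country, count in buckets.items():
    indicator = lens_round(count / overall_max)  # 1 for counts >= 50% of the max
    print(country, count * indicator, count - count * indicator)
```

With these numbers, both IT and US clear the 50% bar and land in the first layer.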
It was an easy fix.
What if we want to extend the first layer further by adding more high values?
For instance, we would like all the values above the average.

To do this, we use **overall_average()** as the reference value, instead of overall_max(), to separate the eligible values in Layer 1.

As we are now comparing against the average value across all buckets, the division might return values greater than 1.

![2.2 flights: floor() explanation](/assets/images/kibana-impactful-visualizations-with-magic-formulas-part1/blog-1-wbg-flights-2.2-explaination-floor.png)

Here, the **clamp()** function nicely solves this issue.

According to the formula reference, clamp() "limits the value from a minimum to maximum". Combining clamp() and floor() ensures that there are only two possible output values: either the minimum ( 0 ) or the maximum ( 1 ) given as parameters.

![2.2 flights: clamp() explanation](/assets/images/kibana-impactful-visualizations-with-magic-formulas-part1/blog-1-wbg-flights-2.2-explaination-clamp.png)

Applied to our flights dataset, it highlights the destination countries that have more flights than the average:

![2.2 flights: above the overall average](/assets/images/kibana-impactful-visualizations-with-magic-formulas-part1/blog-1-dbg-excalidraw-flights-2.2-above-overall-average.png)

**_Layer 1_**: `count()*clamp(floor(count()/overall_average(count())),0,1)`

**_Layer 2_**: `count() - count()*clamp(floor(count()/overall_average(count())),0,1)`

This also opens up options for other dynamic references. For instance, we could place all the values greater than 60% of the highest above the surface ( > `0.6*overall_max(count())` ). We can tune our formula as follows:

```
count()*clamp(floor(count()/(0.6*overall_max(count()))),0,1)
```
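Both variants are easy to verify with the same toy buckets, since clamp(x, lo, hi) is simply min(max(x, lo), hi):

```python
import math

def clamp(x, lo, hi):
    # Limit x to the [lo, hi] range, as clamp() does in Lens formulas.
    return min(max(x, lo), hi)

buckets = {"IT": 2371, "US": 1987, "CN": 1096, "CA": 944}
average = sum(buckets.values()) / len(buckets)  # plays the role of overall_average(count())
threshold = 0.6 * max(buckets.values())         # the 0.6*overall_max(count()) reference

for country, count in buckets.items():
    above_average = clamp(math.floor(count / average), 0, 1)
    above_60pct = clamp(math.floor(count / threshold), 0, 1)
    print(country, above_average, above_60pct)
```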
## Conclusion

In the first part, we saw the main tips for creating a two-color histogram:

- Two layers: one for the highest value and one for the remaining values

- Visualization type: bar horizontal/vertical **stacked**

- To separate the data, a formula in which only the highest value returns 1 and every other value returns 0

Then, in the second part, we saw how to extend this principle to bring more high values above the surface. The approach can be summarized as follows:

- Start with layer 1 focusing on the high values: `count() * <0-or-1 formula>`

- Duplicate the layer and adjust the formula: `count() - count() * <0-or-1 formula>`

Finally, here are four generic formulas that are ready to use to spice up your dashboards:

| **1. Only the highest** | |
| ----------------------- | :------------------------------------------------------: |
| Layer 1 | `count()*floor(count()/overall_max(count()))` |
| Layer 2 | `count() - count()*floor(count()/overall_max(count()))` |

| **2.1. Above the surface:** high values (at least 50% of the max value) | |
| ------------------------------------------------------------------------ | :------------------------------------------------------: |
| Layer 1 | `count()*round(count()/overall_max(count()))` |
| Layer 2 | `count() - count()*round(count()/overall_max(count()))` |

| **2.2. Above the surface:** all values above the overall average | |
| ------------------------------------------------------------------ | :---------------------------------------------------------------------: |
| Layer 1 | `count()*clamp(floor(count()/overall_average(count())),0,1)` |
| Layer 2 | `count() - count()*clamp(floor(count()/overall_average(count())),0,1)` |

| **2.3. Above the surface:** all values greater than 60% of the highest | |
| ------------------------------------------------------------------------ | :------------------------------------------------------------------------: |
| Layer 1 | `count()*clamp(floor(count()/(0.6*overall_max(count()))),0,1)` |
| Layer 2 | `count() - count()*clamp(floor(count()/(0.6*overall_max(count()))),0,1)` |

Try these examples out for yourself by signing up for a [free trial of Elastic Cloud](https://cloud.elastic.co/registration?elektra=10-common-questions-kibana-blog) or [download](https://www.elastic.co/downloads/) the self-managed version of the Elastic Stack for free. If you have additional questions about getting started, head on over to the [Kibana forum](https://discuss.elastic.co/c/elastic-stack/kibana/7) or check out the [Kibana documentation guide](https://www.elastic.co/guide/en/kibana/current/index.html).
In the next blog post, we will see how the new **ifelse()** function (introduced in version 8.6) greatly simplifies the creation of visualizations with more advanced formulas.

**References**:

- [Designing intuitive Kibana dashboards as a non-designer](https://www.elastic.co/blog/designing-intuitive-kibana-dashboards-as-a-non-designer)

- [Kibana: Lens editor - use formula to perform math](https://www.elastic.co/guide/en/kibana/current/lens.html#lens-formulas)

- Discovering the clamp() function [in this discussion (Thanks Marco!)](https://discuss.elastic.co/t/if-condition-in-kibana-table-visualization/305751/5)
It demonstrated how simple changes (grouping themes, changing type charts, and more) can make a difference in understanding your data. When delivering courses like\\",(0,t.jsx)(e.a,{href:\\"https://www.elastic.co/training/data-analysis-with-kibana\\",rel:\\"nofollow\\",children:\\" Data Analysis with Kibana\\"}),\\" or\\",(0,t.jsx)(e.a,{href:\\"https://www.elastic.co/training/elastic-observability-engineer\\",rel:\\"nofollow\\",children:\\" Elastic Observability Engineer\\"}),\\" courses, we emphasize this blog post and how these changes help bring essential information to the surface. I like a complementary approach to reach this goal: using two colors to separate the highest data values from the common ones.\\"]}),`\\n`,(0,t.jsxs)(e.p,{children:[\\"To illustrate this idea, we will use the \\",(0,t.jsx)(e.em,{children:\\"Sample flight data\\"}),\\" dataset. Now, let\\\\u2019s compare two visualizations ranking the top 10 destination countries per total number of flights. Which visualization has a higher impact?\\"]}),`\\n`,(0,t.jsx)(e.p,{children:(0,t.jsx)(e.img,{src:\\"/assets/images/kibana-impactful-visualizations-with-magic-formulas-part1/blog-1-dbg-excalidraw-flights-teaser-intro.png\\",alt:\\"Flights: Top 10 destinations\\",width:\\"2530\\",height:\\"1585\\"})}),`\\n`,(0,t.jsx)(e.p,{children:\\"If you chose the second one, you may be wondering how this was done with the Kibana Lens editor. While preparing for the certification last year, I found a way to achieve this result. The secret is using two different layers and some magic formulas. This post will explain how math in Lens formulas helps create two data-color visualizations.\\"}),`\\n`,(0,t.jsx)(e.p,{children:\\"We will start with the first example that emphasizes only the highest value of the dataset we are focusing on. The second example describes how to highlight other high values (as shown in the illustration above).\\"}),`\\n`,(0,t.jsx)(e.p,{children:(0,t.jsx)(e.em,{children:\\"[Note: the tips explained in this blog post can be applied from v 7.15]\\"})}),`\\n`,(0,t.jsxs)(e.h2,{id:\\"only-the-highest-value\\",children:[\\"Only the highest value\\",(0,t.jsx)(\\"a\\",{id:\\"only-the-highest-value\\"})]}),`\\n`,(0,t.jsx)(e.p,{children:\\"To understand how math helps to separate high values from common ones, let\\\\u2019s start with this first example: emphasizing only the highest value.\\"}),`\\n`,(0,t.jsx)(e.p,{children:(0,t.jsx)(e.img,{src:\\"/assets/images/kibana-impactful-visualizations-with-magic-formulas-part1/blog-1-wbg-flights-1.1-teaser.png\\",alt:\\"1.1 flights: \\",width:\\"1600\\",height:\\"903\\"})}),`\\n`,(0,t.jsx)(e.p,{children:\\"We start with a bar horizontal chart:\\"}),`\\n`,(0,t.jsx)(e.p,{children:(0,t.jsx)(e.img,{src:\\"/assets/images/kibana-impactful-visualizations-with-magic-formulas-part1/blog-1-wbg-flights-1.1-kibana-bar-horizontal-setup.png\\",alt:\\"1.1 flights: Lens bar horizontal chart\\",width:\\"1134\\",height:\\"934\\"})}),`\\n`,(0,t.jsxs)(e.p,{children:[\\"We need to identify the highest value of the scope we are currently examining. We will use one proper overall_* function: the \\",(0,t.jsx)(e.strong,{children:\\"overall_max()\\"}),\\", a pipeline function (equivalent to a pipeline aggregation in Query DSL).\\\\xA0\\"]}),`\\n`,(0,t.jsxs)(e.p,{children:[\\"In our example, we group the flights by country(destination). This means we count the number of flights for each DestCountry (= 1 bucket). 
The \\",(0,t.jsx)(e.strong,{children:\\"overall_max()\\"}),\\" will select which bucket has the highest value.\\\\xA0\\"]}),`\\n`,(0,t.jsxs)(e.p,{children:[\\"The math trick here is to divide the number of flights per bucket by the maximum value found among all buckets. Only one bucket will return 1: the bucket matching the max value found by overall_max(). All the other buckets will return a value < 1 and >0. We use \\",(0,t.jsx)(e.strong,{children:\\"floor()\\"}),\\" to ensure any 0.xxx values are rounded to 0.\\\\xA0\\"]}),`\\n`,(0,t.jsx)(e.p,{children:(0,t.jsx)(e.img,{src:\\"/assets/images/kibana-impactful-visualizations-with-magic-formulas-part1/blog-1-wbg-flights-1.1-explaination-floor.png\\",alt:\\"1.1 flights: explaining floor()\\",width:\\"1600\\",height:\\"871\\"})}),`\\n`,(0,t.jsx)(e.p,{children:\\"Now, we can multiple it with a count() and we have our formula for the 1st layer!\\"}),`\\n`,(0,t.jsxs)(e.p,{children:[(0,t.jsx)(e.strong,{children:(0,t.jsx)(e.em,{children:\\"Layer 1\\"})}),\\": \\",(0,t.jsx)(e.code,{children:\\"count()*floor(count()/overall_max(count()))\\"})]}),`\\n`,(0,t.jsx)(e.p,{children:\\"From here, in Lens Editor, we duplicate the layer to adjust the formula of the second layer containing the rest of the data. We need to append another count() followed by the minus operator to the formula. This is the other trick. In this layer, we just need to ensure the highest value is not represented, which will happen only once. It is when count() = overall_max(), which is = 1 when we divide them.\\"}),`\\n`,(0,t.jsx)(e.p,{children:(0,t.jsx)(e.img,{src:\\"/assets/images/kibana-impactful-visualizations-with-magic-formulas-part1/blog-1-wbg-flights-1.1-explaination-layer1-and-layer2.png\\",alt:\\"1.1 flights: layer 1 + layer 2\\",width:\\"1600\\",height:\\"893\\"})}),`\\n`,(0,t.jsxs)(e.p,{children:[(0,t.jsx)(e.strong,{children:(0,t.jsx)(e.em,{children:\\"Layer 2\\"})}),\\": \\",(0,t.jsx)(e.code,{children:\\"count() - count()*floor(count()/overall_max(count()))\\"})]}),`\\n`,(0,t.jsx)(e.p,{children:\\"To achieve a nice merge of these two layers, we need to do the following adjustments in both:\\"}),`\\n`,(0,t.jsxs)(e.ul,{children:[`\\n`,(0,t.jsxs)(e.li,{children:[`\\n`,(0,t.jsxs)(e.p,{children:[\\"select \\",(0,t.jsx)(e.strong,{children:\\"bar horizontal stacked\\"})]}),`\\n`]}),`\\n`,(0,t.jsxs)(e.li,{children:[`\\n`,(0,t.jsx)(e.p,{children:\\"Vertical axis: change\\\\u201DRank by\\\\u201D to Custom and ensure Rank function is \\\\u201CCount\\\\u201D\\"}),`\\n`]}),`\\n`]}),`\\n`,(0,t.jsx)(e.p,{children:\\"Here is the final setup of the two layers:\\"}),`\\n`,(0,t.jsx)(e.p,{children:(0,t.jsx)(e.img,{src:\\"/assets/images/kibana-impactful-visualizations-with-magic-formulas-part1/blog-1-wbg-flights-1.1-kibana-final-2layers-setup.png\\",alt:\\"1.1 flights: 2layers setup\\",width:\\"4761\\",height:\\"3414\\"})}),`\\n`,(0,t.jsxs)(e.p,{children:[(0,t.jsx)(e.strong,{children:(0,t.jsx)(e.em,{children:\\"Layer 1\\"})}),\\": \\",(0,t.jsx)(e.code,{children:\\"count()*floor(count()/overall_max(count()))\\"})]}),`\\n`,(0,t.jsxs)(e.p,{children:[(0,t.jsx)(e.strong,{children:(0,t.jsx)(e.em,{children:\\"Layer 2\\"})}),\\": \\",(0,t.jsx)(e.code,{children:\\"count() - count()*floor(count()/overall_max(count()))\\"})]}),`\\n`,(0,t.jsxs)(e.p,{children:[\\"This visualization also works well for time series data where you need to quickly highlight which time period (12h in the example below) had the highest number of 
flights:\\",(0,t.jsx)(e.br,{}),`\\n`,(0,t.jsx)(e.img,{src:\\"/assets/images/kibana-impactful-visualizations-with-magic-formulas-part1/blog-1-wbg-flights-1.1-timeserie-example.png\\",alt:\\"1.1 flights: timeseries example\\",width:\\"1600\\",height:\\"587\\"})]}),`\\n`,(0,t.jsxs)(e.h2,{id:\\"above-the-surface\\",children:[\\"Above the surface\\",(0,t.jsx)(\\"a\\",{id:\\"above-the-surface\\"})]}),`\\n`,(0,t.jsx)(e.p,{children:\\"Building on what we have done earlier, we can extend the approach to get other high values above the surface. Let\\\\u2019s see which formula we used to create the visualization in the introduction:\\"}),`\\n`,(0,t.jsx)(e.p,{children:(0,t.jsx)(e.img,{src:\\"/assets/images/kibana-impactful-visualizations-with-magic-formulas-part1/blog-1-dbg-excalidraw-flights-teaser-intro-s1.png\\",alt:\\"2.1 Flights: Top 10 destinations\\",width:\\"1265\\",height:\\"792\\"})}),`\\n`,(0,t.jsxs)(e.p,{children:[\\"For this visualization, we used a property of the \\",(0,t.jsx)(e.strong,{children:\\"round()\\"}),\\" function. This function brings in only the values greater than 50% of the highest value.\\"]}),`\\n`,(0,t.jsx)(e.p,{children:(0,t.jsx)(e.img,{src:\\"/assets/images/kibana-impactful-visualizations-with-magic-formulas-part1/blog-1-wbg-flights-2.1-explaination-round.png\\",alt:\\"2.1 flights: round() > 50% of max explanation\\",width:\\"1600\\",height:\\"902\\"})}),`\\n`,(0,t.jsx)(e.p,{children:\\"Let\'s duplicate our first visualization and swap out the floor() function with round().\\"}),`\\n`,(0,t.jsxs)(e.p,{children:[(0,t.jsx)(e.strong,{children:(0,t.jsx)(e.em,{children:\\"Layer 1\\"})}),\\": \\",(0,t.jsx)(e.code,{children:\\"count()*round(count()/overall_max(count()))\\"})]}),`\\n`,(0,t.jsxs)(e.p,{children:[(0,t.jsx)(e.strong,{children:(0,t.jsx)(e.em,{children:\\"Layer 2\\"})}),\\": \\",(0,t.jsx)(e.code,{children:\\"count() - count()*round(count()/overall_max(count()))\\"})]}),`\\n`,(0,t.jsxs)(e.p,{children:[\\"It was an easy fix.\\",(0,t.jsx)(e.br,{}),`\\n`,\\"What if we want to extend the first layer further by adding more high values?\\",(0,t.jsx)(e.br,{}),`\\n`,\\"For instance, we would like all the values above the average.\\"]}),`\\n`,(0,t.jsxs)(e.p,{children:[\\"To do this, we use \\",(0,t.jsx)(e.strong,{children:\\"overall_average\\"}),\\"() as a new reference value instead of the overall_max () reference to separate the eligible values in Layer 1.\\"]}),`\\n`,(0,t.jsx)(e.p,{children:\\"As we are comparing against the average value among all the buckets, the division might return values greater than 1.\\"}),`\\n`,(0,t.jsx)(e.p,{children:(0,t.jsx)(e.img,{src:\\"/assets/images/kibana-impactful-visualizations-with-magic-formulas-part1/blog-1-wbg-flights-2.2-explaination-floor.png\\",alt:\\"2.2 flights: round() explanation\\",width:\\"1600\\",height:\\"837\\"})}),`\\n`,(0,t.jsxs)(e.p,{children:[\\"Here, the \\",(0,t.jsx)(e.strong,{children:\\"clamp\\"}),\\"() function nicely solves this issue.\\\\xA0\\"]}),`\\n`,(0,t.jsx)(e.p,{children:\'According to the formula reference, clamp() \\"limits the value from a minimum to maximum\\". 
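`\\n`,(0,t.jsx)(e.p,{children:\\"Here is a quick sketch with made-up ratio values showing how the combination behaves:\\"}),`\\n`,(0,t.jsx)(e.pre,{children:(0,t.jsx)(e.code,{children:`\\nclamp(floor(2.7), 0, 1) = clamp(2, 0, 1) = 1\\nclamp(floor(0.4), 0, 1) = clamp(0, 0, 1) = 0\\n`})}),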
`\\n`,(0,t.jsx)(e.p,{children:(0,t.jsx)(e.img,{src:\\"/assets/images/kibana-impactful-visualizations-with-magic-formulas-part1/blog-1-wbg-flights-2.2-explaination-clamp.png\\",alt:\\"2.2 flights: clamp() explanation\\",width:\\"1600\\",height:\\"773\\"})}),`\\n`,(0,t.jsx)(e.p,{children:\\"Applied to our flights dataset, it highlights the destination countries that have more flights than the average:\\"}),`\\n`,(0,t.jsx)(e.p,{children:(0,t.jsx)(e.img,{src:\\"/assets/images/kibana-impactful-visualizations-with-magic-formulas-part1/blog-1-dbg-excalidraw-flights-2.2-above-overall-average.png\\",alt:\\"2.2 flights: above the overall average \\",width:\\"2349\\",height:\\"1571\\"})}),`\\n`,(0,t.jsxs)(e.p,{children:[(0,t.jsx)(e.strong,{children:(0,t.jsx)(e.em,{children:\\"Layer 1\\"})}),\\": \\",(0,t.jsx)(e.code,{children:\\"count()*clamp(floor(count()/overall_average(count())),0,1)\\"})]}),`\\n`,(0,t.jsxs)(e.p,{children:[(0,t.jsx)(e.strong,{children:(0,t.jsx)(e.em,{children:\\"Layer 2\\"})}),\\": \\",(0,t.jsx)(e.code,{children:\\"count() - count()*clamp(floor(count()/overall_average(count())),0,1)\\"})]}),`\\n`,(0,t.jsxs)(e.p,{children:[\\"It also opens up options for using other dynamic references. For instance, we could place all the values greater than 60% of the highest above the surface ( > \\",(0,t.jsx)(e.code,{children:\\"0.6*overall_max(count())\\"}),`).\\nWe can tune our formula as follows:\\\\xA0`]}),`\\n`,(0,t.jsx)(e.pre,{children:(0,t.jsx)(e.code,{children:`\\ncount()*clamp(floor(count()/(0.6*overall_max(count()))),0,1)\\n`})}),`\\n`,(0,t.jsxs)(e.h2,{id:\\"conclusion\\",children:[\\"Conclusion\\",(0,t.jsx)(\\"a\\",{id:\\"conclusion\\"})]}),`\\n`,(0,t.jsx)(e.p,{children:\\"In the first part, we covered the main tips that allow us to create a two-color histogram:\\"}),`\\n`,(0,t.jsxs)(e.ul,{children:[`\\n`,(0,t.jsxs)(e.li,{children:[`\\n`,(0,t.jsx)(e.p,{children:\\"Two layers: one for the highest value and one for the remaining values\\"}),`\\n`]}),`\\n`,(0,t.jsxs)(e.li,{children:[`\\n`,(0,t.jsxs)(e.p,{children:[\\"Visualization type: bar horizontal/vertical \\",(0,t.jsx)(e.strong,{children:\\"stacked\\"})]}),`\\n`]}),`\\n`,(0,t.jsxs)(e.li,{children:[`\\n`,(0,t.jsx)(e.p,{children:\\"To separate the data, we use a formula that returns 1 for the highest value and 0 for all the others\\"}),`\\n`]}),`\\n`]}),`\\n`,(0,t.jsx)(e.p,{children:\\"Then in the second part, we have seen how we can extend this principle to bring more high values above the surface.
This approach can be summarized as follows:\\"}),`\\n`,(0,t.jsxs)(e.ul,{children:[`\\n`,(0,t.jsxs)(e.li,{children:[`\\n`,(0,t.jsx)(e.p,{children:\\"Start with layer 1, focusing on the high values: count()*\\"}),`\\n`]}),`\\n`,(0,t.jsxs)(e.li,{children:[`\\n`,(0,t.jsxs)(e.p,{children:[\\"Duplicate the layer and adjust the formula:\\",(0,t.jsx)(e.br,{}),`\\n`,\\"( count() - count()*)\\"]}),`\\n`]}),`\\n`]}),`\\n`,(0,t.jsx)(e.p,{children:\\"Finally, we provide four generic formulas that are ready to use to spice up your dashboards:\\"}),`\\n`,(0,t.jsx)(e.div,{className:\\"table-container\\",children:(0,t.jsxs)(e.table,{children:[(0,t.jsx)(e.thead,{children:(0,t.jsxs)(e.tr,{children:[(0,t.jsx)(e.th,{}),(0,t.jsx)(e.th,{style:{textAlign:\\"center\\"}})]})}),(0,t.jsxs)(e.tbody,{children:[(0,t.jsxs)(e.tr,{children:[(0,t.jsx)(e.td,{children:(0,t.jsx)(e.strong,{children:\\"1. Only the highest\\"})}),(0,t.jsx)(e.td,{style:{textAlign:\\"center\\"}})]}),(0,t.jsxs)(e.tr,{children:[(0,t.jsx)(e.td,{children:\\"Layer 1\\"}),(0,t.jsx)(e.td,{style:{textAlign:\\"center\\"},children:(0,t.jsx)(e.code,{children:\\"count()*floor(count()/overall_max(count()))\\"})})]}),(0,t.jsxs)(e.tr,{children:[(0,t.jsx)(e.td,{children:\\"Layer 2\\"}),(0,t.jsx)(e.td,{style:{textAlign:\\"center\\"},children:(0,t.jsx)(e.code,{children:\\"count() - count()*floor(count()/overall_max(count()))\\"})})]})]})]})}),`\\n`,(0,t.jsx)(e.div,{className:\\"table-container\\",children:(0,t.jsxs)(e.table,{children:[(0,t.jsx)(e.thead,{children:(0,t.jsxs)(e.tr,{children:[(0,t.jsx)(e.th,{}),(0,t.jsx)(e.th,{style:{textAlign:\\"center\\"}})]})}),(0,t.jsxs)(e.tbody,{children:[(0,t.jsxs)(e.tr,{children:[(0,t.jsxs)(e.td,{children:[(0,t.jsx)(e.strong,{children:\\"2.1. Above the surface:\\"}),\\" high values (above 50% of the max value)\\"]}),(0,t.jsx)(e.td,{style:{textAlign:\\"center\\"}})]}),(0,t.jsxs)(e.tr,{children:[(0,t.jsx)(e.td,{children:\\"Layer 1\\"}),(0,t.jsx)(e.td,{style:{textAlign:\\"center\\"},children:(0,t.jsx)(e.code,{children:\\"count()*round(count()/overall_max(count()))\\"})})]}),(0,t.jsxs)(e.tr,{children:[(0,t.jsx)(e.td,{children:\\"Layer 2\\"}),(0,t.jsx)(e.td,{style:{textAlign:\\"center\\"},children:(0,t.jsx)(e.code,{children:\\"count() - count()*round(count()/overall_max(count()))\\"})})]})]})]})}),`\\n`,(0,t.jsx)(e.div,{className:\\"table-container\\",children:(0,t.jsxs)(e.table,{children:[(0,t.jsx)(e.thead,{children:(0,t.jsxs)(e.tr,{children:[(0,t.jsx)(e.th,{}),(0,t.jsx)(e.th,{style:{textAlign:\\"center\\"}})]})}),(0,t.jsxs)(e.tbody,{children:[(0,t.jsxs)(e.tr,{children:[(0,t.jsxs)(e.td,{children:[(0,t.jsx)(e.strong,{children:\\"2.2.
Above the surface :\\"}),\\" all values above the overall average\\"]}),(0,t.jsx)(e.td,{style:{textAlign:\\"center\\"}})]}),(0,t.jsxs)(e.tr,{children:[(0,t.jsx)(e.td,{children:\\"Layer 1\\"}),(0,t.jsx)(e.td,{style:{textAlign:\\"center\\"},children:(0,t.jsx)(e.code,{children:\\"count()*clamp(floor(count()/overall_average(count())),0,1)\\"})})]}),(0,t.jsxs)(e.tr,{children:[(0,t.jsx)(e.td,{children:\\"Layer 2\\"}),(0,t.jsx)(e.td,{style:{textAlign:\\"center\\"},children:(0,t.jsx)(e.code,{children:\\"count() - count()*clamp(floor(count()/overall_average(count())),0,1)\\"})})]})]})]})}),`\\n`,(0,t.jsx)(e.div,{className:\\"table-container\\",children:(0,t.jsxs)(e.table,{children:[(0,t.jsx)(e.thead,{children:(0,t.jsxs)(e.tr,{children:[(0,t.jsx)(e.th,{}),(0,t.jsx)(e.th,{style:{textAlign:\\"center\\"}})]})}),(0,t.jsxs)(e.tbody,{children:[(0,t.jsxs)(e.tr,{children:[(0,t.jsxs)(e.td,{children:[(0,t.jsx)(e.strong,{children:\\"2.2. Above the surface :\\"}),\\" all the values greater than 60% of the highest\\"]}),(0,t.jsx)(e.td,{style:{textAlign:\\"center\\"}})]}),(0,t.jsxs)(e.tr,{children:[(0,t.jsx)(e.td,{children:\\"Layer 1\\"}),(0,t.jsx)(e.td,{style:{textAlign:\\"center\\"},children:(0,t.jsx)(e.code,{children:\\"count()*clamp(floor(count()/(0.6*overall_max(count()) ) ),0,1)\\"})})]}),(0,t.jsxs)(e.tr,{children:[(0,t.jsx)(e.td,{children:\\"Layer 2\\"}),(0,t.jsx)(e.td,{style:{textAlign:\\"center\\"},children:(0,t.jsx)(e.code,{children:\\"count() - count()*clamp(floor(count()/(0.6*overall_max(count()) ) ),0,1)\\"})})]})]})]})}),`\\n`,(0,t.jsxs)(e.p,{children:[\\"Try these examples out for yourself by signing up for a \\",(0,t.jsx)(e.a,{href:\\"https://cloud.elastic.co/registration?elektra=10-common-questions-kibana-blog\\",rel:\\"nofollow\\",children:\\"free trial of Elastic Cloud\\"}),\\" or \\",(0,t.jsx)(e.a,{href:\\"https://www.elastic.co/downloads/\\",rel:\\"nofollow\\",children:\\"download\\"}),\\" the self-managed version of the Elastic Stack for free. 
If you have additional questions about getting started, head on over to the \\",(0,t.jsx)(e.a,{href:\\"https://discuss.elastic.co/c/elastic-stack/kibana/7\\",rel:\\"nofollow\\",children:\\"Kibana forum\\"}),\\" or check out the \\",(0,t.jsx)(e.a,{href:\\"https://www.elastic.co/guide/en/kibana/current/index.html\\",rel:\\"nofollow\\",children:\\"Kibana documentation guide\\"}),\\".\\",(0,t.jsx)(e.br,{}),`\\n`,\\"In the next blog post, we will see how the new function \\",(0,t.jsx)(e.strong,{children:\\"ifelse\\"}),\\"() (introduced in version 8.6) will greatly simplify the creation of visualizations with more advanced formulas.\\"]}),`\\n`,(0,t.jsxs)(e.p,{children:[(0,t.jsx)(e.strong,{children:\\"References\\"}),\\":\\"]}),`\\n`,(0,t.jsxs)(e.ul,{children:[`\\n`,(0,t.jsxs)(e.li,{children:[`\\n`,(0,t.jsx)(e.p,{children:(0,t.jsx)(e.a,{href:\\"https://www.elastic.co/blog/designing-intuitive-kibana-dashboards-as-a-non-designer\\",rel:\\"nofollow\\",children:\\"Designing intuitive Kibana dashboards as a non-designer\\"})}),`\\n`]}),`\\n`,(0,t.jsxs)(e.li,{children:[`\\n`,(0,t.jsx)(e.p,{children:(0,t.jsx)(e.a,{href:\\"https://www.elastic.co/guide/en/kibana/current/lens.html#lens-formulas\\",rel:\\"nofollow\\",children:\\"Kibana: Lens editor - use formula to perform math\\"})}),`\\n`]}),`\\n`,(0,t.jsxs)(e.li,{children:[`\\n`,(0,t.jsxs)(e.p,{children:[\\"Discovering the clamp() function \\",(0,t.jsx)(e.a,{href:\\"https://discuss.elastic.co/t/if-condition-in-kibana-table-visualization/305751/5\\",rel:\\"nofollow\\",children:\\"in this discussion (Thanks Marco!)\\"})]}),`\\n`]}),`\\n`]})]})}function d(n={}){let{wrapper:e}=n.components||{};return e?(0,t.jsx)(e,{...n,children:(0,t.jsx)(c,{...n})}):c(n)}return y(k);})();\\n;return Component;"},"_id":"articles/kibana-impactful-visualizations-with-magic-formulas-part1.mdx","_raw":{"sourceFilePath":"articles/kibana-impactful-visualizations-with-magic-formulas-part1.mdx","sourceFileName":"kibana-impactful-visualizations-with-magic-formulas-part1.mdx","sourceFileDir":"articles","contentType":"mdx","flattenedPath":"articles/kibana-impactful-visualizations-with-magic-formulas-part1"},"type":"Article","imageUrl":"/assets/images/kibana-impactful-visualizations-with-magic-formulas-part1/kibana-magic-formulas-p1.png","readingTime":"14 min read","url":"/kibana-impactful-visualizations-with-magic-formulas-part1","headings":[{"level":2,"title":"Kibana: How to create impactful visualizations with magic formulas? (part 1)","href":"#kibana-how-to-create-impactful-visualizations-with-magic-formulas-part-1"},{"level":3,"title":"Introduction","href":"#introduction"},{"level":2,"title":"Only the highest value","href":"#only-the-highest-valuea-idonly-the-highest-valuea"},{"level":2,"title":"Above the surface","href":"#above-the-surfacea-idabove-the-surfacea"},{"level":2,"title":"Conclusion","href":"#conclusiona-idconclusiona"}]},{"title":"Managing your Kubernetes cluster with Elastic Observability","slug":"kubernetes-cluster-metrics-logs-monitoring","date":"2022-10-24","description":"Unify all of your Kubernetes metrics, log, and trace data on a single platform and dashboard, Elastic. 
From the infrastructure to the application layer Elastic Observability makes it easier for you to understand how your cluster is performing.","image":"ManagingKubernetes-ElasticAgentIntegration-1.png","author":[{"slug":"bahubali-shetti","type":"Author","_raw":{}}],"tags":[{"slug":"kubernetes","type":"Tag","_raw":{}},{"slug":"google-cloud","type":"Tag","_raw":{}},{"slug":"gke","type":"Tag","_raw":{}},{"slug":"metrics","type":"Tag","_raw":{}},{"slug":"elastic-agent","type":"Tag","_raw":{}}],"body":{"raw":"\\nAs an operations engineer (SRE, IT manager, DevOps), you’re always struggling with how to manage technology and data sprawl. Kubernetes is becoming increasingly pervasive and a majority of these deployments will be in Amazon Elastic Kubernetes Service (EKS), Google Kubernetes Engine (GKE), or Azure Kubernetes Service (AKS). Some of you may be on a single cloud while others will have the added burden of managing clusters on multiple Kubernetes cloud services. In addition to cloud provider complexity, you also have to manage hundreds of deployed services generating more and more observability and telemetry data.\\n\\nThe day-to-day operations of understanding the status and health of your Kubernetes clusters and applications running on them, through the logs, metrics, and traces they generate, will likely be your biggest challenge. But as an operations engineer you will need all of that important data to help prevent, predict, and remediate issues. And you certainly don’t need that volume of metrics, logs and traces spread across multiple tools when you need to visualize and analyze Kubernetes telemetry data for troubleshooting and support.\\n\\nElastic Observability helps manage the sprawl of Kubernetes metrics and logs by providing extensive and centralized observability capabilities beyond just the logging that we are known for. Elastic Observability provides you with granular insights and context into the behavior of your Kubernetes clusters along with the applications running on them by unifying all of your metrics, log, and trace data through OpenTelemetry and APM agents.\\n\\nRegardless of the cluster location (EKS, GKE, AKS, self-managed) or application, [Kubernetes monitoring](https://www.elastic.co/what-is/kubernetes-monitoring) is made simple with Elastic Observability. 
All of the node, pod, container, application, and infrastructure (AWS, GCP, Azure) metrics, infrastructure and application logs, along with application traces are available in Elastic Observability.\\n\\nIn this blog we will show:\\n\\n- How [Elastic Cloud](https://cloud.elastic.co) can aggregate and ingest metrics and log data through the Elastic Agent (easily deployed on your cluster as a DaemonSet) to retrieve logs and metrics from the host (system metrics, container stats) along with logs from all services running on top of Kubernetes.\\n- How Elastic Observability can bring a unified telemetry experience (logs, metrics, traces) across all your Kubernetes cluster components (pods, nodes, services, namespaces, and more).\\n\\n![Elastic Agent with Kubernetes Integration](/assets/images/kubernetes-cluster-metrics-logs-monitoring/ManagingKubernetes-ElasticAgentIntegration-1.png)\\n\\n## Prerequisites and config\\n\\nIf you plan on following this blog, here are some of the components and details we used to set up this demonstration:\\n\\n- Ensure you have an account on [Elastic Cloud](https://cloud.elastic.co) and a deployed stack ([see instructions here](https://www.elastic.co/guide/en/elastic-stack/current/installing-elastic-stack.html)).\\n- While we used GKE, you can use any location for your Kubernetes cluster.\\n- We used a variant of the ever so popular [HipsterShop](https://github.com/GoogleCloudPlatform/microservices-demo) demo application. It was originally written by Google to showcase Kubernetes and is available in a multitude of variants, such as the [OpenTelemetry Demo App](https://github.com/open-telemetry/opentelemetry-demo). To use the app, please go [here](https://github.com/bshetti/opentelemetry-microservices-demo/tree/main/deploy-with-collector-k8s) and follow the instructions to deploy. You don’t need to deploy otelcollector for Kubernetes metrics to flow — we will cover this below.\\n- Elastic supports native ingest from Prometheus and FluentD, but in this blog, we are showing a direct ingest from Kubernetes cluster via Elastic Agent. There will be a follow-up blog showing how Elastic can also pull in telemetry from Prometheus or FluentD/bit.\\n\\n## What can you observe and analyze with Elastic?\\n\\nBefore we walk through the steps on getting Elastic set up to ingest and visualize Kubernetes cluster metrics and logs, let’s take a sneak peek at Elastic’s helpful dashboards.\\n\\nAs we noted, we ran a variant of HipsterShop on GKE and deployed Elastic Agents with Kubernetes integration as a DaemonSet on the GKE cluster.
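\\n\\n(If you want to double-check the agent rollout yourself once you reach Step 3, here is a hypothetical sanity check; it assumes the default manifest, which creates a DaemonSet named elastic-agent in the kube-system namespace.)\\n\\n```bash\\n# Assumes the default elastic-agent DaemonSet name and namespace\\nkubectl get daemonset elastic-agent -n kube-system\\n```\\n\\n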
Upon deployment of the agents, Elastic starts ingesting metrics from the Kubernetes cluster (specifically from kube-state-metrics) and additionally Elastic will pull all log information from the cluster.\\n\\n### Visualizing Kubernetes metrics on Elastic Observability\\n\\nHere are a few Kubernetes dashboards that will be available out of the box (OOTB) on Elastic Observability.\\n\\n![HipsterShop cluster metrics on Elastic Kubernetes overview dashboard ](/assets/images/kubernetes-cluster-metrics-logs-monitoring/ManagingKubernetes-HipsterShopMetrics-2.png)\\n\\n![HipsterShop default namespace pod dashboard on Elastic Observability](/assets/images/kubernetes-cluster-metrics-logs-monitoring/ManagingKubernetes-HipsterShopDashboard-3.png)\\n\\nIn addition to the cluster overview dashboard and pod dashboard, Elastic has several useful OOTB dashboards:\\n\\n- Kubernetes overview dashboard (see above)\\n- Kubernetes pod dashboard (see above)\\n- Kubernetes nodes dashboard\\n- Kubernetes deployments dashboard\\n- Kubernetes DaemonSets dashboard\\n- Kubernetes StatefulSets dashboards\\n- Kubernetes CronJob & Jobs dashboards\\n- Kubernetes services dashboards\\n- More being added regularly\\n\\nAdditionally, you can either customize these dashboards or build out your own.\\n\\n### Working with logs on Elastic Observability\\n\\n![Kubernetes container logs and Elastic Agent logs](/assets/images/kubernetes-cluster-metrics-logs-monitoring/ManagingKubernetes-Logging-4.png)\\n\\nAs you can see from the screens above, not only can I get Kubernetes cluster metrics, but also all the Kubernetes logs simply by using the Elastic Agent in my Kubernetes cluster.\\n\\n### Prevent, predict, and remediate issues\\n\\nIn addition to helping manage metrics and logs, Elastic can help you detect and predict anomalies across your cluster telemetry. Simply turn on Machine Learning in Elastic against your data and watch it help you enhance your analysis work. As you can see below, Elastic is not only a unified observability location for your Kubernetes cluster logs and metrics, but it also provides extensive machine learning capabilities to enhance your analysis and management.\\n\\n![Anomaly detection across logs on Elastic Observability](/assets/images/kubernetes-cluster-metrics-logs-monitoring/ManagingKubernetes-AnomalyDetection-5.png)\\n\\n![Analyzing issues on a Kubernetes pod with Elastic Observability ](/assets/images/kubernetes-cluster-metrics-logs-monitoring/ManagingKubernetes-PodIssues-6.png)\\n\\nIn the top graph, you see anomaly detection across logs; it shows something potentially wrong in the September 21 to 23 time period. The bottom chart digs into the details by analyzing a single metric, kubernetes.pod.cpu.usage.node, which shows CPU issues early in September and again later in the month. You can do more complicated analyses on your cluster telemetry with Machine Learning using multi-metric analysis (versus the single metric issue I am showing above) along with population analysis.\\n\\nElastic gives you powerful machine learning capabilities to enhance your analysis of Kubernetes cluster telemetry.
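\\n\\nIf you prefer APIs to the ML UI, a single-metric job like this can also be created programmatically. The sketch below is illustrative only (hypothetical job name, bucket span, and credentials; you would still attach a datafeed and start the job):\\n\\n```bash\\n# Hypothetical sketch: an anomaly detection job on pod CPU, partitioned per pod\\ncurl -X PUT \\"$ES_URL/_ml/anomaly_detectors/k8s-pod-cpu-sketch\\" -u elastic:$ES_PWD -H \\"Content-Type: application/json\\" -d '{\\"analysis_config\\":{\\"bucket_span\\":\\"15m\\",\\"detectors\\":[{\\"function\\":\\"mean\\",\\"field_name\\":\\"kubernetes.pod.cpu.usage.node\\",\\"partition_field_name\\":\\"kubernetes.pod.name\\"}]},\\"data_description\\":{\\"time_field\\":\\"@timestamp\\"}}'\\n```\\n\\n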
In the next section, let’s walk through how easy it is to get your telemetry data into Elastic.\\n\\n## Setting it all up\\n\\nLet’s walk through the details of how to get metrics, logs, and traces into Elastic from a HipsterShop application deployed on GKE.\\n\\nFirst, pick your favorite version of Hipstershop — as we noted above, we used a variant of the [OpenTelemetry-Demo](https://github.com/open-telemetry/opentelemetry-demo) because it already has OTel. We slimmed it down for this blog, however (fewer services with some varied languages).\\n\\n### Step 0: Get an account on Elastic Cloud\\n\\nFollow the instructions to [get started on Elastic Cloud](https://cloud.elastic.co/registration?fromURI=/home).\\n\\n![](/assets/images/kubernetes-cluster-metrics-logs-monitoring/ManagingKubernetes-FreeElasticCloud-7.png)\\n\\n### Step 1: Get a Kubernetes cluster and load your Kubernetes app into your cluster\\n\\nGet your app on a Kubernetes cluster in your Cloud service of choice or local Kubernetes platform. Once your app is up on Kubernetes, you should have the following pods (or some variant) running on the default namespace.\\n\\n```yaml\\nNAME READY STATUS RESTARTS AGE\\nadservice-8694798b7b-jbfxt 1/1 Running 0 4d3h\\ncartservice-67b598697c-hfsxv 1/1 Running 0 4d3h\\ncheckoutservice-994ddc4c4-p9p2s 1/1 Running 0 4d3h\\ncurrencyservice-574f65d7f8-zc4bn 1/1 Running 0 4d3h\\nemailservice-6db78645b5-ppmdk 1/1 Running 0 4d3h\\nfrontend-5778bfc56d-jjfxg 1/1 Running 0 4d3h\\njaeger-686c775fbd-7d45d 1/1 Running 0 4d3h\\nloadgenerator-c8f76d8db-gvrp7 1/1 Running 0 4d3h\\notelcollector-5b87f4f484-4wbwn 1/1 Running 0 4d3h\\npaymentservice-6888bb469c-nblqj 1/1 Running 0 4d3h\\nproductcatalogservice-66478c4b4-ff5qm 1/1 Running 0 4d3h\\nrecommendationservice-648978746-8bzxc 1/1 Running 0 4d3h\\nredis-cart-96d48485f-gpgxd 1/1 Running 0 4d3h\\nshippingservice-67fddb767f-cq97d 1/1 Running 0 4d3h\\n```\\n\\n### Step 2: Turn on kube-state-metrics\\n\\nNext you will need to turn on [kube-state-metrics](https://github.com/kubernetes/kube-state-metrics).\\n\\nFirst:\\n\\n```bash\\ngit clone https://github.com/kubernetes/kube-state-metrics.git\\n```\\n\\nNext, in the examples directory under the kube-state-metrics directory, just apply the standard config.\\n\\n```bash\\nkubectl apply -f ./standard\\n```\\n\\nThis will turn on kube-state-metrics, and you should see a pod similar to this running in kube-system namespace.\\n\\n```yaml\\nkube-state-metrics-5f9dc77c66-qjprz 1/1 Running 0 4d4h\\n```\\n\\n### Step 3: Install the Elastic Agent with Kubernetes integration\\n\\n**Add Kubernetes Integration:**\\n\\n1. ![](https://images.contentstack.io/v3/assets/bltefdd0b53724fa2ce/blt5a3ae745e98b9e37/635691670a58db35cbdbc0f6/ManagingKubernetes-Addk8sButton-8.png)\\n2. In Elastic, go to integrations and select the Kubernetes Integration, and click Add Kubernetes.\\n3. Select a name for the Kubernetes integration.\\n4. Turn on kube-state-metrics in the configuration screen.\\n5. Give the configuration a name in the new-agent-policy-name text box.\\n6. Save the configuration. The integration with a policy is now created.\\n\\nYou can read up on the agent policies and how they are used on the Elastic Agent [here](https://www.elastic.co/guide/en/fleet/current/agent-policy.html).\\n\\n![](/assets/images/kubernetes-cluster-metrics-logs-monitoring/ManagingKubernetes-K8sIntegration-9.png)\\n\\n![](/assets/images/kubernetes-cluster-metrics-logs-monitoring/ManagingKubernetes-FleetManagement-10.png)\\n\\n1.
Add Kubernetes integration.\\n2. Select the policy you just created in the second step.\\n3. In the third step of Add Agent instructions, copy and paste or download the manifest.\\n4. Copy the manifest to the shell where you have kubectl running, save it as elastic-agent-managed-kubernetes.yaml, and run the following command.\\n\\n```bash\\nkubectl apply -f elastic-agent-managed-kubernetes.yaml\\n```\\n\\nYou should see a number of agents come up as part of a DaemonSet in kube-system namespace.\\n\\n```yaml\\nNAME READY STATUS RESTARTS AGE\\nelastic-agent-qr6hj 1/1 Running 0 4d7h\\nelastic-agent-sctmz 1/1 Running 0 4d7h\\nelastic-agent-x6zkw 1/1 Running 0 4d7h\\nelastic-agent-zc64h 1/1 Running 0 4d7h\\n```\\n\\nIn my cluster, I have four nodes and four elastic-agents started as part of the DaemonSet.\\n\\n### Step 4: Look at Elastic out of the box dashboards (OOTB) for Kubernetes metrics and start discovering Kubernetes logs\\n\\nThat is it. You should see metrics flowing into all the dashboards. To view logs for specific pods, simply go into Discover in Kibana and search for a specific pod name.\\n\\n![HipsterShop cluster metrics on Elastic Kubernetes overview dashboard](/assets/images/kubernetes-cluster-metrics-logs-monitoring/ManagingKubernetes-HipsterShopMetrics-2.png)\\n\\n![Hipstershop default namespace pod dashboard on Elastic Observability](/assets/images/kubernetes-cluster-metrics-logs-monitoring/ManagingKubernetes-HipsterShopDashboard-3.png)\\n\\nAdditionally, you can browse all the pod logs directly in Elastic.\\n\\n![frontendService and cartService logs](/assets/images/kubernetes-cluster-metrics-logs-monitoring/ManagingKurbenetes-PodLogs-11.png)\\n\\nIn the above example, I searched for frontendService and cartService logs.\\n\\n### Step 5: Bonus!\\n\\nBecause we were using an OTel-based application, Elastic can even pull in the application traces. But that is a discussion for another blog.\\n\\nHere is a quick peek at what Hipster Shop’s traces for a front end transaction look like in Elastic Observability.\\n\\n![Trace for Checkout transaction for HipsterShop](/assets/images/kubernetes-cluster-metrics-logs-monitoring/ManagingKubernetes-CheckOutTransaction-12.png)\\n\\n## Conclusion: Elastic Observability rocks for Kubernetes monitoring\\n\\nI hope you’ve gotten an appreciation for how Elastic Observability can help you manage Kubernetes clusters along with the complexity of the metrics, log, and trace data it generates for even a simple deployment.\\n\\nA quick recap of what we learned:\\n\\n- How [Elastic Cloud](https://cloud.elastic.co) can aggregate and ingest telemetry data through the Elastic Agent, which is easily deployed on your cluster as a DaemonSet and retrieves metrics from the host, such as system metrics, container stats, and metrics from all services running on top of Kubernetes\\n- What Elastic brings with a unified telemetry experience (Kubernetes logs, metrics, traces) across all your Kubernetes cluster components (pods, nodes, services, any namespace, and more).\\n- Interest in exploring Elastic’s ML capabilities, which will reduce your **MTTHH** (mean time to happy hour)\\n\\nReady to get started?
[Register](https://cloud.elastic.co/registration) and try out the features and capabilities I’ve outlined above.\\n","code":"var Component=(()=>{var u=Object.create;var a=Object.defineProperty;var g=Object.getOwnPropertyDescriptor;var p=Object.getOwnPropertyNames;var m=Object.getPrototypeOf,b=Object.prototype.hasOwnProperty;var y=(t,e)=>()=>(e||t((e={exports:{}}).exports,e),e.exports),f=(t,e)=>{for(var i in e)a(t,i,{get:e[i],enumerable:!0})},o=(t,e,i,r)=>{if(e&&typeof e==\\"object\\"||typeof e==\\"function\\")for(let s of p(e))!b.call(t,s)&&s!==i&&a(t,s,{get:()=>e[s],enumerable:!(r=g(e,s))||r.enumerable});return t};var w=(t,e,i)=>(i=t!=null?u(m(t)):{},o(e||!t||!t.__esModule?a(i,\\"default\\",{value:t,enumerable:!0}):i,t)),v=t=>o(a({},\\"__esModule\\",{value:!0}),t);var c=y((S,l)=>{l.exports=_jsx_runtime});var k={};f(k,{default:()=>h,frontmatter:()=>K});var n=w(c()),K={title:\\"Managing your Kubernetes cluster with Elastic Observability\\",slug:\\"kubernetes-cluster-metrics-logs-monitoring\\",date:\\"2022-10-24\\",description:\\"Unify all of your Kubernetes metrics, log, and trace data on a single platform and dashboard, Elastic. From the infrastructure to the application layer Elastic Observability makes it easier for you to understand how your cluster is performing.\\",author:[{slug:\\"bahubali-shetti\\"}],image:\\"ManagingKubernetes-ElasticAgentIntegration-1.png\\",tags:[{slug:\\"kubernetes\\"},{slug:\\"google-cloud\\"},{slug:\\"gke\\"},{slug:\\"metrics\\"},{slug:\\"elastic-agent\\"}]};function d(t){let e={a:\\"a\\",code:\\"code\\",h2:\\"h2\\",h3:\\"h3\\",img:\\"img\\",li:\\"li\\",ol:\\"ol\\",p:\\"p\\",pre:\\"pre\\",strong:\\"strong\\",ul:\\"ul\\",...t.components};return(0,n.jsxs)(n.Fragment,{children:[(0,n.jsx)(e.p,{children:\\"As an operations engineer (SRE, IT manager, DevOps), you\\\\u2019re always struggling with how to manage technology and data sprawl. Kubernetes is becoming increasingly pervasive and a majority of these deployments will be in Amazon Elastic Kubernetes Service (EKS), Google Kubernetes Engine (GKE), or Azure Kubernetes Service (AKS). Some of you may be on a single cloud while others will have the added burden of managing clusters on multiple Kubernetes cloud services. In addition to cloud provider complexity, you also have to manage hundreds of deployed services generating more and more observability and telemetry data.\\"}),`\\n`,(0,n.jsx)(e.p,{children:\\"The day-to-day operations of understanding the status and health of your Kubernetes clusters and applications running on them, through the logs, metrics, and traces they generate, will likely be your biggest challenge. But as an operations engineer you will need all of that important data to help prevent, predict, and remediate issues. And you certainly don\\\\u2019t need that volume of metrics, logs and traces spread across multiple tools when you need to visualize and analyze Kubernetes telemetry data for troubleshooting and support.\\"}),`\\n`,(0,n.jsx)(e.p,{children:\\"Elastic Observability helps manage the sprawl of Kubernetes metrics and logs by providing extensive and centralized observability capabilities beyond just the logging that we are known for. 
Elastic Observability provides you with granular insights and context into the behavior of your Kubernetes clusters along with the applications running on them by unifying all of your metrics, log, and trace data through OpenTelemetry and APM agents.\\"}),`\\n`,(0,n.jsxs)(e.p,{children:[\\"Regardless of the cluster location (EKS, GKE, AKS, self-managed) or application, \\",(0,n.jsx)(e.a,{href:\\"https://www.elastic.co/what-is/kubernetes-monitoring\\",rel:\\"nofollow\\",children:\\"Kubernetes monitoring\\"}),\\" is made simple with Elastic Observability. All of the node, pod, container, application, and infrastructure (AWS, GCP, Azure) metrics, infrastructure and application logs, along with application traces are available in Elastic Observability.\\"]}),`\\n`,(0,n.jsx)(e.p,{children:\\"In this blog we will show:\\"}),`\\n`,(0,n.jsxs)(e.ul,{children:[`\\n`,(0,n.jsxs)(e.li,{children:[\\"How \\",(0,n.jsx)(e.a,{href:\\"https://cloud.elastic.co\\",rel:\\"nofollow\\",children:\\"Elastic Cloud\\"}),\\" can aggregate and ingest metrics and log data through the Elastic Agent (easily deployed on your cluster as a DaemonSet) to retrieve logs and metrics from the host (system metrics, container stats) along with logs from all services running on top of Kubernetes.\\"]}),`\\n`,(0,n.jsx)(e.li,{children:\\"How Elastic Observability can bring a unified telemetry experience (logs, metrics, traces) across all your Kubernetes cluster components (pods, nodes, services, namespaces, and more).\\"}),`\\n`]}),`\\n`,(0,n.jsx)(e.p,{children:(0,n.jsx)(e.img,{src:\\"/assets/images/kubernetes-cluster-metrics-logs-monitoring/ManagingKubernetes-ElasticAgentIntegration-1.png\\",alt:\\"Elastic Agent with Kubernetes Integration\\",width:\\"512\\",height:\\"303\\"})}),`\\n`,(0,n.jsx)(e.h2,{id:\\"prerequisites-and-config\\",children:\\"Prerequisites and config\\"}),`\\n`,(0,n.jsx)(e.p,{children:\\"If you plan on following this blog, here are some of the components and details we used to set up this demonstration:\\"}),`\\n`,(0,n.jsxs)(e.ul,{children:[`\\n`,(0,n.jsxs)(e.li,{children:[\\"Ensure you have an account on \\",(0,n.jsx)(e.a,{href:\\"https://cloud.elastic.co\\",rel:\\"nofollow\\",children:\\"Elastic Cloud\\"}),\\" and a deployed stack (\\",(0,n.jsx)(e.a,{href:\\"https://www.elastic.co/guide/en/elastic-stack/current/installing-elastic-stack.html\\",rel:\\"nofollow\\",children:\\"see instructions here\\"}),\\").\\"]}),`\\n`,(0,n.jsx)(e.li,{children:\\"While we used GKE, you can use any location for your Kubernetes cluster.\\"}),`\\n`,(0,n.jsxs)(e.li,{children:[\\"We used a variant of the ever so popular \\",(0,n.jsx)(e.a,{href:\\"https://github.com/GoogleCloudPlatform/microservices-demo\\",rel:\\"nofollow\\",children:\\"HipsterShop\\"}),\\" demo application. It was originally written by Google to showcase Kubernetes and is available in a multitude of variants, such as the \\",(0,n.jsx)(e.a,{href:\\"https://github.com/open-telemetry/opentelemetry-demo\\",rel:\\"nofollow\\",children:\\"OpenTelemetry Demo App\\"}),\\". To use the app, please go \\",(0,n.jsx)(e.a,{href:\\"https://github.com/bshetti/opentelemetry-microservices-demo/tree/main/deploy-with-collector-k8s\\",rel:\\"nofollow\\",children:\\"here\\"}),\\" and follow the instructions to deploy.
You don\\\\u2019t need to deploy otelcollector for Kubernetes metrics to flow \\\\u2014 we will cover this below.\\"]}),`\\n`,(0,n.jsx)(e.li,{children:\\"Elastic supports native ingest from Prometheus and FluentD, but in this blog, we are showing a direct ingest from Kubernetes cluster via Elastic Agent. There will be a follow-up blog showing how Elastic can also pull in telemetry from Prometheus or FluentD/bit.\\"}),`\\n`]}),`\\n`,(0,n.jsx)(e.h2,{id:\\"what-can-you-observe-and-analyze-with-elastic\\",children:\\"What can you observe and analyze with Elastic?\\"}),`\\n`,(0,n.jsx)(e.p,{children:\\"Before we walk through the steps on getting Elastic set up to ingest and visualize Kubernetes cluster metrics and logs, let\\\\u2019s take a sneak peek at Elastic\\\\u2019s helpful dashboards.\\"}),`\\n`,(0,n.jsx)(e.p,{children:\\"As we noted, we ran a variant of HipsterShop on GKE and deployed Elastic Agents with Kubernetes integration as a DaemonSet on the GKE cluster. Upon deployment of the agents, Elastic starts ingesting metrics from the Kubernetes cluster (specifically from kube-state-metrics) and additionally Elastic will pull all log information from the cluster.\\"}),`\\n`,(0,n.jsx)(e.h3,{id:\\"visualizing-kubernetes-metrics-on-elastic-observability\\",children:\\"Visualizing Kubernetes metrics on Elastic Observability\\"}),`\\n`,(0,n.jsx)(e.p,{children:\\"Here are a few Kubernetes dashboards that will be available out of the box (OOTB) on Elastic Observability.\\"}),`\\n`,(0,n.jsx)(e.p,{children:(0,n.jsx)(e.img,{src:\\"/assets/images/kubernetes-cluster-metrics-logs-monitoring/ManagingKubernetes-HipsterShopMetrics-2.png\\",alt:\\"HipsterShop cluster metrics on Elastic Kubernetes overview dashboard \\",width:\\"1823\\",height:\\"1092\\"})}),`\\n`,(0,n.jsx)(e.p,{children:(0,n.jsx)(e.img,{src:\\"/assets/images/kubernetes-cluster-metrics-logs-monitoring/ManagingKubernetes-HipsterShopDashboard-3.png\\",alt:\\"HipsterShop default namespace pod dashboard on Elastic Observability\\",width:\\"2007\\",height:\\"1060\\"})}),`\\n`,(0,n.jsx)(e.p,{children:\\"In addition to the cluster overview dashboard and pod dashboard, Elastic has several useful OOTB dashboards:\\"}),`\\n`,(0,n.jsxs)(e.ul,{children:[`\\n`,(0,n.jsx)(e.li,{children:\\"Kubernetes overview dashboard (see above)\\"}),`\\n`,(0,n.jsx)(e.li,{children:\\"Kubernetes pod dashboard (see above)\\"}),`\\n`,(0,n.jsx)(e.li,{children:\\"Kubernetes nodes dashboard\\"}),`\\n`,(0,n.jsx)(e.li,{children:\\"Kubernetes deployments dashboard\\"}),`\\n`,(0,n.jsx)(e.li,{children:\\"Kubernetes DaemonSets dashboard\\"}),`\\n`,(0,n.jsx)(e.li,{children:\\"Kubernetes StatefulSets dashboards\\"}),`\\n`,(0,n.jsx)(e.li,{children:\\"Kubernetes CronJob & Jobs dashboards\\"}),`\\n`,(0,n.jsx)(e.li,{children:\\"Kubernetes services dashboards\\"}),`\\n`,(0,n.jsx)(e.li,{children:\\"More being added regularly\\"}),`\\n`]}),`\\n`,(0,n.jsx)(e.p,{children:\\"Additionally, you can either customize these dashboards or build out your own.\\"}),`\\n`,(0,n.jsx)(e.h3,{id:\\"working-with-logs-on-elastic-observability\\",children:\\"Working with logs on Elastic Observability\\"}),`\\n`,(0,n.jsx)(e.p,{children:(0,n.jsx)(e.img,{src:\\"/assets/images/kubernetes-cluster-metrics-logs-monitoring/ManagingKubernetes-Logging-4.png\\",alt:\\"Kubernetes container logs and Elastic Agent logs\\",width:\\"2011\\",height:\\"998\\"})}),`\\n`,(0,n.jsx)(e.p,{children:\\"As you can see from the screens above, not only can I get Kubernetes cluster metrics, but also all the Kubernetes logs simply by using 
the Elastic Agent in my Kubernetes cluster.\\"}),`\\n`,(0,n.jsx)(e.h3,{id:\\"prevent-predict-and-remediate-issues\\",children:\\"Prevent, predict, and remediate issues\\"}),`\\n`,(0,n.jsx)(e.p,{children:\\"In addition to helping manage metrics and logs, Elastic can help you detect and predict anomalies across your cluster telemetry. Simply turn on Machine Learning in Elastic against your data and watch it help you enhance your analysis work. As you can see below, Elastic is not only a unified observability location for your Kubernetes cluster logs and metrics, but it also provides extensive true machine learning capabilities to enhance your analysis and management.\\"}),`\\n`,(0,n.jsx)(e.p,{children:(0,n.jsx)(e.img,{src:\\"/assets/images/kubernetes-cluster-metrics-logs-monitoring/ManagingKubernetes-AnomalyDetection-5.png\\",alt:\\"Anomaly detection across logs on Elastic Observability\\",width:\\"1724\\",height:\\"881\\"})}),`\\n`,(0,n.jsx)(e.p,{children:(0,n.jsx)(e.img,{src:\\"/assets/images/kubernetes-cluster-metrics-logs-monitoring/ManagingKubernetes-PodIssues-6.png\\",alt:\\"Analyzing issues on a Kubernetes pod with Elastic Observability \\",width:\\"1839\\",height:\\"922\\"})}),`\\n`,(0,n.jsx)(e.p,{children:\\"In the top graph, you see anomaly detection across logs and it shows something potentially wrong in the September 21 to 23 time period. Dig into the details on the bottom chart by analyzing a single kubernetes.pod.cpu.usage.node metric showing cpu issues early in September and again, later on in the month. You can do more complicated analyses on your cluster telemetry with Machine Learning using multi-metric analysis (versus the single metric issue I am showing above) along with population analysis.\\"}),`\\n`,(0,n.jsx)(e.p,{children:\\"Elastic gives you better machine learning capabilities to enhance your analysis of Kubernetes cluster telemetry. In the next section, let\\\\u2019s walk through how easy it is to get your telemetry data into Elastic.\\"}),`\\n`,(0,n.jsx)(e.h2,{id:\\"setting-it-all-up\\",children:\\"Setting it all up\\"}),`\\n`,(0,n.jsx)(e.p,{children:\\"Let\\\\u2019s walk through the details of how to get metrics, logs, and traces into Elastic from a HipsterShop application deployed on GKE.\\"}),`\\n`,(0,n.jsxs)(e.p,{children:[\\"First, pick your favorite version of Hipstershop \\\\u2014 as we noted above, we used a variant of the \\",(0,n.jsx)(e.a,{href:\\"https://github.com/open-telemetry/opentelemetry-demo\\",rel:\\"nofollow\\",children:\\"OpenTelemetry-Demo\\"}),\\" because it already has OTel. We slimmed it down for this blog, however (fewer services with some varied languages).\\"]}),`\\n`,(0,n.jsx)(e.h3,{id:\\"step-0-get-an-account-on-elastic-cloud\\",children:\\"Step 0: Get an account on Elastic Cloud\\"}),`\\n`,(0,n.jsxs)(e.p,{children:[\\"Follow the instructions to \\",(0,n.jsx)(e.a,{href:\\"https://cloud.elastic.co/registration?fromURI=/home\\",rel:\\"nofollow\\",children:\\"get started on Elastic Cloud\\"}),\\".\\"]}),`\\n`,(0,n.jsx)(e.p,{children:(0,n.jsx)(e.img,{src:\\"/assets/images/kubernetes-cluster-metrics-logs-monitoring/ManagingKubernetes-FreeElasticCloud-7.png\\",alt:\\"\\",width:\\"512\\",height:\\"379\\"})}),`\\n`,(0,n.jsx)(e.h3,{id:\\"step-1-get-a-kubernetes-cluster-and-load-your-kubernetes-app-into-your-cluster\\",children:\\"Step 1: Get a Kubernetes cluster and load your Kubernetes app into your cluster\\"}),`\\n`,(0,n.jsx)(e.p,{children:\\"Get your app on a Kubernetes cluster in your Cloud service of choice or local Kubernetes platform. 
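\\"}),`\\n`,(0,n.jsx)(e.p,{children:\\"For example, here is a minimal sketch of the deployment commands (the exact path is an assumption on our part and depends on which variant of the demo you picked; we use the deploy-with-collector-k8s directory of the repository linked above):\\"}),`\\n`,(0,n.jsx)(e.pre,{children:(0,n.jsx)(e.code,{className:\\"language-bash\\",children:`# Hypothetical sketch; follow the repository instructions for the authoritative steps\\ngit clone https://github.com/bshetti/opentelemetry-microservices-demo.git\\nkubectl apply -f opentelemetry-microservices-demo/deploy-with-collector-k8s/\\nkubectl get pods\\n`})}),`\\n`,(0,n.jsx)(e.p,{children:\\"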
Once your app is up on Kubernetes, you should have the following pods (or some variant) running on the default namespace.\\"}),`\\n`,(0,n.jsx)(e.pre,{children:(0,n.jsx)(e.code,{className:\\"language-yaml\\",children:`NAME READY STATUS RESTARTS AGE\\nadservice-8694798b7b-jbfxt 1/1 Running 0 4d3h\\ncartservice-67b598697c-hfsxv 1/1 Running 0 4d3h\\ncheckoutservice-994ddc4c4-p9p2s 1/1 Running 0 4d3h\\ncurrencyservice-574f65d7f8-zc4bn 1/1 Running 0 4d3h\\nemailservice-6db78645b5-ppmdk 1/1 Running 0 4d3h\\nfrontend-5778bfc56d-jjfxg 1/1 Running 0 4d3h\\njaeger-686c775fbd-7d45d 1/1 Running 0 4d3h\\nloadgenerator-c8f76d8db-gvrp7 1/1 Running 0 4d3h\\notelcollector-5b87f4f484-4wbwn 1/1 Running 0 4d3h\\npaymentservice-6888bb469c-nblqj 1/1 Running 0 4d3h\\nproductcatalogservice-66478c4b4-ff5qm 1/1 Running 0 4d3h\\nrecommendationservice-648978746-8bzxc 1/1 Running 0 4d3h\\nredis-cart-96d48485f-gpgxd 1/1 Running 0 4d3h\\nshippingservice-67fddb767f-cq97d 1/1 Running 0 4d3h\\n`})}),`\\n`,(0,n.jsxs)(e.h3,{id:\\"step-2-turn-on-kube-state-metrics\\",children:[\\"Step 2: Turn on \\",(0,n.jsx)(\\"a\\",{href:\\"https://github.com/kubernetes/kube-state-metrics\\",target:\\"_self\\",children:\\"kube-state-metrics\\"})]}),`\\n`,(0,n.jsxs)(e.p,{children:[\\"Next you will need to turn on \\",(0,n.jsx)(e.a,{href:\\"https://github.com/kubernetes/kube-state-metrics\\",rel:\\"nofollow\\",children:\\"kube-state-metrics\\"}),\\".\\"]}),`\\n`,(0,n.jsx)(e.p,{children:\\"First:\\"}),`\\n`,(0,n.jsx)(e.pre,{children:(0,n.jsx)(e.code,{className:\\"language-bash\\",children:`git clone https://github.com/kubernetes/kube-state-metrics.git\\n`})}),`\\n`,(0,n.jsx)(e.p,{children:\\"Next, in the examples directory under the kube-state-metrics directory, just apply the standard config.\\"}),`\\n`,(0,n.jsx)(e.pre,{children:(0,n.jsx)(e.code,{className:\\"language-bash\\",children:`kubectl apply -f ./standard\\n`})}),`\\n`,(0,n.jsx)(e.p,{children:\\"This will turn on kube-state-metrics, and you should see a pod similar to this running in kube-system namespace.\\"}),`\\n`,(0,n.jsx)(e.pre,{children:(0,n.jsx)(e.code,{className:\\"language-yaml\\",children:`kube-state-metrics-5f9dc77c66-qjprz 1/1 Running 0 4d4h\\n`})}),`\\n`,(0,n.jsx)(e.h3,{id:\\"step-3-install-the-elastic-agent-with-kubernetes-integration\\",children:\\"Step 3: Install the Elastic Agent with Kubernetes integration\\"}),`\\n`,(0,n.jsx)(e.p,{children:(0,n.jsx)(e.strong,{children:\\"Add Kubernetes Integration:\\"})}),`\\n`,(0,n.jsxs)(e.ol,{children:[`\\n`,(0,n.jsx)(e.li,{children:(0,n.jsx)(e.img,{src:\\"https://images.contentstack.io/v3/assets/bltefdd0b53724fa2ce/blt5a3ae745e98b9e37/635691670a58db35cbdbc0f6/ManagingKubernetes-Addk8sButton-8.png\\",alt:\\"\\"})}),`\\n`,(0,n.jsx)(e.li,{children:\\"In Elastic, go to integrations and select the Kubernetes Integration, and click Add Kubernetes.\\"}),`\\n`,(0,n.jsx)(e.li,{children:\\"Select a name for the Kubernetes integration.\\"}),`\\n`,(0,n.jsx)(e.li,{children:\\"Turn on kube-state-metrics in the configuration screen.\\"}),`\\n`,(0,n.jsx)(e.li,{children:\\"Give the configuration a name in the new-agent-policy-name text box.\\"}),`\\n`,(0,n.jsx)(e.li,{children:\\"Save the configuration.
The integration with a policy is now created.\\"}),`\\n`]}),`\\n`,(0,n.jsxs)(e.p,{children:[\\"You can read up on the agent policies and how they are used on the Elastic Agent \\",(0,n.jsx)(e.a,{href:\\"https://www.elastic.co/guide/en/fleet/current/agent-policy.html\\",rel:\\"nofollow\\",children:\\"here\\"}),\\".\\"]}),`\\n`,(0,n.jsx)(e.p,{children:(0,n.jsx)(e.img,{src:\\"/assets/images/kubernetes-cluster-metrics-logs-monitoring/ManagingKubernetes-K8sIntegration-9.png\\",alt:\\"\\",width:\\"1088\\",height:\\"1160\\"})}),`\\n`,(0,n.jsx)(e.p,{children:(0,n.jsx)(e.img,{src:\\"/assets/images/kubernetes-cluster-metrics-logs-monitoring/ManagingKubernetes-FleetManagement-10.png\\",alt:\\"\\",width:\\"1091\\",height:\\"1155\\"})}),`\\n`,(0,n.jsxs)(e.ol,{children:[`\\n`,(0,n.jsx)(e.li,{children:\\"Add Kubernetes integration.\\"}),`\\n`,(0,n.jsx)(e.li,{children:\\"Select the policy you just created in the second step.\\"}),`\\n`,(0,n.jsx)(e.li,{children:\\"In the third step of Add Agent instructions, copy and paste or download the manifest.\\"}),`\\n`,(0,n.jsx)(e.li,{children:\\"Copy the manifest to the shell where you have kubectl running, save it as elastic-agent-managed-kubernetes.yaml, and run the following command.\\"}),`\\n`]}),`\\n`,(0,n.jsx)(e.pre,{children:(0,n.jsx)(e.code,{className:\\"language-bash\\",children:`kubectl apply -f elastic-agent-managed-kubernetes.yaml\\n`})}),`\\n`,(0,n.jsx)(e.p,{children:\\"You should see a number of agents come up as part of a DaemonSet in kube-system namespace.\\"}),`\\n`,(0,n.jsx)(e.pre,{children:(0,n.jsx)(e.code,{className:\\"language-yaml\\",children:`NAME READY STATUS RESTARTS AGE\\nelastic-agent-qr6hj 1/1 Running 0 4d7h\\nelastic-agent-sctmz 1/1 Running 0 4d7h\\nelastic-agent-x6zkw 1/1 Running 0 4d7h\\nelastic-agent-zc64h 1/1 Running 0 4d7h\\n`})}),`\\n`,(0,n.jsx)(e.p,{children:\\"In my cluster, I have four nodes and four elastic-agents started as part of the DaemonSet.\\"}),`\\n`,(0,n.jsx)(e.h3,{id:\\"step-4-look-at-elastic-out-of-the-box-dashboards-ootb-for-kubernetes-metrics-and-start-discovering-kubernetes-logs\\",children:\\"Step 4: Look at Elastic out of the box dashboards (OOTB) for Kubernetes metrics and start discovering Kubernetes logs\\"}),`\\n`,(0,n.jsx)(e.p,{children:\\"That is it. You should see metrics flowing into all the dashboards.
To view logs for specific pods, simply go into Discover in Kibana and search for a specific pod name.\\"}),`\\n`,(0,n.jsx)(e.p,{children:(0,n.jsx)(e.img,{src:\\"/assets/images/kubernetes-cluster-metrics-logs-monitoring/ManagingKubernetes-HipsterShopMetrics-2.png\\",alt:\\"HipsterShop cluster metrics on Elastic Kubernetes overview dashboard\\",width:\\"1823\\",height:\\"1092\\"})}),`\\n`,(0,n.jsx)(e.p,{children:(0,n.jsx)(e.img,{src:\\"/assets/images/kubernetes-cluster-metrics-logs-monitoring/ManagingKubernetes-HipsterShopDashboard-3.png\\",alt:\\"Hipstershop default namespace pod dashboard on Elastic Observability\\",width:\\"2007\\",height:\\"1060\\"})}),`\\n`,(0,n.jsx)(e.p,{children:\\"Additionally, you can browse all the pod logs directly in Elastic.\\"}),`\\n`,(0,n.jsx)(e.p,{children:(0,n.jsx)(e.img,{src:\\"/assets/images/kubernetes-cluster-metrics-logs-monitoring/ManagingKurbenetes-PodLogs-11.png\\",alt:\\"frontendService and cartService logs\\",width:\\"1903\\",height:\\"1172\\"})}),`\\n`,(0,n.jsx)(e.p,{children:\\"In the above example, I searched for frontendService and cartService logs.\\"}),`\\n`,(0,n.jsx)(e.h3,{id:\\"step-5-bonus\\",children:\\"Step 5: Bonus!\\"}),`\\n`,(0,n.jsx)(e.p,{children:\\"Because we were using an OTel-based application, Elastic can even pull in the application traces. But that is a discussion for another blog.\\"}),`\\n`,(0,n.jsx)(e.p,{children:\\"Here is a quick peek at what Hipster Shop\\\\u2019s traces for a front end transaction look like in Elastic Observability.\\"}),`\\n`,(0,n.jsx)(e.p,{children:(0,n.jsx)(e.img,{src:\\"/assets/images/kubernetes-cluster-metrics-logs-monitoring/ManagingKubernetes-CheckOutTransaction-12.png\\",alt:\\"Trace for Checkout transaction for HipsterShop\\",width:\\"1659\\",height:\\"1040\\"})}),`\\n`,(0,n.jsx)(e.h2,{id:\\"conclusion-elastic-observability-rocks-for-kubernetes-monitoring\\",children:\\"Conclusion: Elastic Observability rocks for Kubernetes monitoring\\"}),`\\n`,(0,n.jsx)(e.p,{children:\\"I hope you\\\\u2019ve gotten an appreciation for how Elastic Observability can help you manage Kubernetes clusters along with the complexity of the metrics, log, and trace data it generates for even a simple deployment.\\"}),`\\n`,(0,n.jsx)(e.p,{children:\\"A quick recap of what we learned:\\"}),`\\n`,(0,n.jsxs)(e.ul,{children:[`\\n`,(0,n.jsxs)(e.li,{children:[\\"How \\",(0,n.jsx)(e.a,{href:\\"https://cloud.elastic.co\\",rel:\\"nofollow\\",children:\\"Elastic Cloud\\"}),\\" can aggregate and ingest telemetry data through the Elastic Agent, which is easily deployed on your cluster as a DaemonSet and retrieves metrics from the host, such as system metrics, container stats, and metrics from all services running on top of Kubernetes\\"]}),`\\n`,(0,n.jsx)(e.li,{children:\\"What Elastic brings with a unified telemetry experience (Kubernetes logs, metrics, traces) across all your Kubernetes cluster components (pods, nodes, services, any namespace, and more).\\"}),`\\n`,(0,n.jsxs)(e.li,{children:[\\"Interest in exploring Elastic\\\\u2019s ML capabilities, which will reduce your \\",(0,n.jsx)(e.strong,{children:\\"MTTHH\\"}),\\" (mean time to happy hour)\\"]}),`\\n`]}),`\\n`,(0,n.jsxs)(e.p,{children:[\\"Ready to get started?
\\",(0,n.jsx)(e.a,{href:\\"https://cloud.elastic.co/registration\\",rel:\\"nofollow\\",children:\\"Register\\"}),\\" and try out the features and capabilities I\\\\u2019ve outlined above.\\"]})]})}function h(t={}){let{wrapper:e}=t.components||{};return e?(0,n.jsx)(e,{...t,children:(0,n.jsx)(d,{...t})}):d(t)}return v(k);})();\\n;return Component;"},"_id":"articles/kubernetes-cluster-metrics-logs-monitoring.mdx","_raw":{"sourceFilePath":"articles/kubernetes-cluster-metrics-logs-monitoring.mdx","sourceFileName":"kubernetes-cluster-metrics-logs-monitoring.mdx","sourceFileDir":"articles","contentType":"mdx","flattenedPath":"articles/kubernetes-cluster-metrics-logs-monitoring"},"type":"Article","imageUrl":"/assets/images/kubernetes-cluster-metrics-logs-monitoring/ManagingKubernetes-ElasticAgentIntegration-1.png","readingTime":"13 min read","url":"/kubernetes-cluster-metrics-logs-monitoring","headings":[{"level":2,"title":"Prerequisites and config","href":"#prerequisites-and-config"},{"level":2,"title":"What can you observe and analyze with Elastic?","href":"#what-can-you-observe-and-analyze-with-elastic"},{"level":3,"title":"Visualizing Kubernetes metrics on Elastic Observability","href":"#visualizing-kubernetes-metrics-on-elastic-observability"},{"level":3,"title":"Working with logs on Elastic Observability","href":"#working-with-logs-on-elastic-observability"},{"level":3,"title":"Prevent, predict, and remediate issues","href":"#prevent-predict-and-remediate-issues"},{"level":2,"title":"Setting it all up","href":"#setting-it-all-up"},{"level":3,"title":"Step 0: Get an account on Elastic Cloud","href":"#step-0-get-an-account-on-elastic-cloud"},{"level":3,"title":"Step 1: Get a Kubernetes cluster and load your Kubernetes app into your cluster","href":"#step-1-get-a-kubernetes-cluster-and-load-your-kubernetes-app-into-your-cluster"},{"level":3,"title":"Step 2: Turn on kube-state-metrics","href":"#step-2-turn-on-a-hrefhttpsgithubcomkuberneteskube-state-metrics-target_selfkube-state-metricsa"},{"level":3,"title":"Step 3: Install the Elastic Agent with Kubernetes integration","href":"#step-3-install-the-elastic-agent-with-kubernetes-integration"},{"level":3,"title":"Step 4: Look at Elastic out of the box dashboards (OOTB) for Kubernetes metrics and start discovering Kubernetes logs","href":"#step-4-look-at-elastic-out-of-the-box-dashboards-ootb-for-kubernetes-metrics-and-start-discovering-kubernetes-logs"},{"level":3,"title":"Step 5: Bonus!","href":"#step-5-bonus"},{"level":2,"title":"Conclusion: Elastic Observability rocks for Kubernetes monitoring","href":"#conclusion-elastic-observability-rocks-for-kubernetes-monitoring"}]},{"title":"Gain insights into Kubernetes errors with Elastic Observability logs and OpenAI","slug":"kubernetes-errors-observability-logs-openai","date":"2023-05-18","description":"This blog post provides an example of how one can analyze error messages in Elasticsearch with ChatGPT using the OpenAI API via Elasticsearch.","image":"blog-elastic-configuration.png","author":[{"slug":"bahubali-shetti","type":"Author","_raw":{}}],"tags":[{"slug":"cloud-monitoring","type":"Tag","_raw":{}},{"slug":"azure","type":"Tag","_raw":{}},{"slug":"kubernetes","type":"Tag","_raw":{}},{"slug":"aks","type":"Tag","_raw":{}},{"slug":"openai","type":"Tag","_raw":{}},{"slug":"genai","type":"Tag","_raw":{}}],"body":{"raw":"\\nAs we’ve shown in previous blogs, Elastic\xae provides a way to ingest and manage telemetry from the [Kubernetes 
cluster](https://www.elastic.co/blog/kubernetes-cluster-metrics-logs-monitoring) and the [application](https://www.elastic.co/blog/opentelemetry-observability) running on it. Elastic provides out-of-the-box dashboards to help with tracking metrics, [log management and analytics](https://www.elastic.co/blog/log-management-observability-operations), [APM functionality](https://www.elastic.co/blog/adding-free-and-open-elastic-apm-as-part-of-your-elastic-observability-deployment) (which also supports [native OpenTelemetry](https://www.elastic.co/blog/opentelemetry-observability)), and the ability to analyze everything with [AIOps features](https://www.elastic.co/blog/observability-logs-machine-learning-aiops) and [machine learning](https://www.elastic.co/what-is/elasticsearch-machine-learning?elektra=home) (ML). While you can use pre-existing [ML models in Elastic](https://www.elastic.co/blog/improving-information-retrieval-elastic-stack-search-relevance), [out-of-the-box AIOps features](https://www.elastic.co/blog/aiops-automation-analytics-elastic-observability-use-cases), or your own ML models, there is still a need to dig deeper into the root cause of an issue.\\n\\nElastic helps reduce the operational work to support more efficient operations, but users still need a way to investigate and understand everything from the cause of an issue to the meaning of specific error messages. As an operations user, if you haven’t run into a particular error before or it isn\'t part of some runbook, you will likely go to Google and start searching for information.\\n\\nOpenAI’s ChatGPT is becoming an interesting generative AI tool that helps provide more information using the models behind it. What if you could use OpenAI to obtain deeper insights (even simple semantics) for an error in your production or development environment? You can easily tie Elastic to OpenAI’s API to achieve this.\\n\\nKubernetes, a mainstay in most deployments (on-prem or in a cloud service provider), requires a significant amount of expertise — even if that expertise is to manage a service like GKE, EKS, or AKS.\\n\\nIn this blog, I will cover how you can use [Elastic’s watcher](https://www.elastic.co/guide/en/kibana/current/watcher-ui.html) capability to connect Elastic to OpenAI and ask it for more information about the error logs Elastic is ingesting from your Kubernetes cluster(s). More specifically, we will use [Azure’s OpenAI Service](https://azure.microsoft.com/en-us/products/cognitive-services/openai-service). Azure OpenAI is a partnership between Microsoft and OpenAI, so the same models from OpenAI are available in the Microsoft version.\\n\\n![elastic azure openai](/assets/images/kubernetes-errors-observability-logs-openai/blog-elastic-azure-openai.png)\\n\\nWhile this blog goes over a specific example, it can be modified for other types of errors Elastic receives in logs.
Whether it\'s from AWS, the application, databases, etc., the configuration and script described in this blog can be modified easily.\n\n## Prerequisites and config\n\nIf you plan on following this blog, here are some of the components and details we used to set up the configuration:\n\n- Ensure you have an account on [Elastic Cloud](https://cloud.elastic.co) and a deployed stack ([see instructions here](https://www.elastic.co/guide/en/elastic-stack/current/installing-elastic-stack.html)).\n- We used a GCP GKE Kubernetes cluster, but you can use any Kubernetes cluster service (on-prem or cloud based) of your choice.\n- We’re also running with a version of the OpenTelemetry Demo. Directions for using Elastic with the OpenTelemetry Demo are [here](https://github.com/elastic/opentelemetry-demo).\n- We also have an Azure account and [Azure OpenAI service configured](https://azure.microsoft.com/en-us/products/cognitive-services/openai-service). You will need to get the appropriate tokens from Azure and the proper URL endpoint from Azure’s OpenAI service.\n- We will use [Elastic’s dev tools](https://www.elastic.co/guide/en/kibana/current/devtools-kibana.html), specifically the console, to load up and run the script, which is an [Elastic watcher](https://www.elastic.co/guide/en/kibana/current/watcher-ui.html).\n- We will also add a new index to store the results from the OpenAI query.\n\nHere is the configuration we will set up in this blog:\n\n![Configuration to analyze Kubernetes cluster errors](/assets/images/kubernetes-errors-observability-logs-openai/blog-elastic-configuration.png)\n\nAs we walk through the setup, we’ll also provide the alternative setup with OpenAI versus Azure OpenAI Service.\n\n## Setting it all up\n\nOver the next few steps, I’ll walk through:\n\n- Getting an account on Elastic Cloud and setting up your K8S cluster and application\n- Gaining Azure OpenAI authorization (alternative option with OpenAI)\n- Identifying Kubernetes error logs\n- Configuring the watcher with the right script\n- Comparing the output from Azure OpenAI/OpenAI versus ChatGPT UI\n\n### Step 0: Create an account on Elastic Cloud\n\nFollow the instructions to [get started on Elastic Cloud](https://cloud.elastic.co/registration?fromURI=/home).\n\n![elastic start cloud trial](/assets/images/kubernetes-errors-observability-logs-openai/blog-elastic-start-cloud-trial.png)\n\nOnce you have the Elastic Cloud login, set up your Kubernetes cluster and application. A blog with complete step-by-step instructions is available [here](https://www.elastic.co/blog/kubernetes-cluster-metrics-logs-monitoring). This also provides an overview of how to see Kubernetes cluster metrics in Elastic and how to monitor them with dashboards.\n\n### Step 1: Azure OpenAI Service and authorization\n\nWhen you log in to your Azure subscription and set up an instance of Azure OpenAI Service, you will be able to get your keys under Manage Keys.\n\n![microsoft azure manage keys](/assets/images/kubernetes-errors-observability-logs-openai/blog-elastic-microsoft-azure-manage-keys.png)\n\nThere are two keys for your OpenAI instance, but you only need KEY 1.\n\n![Used with permission from Microsoft.](/assets/images/kubernetes-errors-observability-logs-openai/blog-elastic-pme-openai-keys-and-endpoint.png)\n\nAdditionally, you will need to get the service URL. 
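With KEY 1 and the service URL in hand, it can be worth sanity-checking the endpoint before wiring it into the watcher. Here is a minimal check with curl; the resource name, deployment name, and key are placeholders, and the deployment path and API version mirror the watcher request used later in this post:

```bash
# Minimal Azure OpenAI chat completions request; replace the placeholders
# with your own resource name, deployment name, and KEY 1 value.
curl "https://YOURSERVICENAME.openai.azure.com/openai/deployments/pme-gpt-35-turbo/chat/completions?api-version=2023-03-15-preview" \
  -H "api-key: YOUR_KEY_1" \
  -H "Content-Type: application/json" \
  -d '{"messages": [{"role": "user", "content": "Say hello"}], "max_tokens": 32}'
```

A JSON response containing a choices array confirms that the key and endpoint are working.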
See the image above with our service URL blanked out to understand where to get the KEY 1 and URL.\n\nIf you are using the standard OpenAI service rather than Azure OpenAI Service, you can get your keys at:\n\n```bash\nhttps://platform.openai.com/account/api-keys\n```\n\n![api keys](/assets/images/kubernetes-errors-observability-logs-openai/blog-elastic-api-keys.png)\n\nYou will need to create a key and save it. Once you have the key, you can go to Step 2.\n\n### Step 2: Identifying Kubernetes errors in Elastic logs\n\nAs your Kubernetes cluster is running, [Elastic’s Kubernetes integration](https://docs.elastic.co/en/integrations/kubernetes) running on the Elastic agent daemon set on your cluster is sending logs and metrics to Elastic. [The telemetry is ingested, processed, and indexed](https://www.elastic.co/blog/log-monitoring-management-enterprise). Kubernetes logs are stored in an index called .ds-logs-kubernetes.container_logs-default-\\* (\\* is for the date), and an automatic data stream logs-kubernetes.container_logs is also pre-loaded. So while you can use some of the out-of-the-box dashboards to investigate the metrics, you can also look at all the logs in Elastic Discover.\n\nWhile any error from Kubernetes can be daunting, the more nuanced issues occur with errors from the pods running in the kube-system namespace. Take the konnectivity agent pod, which is essentially a network proxy agent running on the node to help establish tunnels and is a vital component in Kubernetes. Any error will cause the cluster to have connectivity problems and lead to a cascade of issues, so it’s important to understand and troubleshoot these errors.\n\nWhen we filter for error logs from the konnectivity agent, we see a good number of errors.\n\n![expanded document](/assets/images/kubernetes-errors-observability-logs-openai/blog-elastic-expanded-document.png)\n\nBut unfortunately, we still can’t understand what these errors mean.\n\nEnter OpenAI to help us understand the issue better. Generally, you would take the error message from Discover and paste it with a question in ChatGPT (or run a Google search on the message).\n\nOne error in particular that we’ve run into but do not understand is:\n\n```bash\nE0510 02:51:47.138292 1 client.go:388] could not read stream err=rpc error: code = Unavailable desc = error reading from server: read tcp 10.120.0.8:46156->35.230.74.219:8132: read: connection timed out serverID=632d489f-9306-4851-b96b-9204b48f5587 agentID=e305f823-5b03-47d3-a898-70031d9f4768\n```\n\nThe OpenAI output is as follows:\n\n![openai output](/assets/images/kubernetes-errors-observability-logs-openai/blog-elastic-openai-output.png)\n\nChatGPT has given us a fairly nice set of ideas on why this rpc error is occurring for our konnectivity-agent.\n\nSo how can we get this output automatically for any error when those errors occur?\n\n### Step 3: Configuring the watcher with the right script\n\n[What is an Elastic watcher?](https://www.elastic.co/guide/en/kibana/current/watcher-ui.html) Watcher is an Elasticsearch feature that you can use to create actions based on conditions, which are periodically evaluated using queries on your data. Watchers are helpful for analyzing mission-critical and business-critical streaming data. For example, you might watch application logs for errors causing larger operational issues.\n\nOnce a watcher is configured, it can be:\n\n1. Manually triggered\n2. Run periodically\n3. 
Created using a UI or a script\n\nIn this scenario, we will use a script, as we can modify it easily and run it as needed.\n\nWe’re using the DevTools Console to enter the script and test it out:\n\n![test script](/assets/images/kubernetes-errors-observability-logs-openai/blog-elastic-test-script.png)\n\nThe script is listed at the end of the blog in the **appendix**. It can also be downloaded [**here**](https://github.com/elastic/chatgpt-error-analysis).\n\nThe script does the following:\n\n1. It runs continuously every five minutes.\n2. It will search the logs for errors from the container konnectivity-agent.\n3. It will take the first error’s message, transform it (re-format and clean up), and place it into a variable first_hit.\n\n```json\n\"script\": \"return [\'first_hit\': ctx.payload.first.hits.hits.0._source.message.replace(\'\\\\\"\', \\\\\"\\\\\")]\"\n```\n\n4. The error message is sent into OpenAI with a query:\n\n```yaml\nWhat are the potential reasons for the following kubernetes error:\n { { ctx.payload.second.first_hit } }\n```\n\n5. If the search yielded an error, it will then create an index and place the error message, pod.name (which is konnectivity-agent-6676d5695b-ccsmx in our setup), and OpenAI output into a new index called chatgpt_k8s_analyzed.\n\nTo see the results, we created a new data view called chatgpt_k8s_analyzed against the newly created index:\n\n![edit data view](/assets/images/kubernetes-errors-observability-logs-openai/blog-elastic-edit-data-view.png)\n\nIn Discover, the output on the data view provides us with the analysis of the errors.\n\n![analysis of errors](/assets/images/kubernetes-errors-observability-logs-openai/blog-elastic-analysis-of-errors.png)\n\nFor every error the script sees in the five-minute interval, it will get an analysis of the error. Alternatively, we could use a range to analyze a specific time frame. The script would just need to be modified accordingly.
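You can also verify the watcher output directly from the DevTools Console rather than through a data view. A quick sketch, assuming the index name chatgpt_k8s_analyzed and the field names used in the appendix script:

```bash
# Fetch the five most recent analyses written by the watcher
GET chatgpt_k8s_analyzed/_search
{
  "size": 5,
  "sort": [{ "timestamp": { "order": "desc" } }],
  "_source": ["timestamp", "pod_name", "error_message", "chatgpt_analysis"]
}
```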
### Step 4. Output from Azure OpenAI/OpenAI vs. ChatGPT UI\n\nAs you noticed above, we got relatively the same result from the Azure OpenAI API call as we did by testing out our query in the ChatGPT UI. This is because we configured the API call to run the same/similar model as what was selected in the UI.\n\nFor the API call, we used the following parameters:\n\n```json\n\"request\": {\n \"method\" : \"POST\",\n \"url\": \"https://XXX.openai.azure.com/openai/deployments/pme-gpt-35-turbo/chat/completions?api-version=2023-03-15-preview\",\n \"headers\": {\"api-key\" : \"XXXXXXX\",\n \"content-type\" : \"application/json\"\n },\n \"body\" : \"{ \\\\\"messages\\\\\": [ { \\\\\"role\\\\\": \\\\\"system\\\\\", \\\\\"content\\\\\": \\\\\"You are a helpful assistant.\\\\\"}, { \\\\\"role\\\\\": \\\\\"user\\\\\", \\\\\"content\\\\\": \\\\\"What are the potential reasons for the following kubernetes error: {{ctx.payload.second.first_hit}}\\\\\"}], \\\\\"temperature\\\\\": 0.5, \\\\\"max_tokens\\\\\": 2048}\" ,\n \"connection_timeout\": \"60s\",\n \"read_timeout\": \"60s\"\n }\n```\n\nBy setting the system role to You are a helpful assistant and using the gpt-35-turbo portion of the URL, we are setting the API to use the gpt-3.5-turbo chat model, the same model the ChatGPT UI selects by default.\n\nAdditionally, for Azure OpenAI Service, you will need to set the URL to something similar to the following:\n\n```bash\nhttps://YOURSERVICENAME.openai.azure.com/openai/deployments/pme-gpt-35-turbo/chat/completions?api-version=2023-03-15-preview\n```\n\nIf you use OpenAI (versus Azure OpenAI Service), the request call (against [https://api.openai.com/v1/completions](https://api.openai.com/v1/completions)) would look like the following:\n\n```json\n\"request\": {\n \"scheme\": \"https\",\n \"host\": \"api.openai.com\",\n \"port\": 443,\n \"method\": \"post\",\n \"path\": \"\\\\/v1\\\\/completions\",\n \"params\": {},\n \"headers\": {\n \"content-type\": \"application\\\\/json\",\n \"authorization\": \"Bearer YOUR_ACCESS_TOKEN\"\n },\n \"body\": \"{ \\\\\"model\\\\\": \\\\\"text-davinci-003\\\\\", \\\\\"prompt\\\\\": \\\\\"What are the potential reasons for the following kubernetes error: {{ctx.payload.second.first_hit}}\\\\\", \\\\\"temperature\\\\\": 1, \\\\\"max_tokens\\\\\": 512, \\\\\"top_p\\\\\": 1.0, \\\\\"frequency_penalty\\\\\": 0.0, \\\\\"presence_penalty\\\\\": 0.0 }\",\n \"connection_timeout_in_millis\": 60000,\n \"read_timeout_millis\": 60000\n }\n```\n\nIf you are interested in creating an OpenAI-based version, you can [download an alternative script](https://elastic-content-share.eu/downloads/watcher-job-to-integrate-chatgpt-in-elasticsearch/) and look at [another blog from an Elastic community member](https://mar1.hashnode.dev/unlocking-the-power-of-aiops-with-chatgpt-and-elasticsearch).\n\n## Gaining other insights beyond Kubernetes logs\n\nNow that the script is up and running, you can modify it using different:\n\n- Inputs\n- Conditions\n- Actions\n- Transforms\n\nLearn more on how to modify it [here](https://www.elastic.co/guide/en/elasticsearch/reference/current/xpack-alerting.html). Some examples of modifications could include:\n\n1. Look for error logs from application components (e.g., cartService, frontEnd, from the OTel demo), cloud service providers (e.g., AWS/Azure/GCP logs), and even logs from components such as Kafka, databases, etc.\n2. Vary the time frame from running continuously to running over a specific [range](https://www.elastic.co/guide/en/elasticsearch/reference/current/query-dsl-range-query.html).\n3. 
Look for specific errors in the logs.\\n4. Query for analysis on a set of errors at once versus just one, which we demonstrated.\\n\\nThe modifications are endless, and of course you can run this with OpenAI rather than Azure OpenAI Service.\\n\\n## Conclusion\\n\\nI hope you’ve gotten an appreciation for how Elastic Observability can help you connect to OpenAI services (Azure OpenAI, as we showed, or even OpenAI) to better analyze an error log message instead of having to run several Google searches and hunt for possible insights.\\n\\nHere’s a quick recap of what we covered:\\n\\n- Developing an Elastic watcher script that can be used to find and send Kubernetes errors into OpenAI and insert them into a new index\\n- Configuring Azure OpenAI Service or OpenAI with the right authorization and request parameters\\n\\nReady to get started? Sign up [for Elastic Cloud](https://cloud.elastic.co/registration) and try out the features and capabilities I’ve outlined above to get the most value and visibility out of your OpenTelemetry data.\\n\\n## Appendix\\n\\nWatcher script\\n\\n```bash\\nPUT _watcher/watch/chatgpt_analysis\\n{\\n \\"trigger\\": {\\n \\"schedule\\": {\\n \\"interval\\": \\"5m\\"\\n }\\n },\\n \\"input\\": {\\n \\"chain\\": {\\n \\"inputs\\": [\\n {\\n \\"first\\": {\\n \\"search\\": {\\n \\"request\\": {\\n \\"search_type\\": \\"query_then_fetch\\",\\n \\"indices\\": [\\n \\"logs-kubernetes*\\"\\n ],\\n \\"rest_total_hits_as_int\\": true,\\n \\"body\\": {\\n \\"query\\": {\\n \\"bool\\": {\\n \\"must\\": [\\n {\\n \\"match\\": {\\n \\"kubernetes.container.name\\": \\"konnectivity-agent\\"\\n }\\n },\\n {\\n \\"match\\" : {\\n \\"message\\":\\"error\\"\\n }\\n }\\n ]\\n }\\n },\\n \\"size\\": \\"1\\"\\n }\\n }\\n }\\n }\\n },\\n {\\n \\"second\\": {\\n \\"transform\\": {\\n \\"script\\": \\"return [\'first_hit\': ctx.payload.first.hits.hits.0._source.message.replace(\'\\\\\\"\', \\\\\\"\\\\\\")]\\"\\n }\\n }\\n },\\n {\\n \\"third\\": {\\n \\"http\\": {\\n \\"request\\": {\\n \\"method\\" : \\"POST\\",\\n \\"url\\": \\"https://XXX.openai.azure.com/openai/deployments/pme-gpt-35-turbo/chat/completions?api-version=2023-03-15-preview\\",\\n \\"headers\\": {\\n \\"api-key\\" : \\"XXX\\",\\n \\"content-type\\" : \\"application/json\\"\\n },\\n \\"body\\" : \\"{ \\\\\\"messages\\\\\\": [ { \\\\\\"role\\\\\\": \\\\\\"system\\\\\\", \\\\\\"content\\\\\\": \\\\\\"You are a helpful assistant.\\\\\\"}, { \\\\\\"role\\\\\\": \\\\\\"user\\\\\\", \\\\\\"content\\\\\\": \\\\\\"What are the potential reasons for the following kubernetes error: {{ctx.payload.second.first_hit}}\\\\\\"}], \\\\\\"temperature\\\\\\": 0.5, \\\\\\"max_tokens\\\\\\": 2048}\\" ,\\n \\"connection_timeout\\": \\"60s\\",\\n \\"read_timeout\\": \\"60s\\"\\n }\\n }\\n }\\n }\\n ]\\n }\\n },\\n \\"condition\\": {\\n \\"compare\\": {\\n \\"ctx.payload.first.hits.total\\": {\\n \\"gt\\": 0\\n }\\n }\\n },\\n \\"actions\\": {\\n \\"index_payload\\" : {\\n \\"transform\\": {\\n \\"script\\": {\\n \\"source\\": \\"\\"\\"\\n def payload = [:];\\n payload.timestamp = new Date();\\n payload.pod_name = ctx.payload.first.hits.hits[0]._source.kubernetes.pod.name;\\n payload.error_message = ctx.payload.second.first_hit;\\n payload.chatgpt_analysis = ctx.payload.third.choices[0].message.content;\\n return payload;\\n \\"\\"\\"\\n }\\n },\\n \\"index\\" : {\\n \\"index\\" : \\"chatgpt_k8s_analyzed\\"\\n }\\n }\\n }\\n}\\n```\\n\\n### Additional logging resources:\\n\\n- [Getting started with logging on Elastic 
(quickstart)](https://www.elastic.co/getting-started/observability/collect-and-analyze-logs)\\n- [Ingesting common known logs via integrations (compute node example)](https://www.elastic.co/guide/en/observability/current/logs-metrics-get-started.html)\\n- [List of integrations](https://docs.elastic.co/integrations)\\n- [Ingesting custom application logs into Elastic](https://www.elastic.co/blog/log-monitoring-management-enterprise)\\n- [Enriching logs in Elastic](https://www.elastic.co/blog/observability-logs-parsing-schema-read-write)\\n- Analyzing Logs with [Anomaly Detection (ML)](https://www.elastic.co/blog/reduce-mttd-ml-machine-learning-observability) and [AIOps](https://www.elastic.co/blog/observability-logs-machine-learning-aiops)\\n\\n### Common use case examples with logs:\\n\\n- [Nginx log management](https://youtu.be/ax04ZFWqVCg)\\n- [AWS VPC Flow log management](https://www.elastic.co/blog/vpc-flow-logs-monitoring-analytics-observability)\\n- [Using OpenAI to analyze Kubernetes errors](https://www.elastic.co/blog/kubernetes-errors-observability-logs-openai)\\n- [PostgreSQL issue analysis with AIOps](https://youtu.be/Li5TJAWbz8Q)\\n\\n_In this blog post, we may have used third party generative AI tools, which are owned and operated by their respective owners. Elastic does not have any control over the third party tools and we have no responsibility or liability for their content, operation or use, nor for any loss or damage that may arise from your use of such tools. Please exercise caution when using AI tools with personal, sensitive or confidential information. Any data you submit may be used for AI training or other purposes. There is no guarantee that information you provide will be kept secure or confidential. You should familiarize yourself with the privacy practices and terms of use of any generative AI tools prior to use._\\n\\n_Elastic, Elasticsearch and associated marks are trademarks, logos or registered trademarks of Elasticsearch N.V. in the United States and other countries. 
All other company and product names are trademarks, logos or registered trademarks of their respective owners._\\n\\n_Screenshots of Microsoft products used with permission from Microsoft._\\n","code":"var Component=(()=>{var p=Object.create;var o=Object.defineProperty;var u=Object.getOwnPropertyDescriptor;var g=Object.getOwnPropertyNames;var m=Object.getPrototypeOf,f=Object.prototype.hasOwnProperty;var w=(t,e)=>()=>(e||t((e={exports:{}}).exports,e),e.exports),y=(t,e)=>{for(var i in e)o(t,i,{get:e[i],enumerable:!0})},s=(t,e,i,a)=>{if(e&&typeof e==\\"object\\"||typeof e==\\"function\\")for(let r of g(e))!f.call(t,r)&&r!==i&&o(t,r,{get:()=>e[r],enumerable:!(a=u(e,r))||a.enumerable});return t};var b=(t,e,i)=>(i=t!=null?p(m(t)):{},s(e||!t||!t.__esModule?o(i,\\"default\\",{value:t,enumerable:!0}):i,t)),v=t=>s(o({},\\"__esModule\\",{value:!0}),t);var c=w((O,l)=>{l.exports=_jsx_runtime});var k={};y(k,{default:()=>d,frontmatter:()=>A});var n=b(c()),A={title:\\"Gain insights into Kubernetes errors with Elastic Observability logs and OpenAI\\",slug:\\"kubernetes-errors-observability-logs-openai\\",date:\\"2023-05-18\\",description:\\"This blog post provides an example of how one can analyze error messages in Elasticsearch with ChatGPT using the OpenAI API via Elasticsearch.\\",author:[{slug:\\"bahubali-shetti\\"}],image:\\"blog-elastic-configuration.png\\",tags:[{slug:\\"cloud-monitoring\\"},{slug:\\"azure\\"},{slug:\\"kubernetes\\"},{slug:\\"aks\\"},{slug:\\"openai\\"},{slug:\\"genai\\"}]};function h(t){let e={a:\\"a\\",code:\\"code\\",em:\\"em\\",h2:\\"h2\\",h3:\\"h3\\",img:\\"img\\",li:\\"li\\",ol:\\"ol\\",p:\\"p\\",pre:\\"pre\\",strong:\\"strong\\",ul:\\"ul\\",...t.components};return(0,n.jsxs)(n.Fragment,{children:[(0,n.jsxs)(e.p,{children:[\\"As we\\\\u2019ve shown in previous blogs, Elastic\\",(0,n.jsx)(\\"sup\\",{children:\\"\\\\xAE\\"}),\\" provides a way to ingest and manage telemetry from the \\",(0,n.jsx)(e.a,{href:\\"https://www.elastic.co/blog/kubernetes-cluster-metrics-logs-monitoring\\",rel:\\"nofollow\\",children:\\"Kubernetes cluster\\"}),\\" and the \\",(0,n.jsx)(e.a,{href:\\"https://www.elastic.co/blog/opentelemetry-observability\\",rel:\\"nofollow\\",children:\\"application\\"}),\\" running on it. Elastic provides out-of-the-box dashboards to help with tracking metrics, \\",(0,n.jsx)(e.a,{href:\\"https://www.elastic.co/blog/log-management-observability-operations\\",rel:\\"nofollow\\",children:\\"log management and analytics\\"}),\\", \\",(0,n.jsx)(e.a,{href:\\"https://www.elastic.co/blog/adding-free-and-open-elastic-apm-as-part-of-your-elastic-observability-deployment\\",rel:\\"nofollow\\",children:\\"APM functionality\\"}),\\" (which also supports \\",(0,n.jsx)(e.a,{href:\\"https://www.elastic.co/blog/opentelemetry-observability\\",rel:\\"nofollow\\",children:\\"native OpenTelemetry\\"}),\\"), and the ability to analyze everything with \\",(0,n.jsx)(e.a,{href:\\"https://www.elastic.co/blog/observability-logs-machine-learning-aiops\\",rel:\\"nofollow\\",children:\\"AIOps features\\"}),\\" and \\",(0,n.jsx)(e.a,{href:\\"https://www.elastic.co/what-is/elasticsearch-machine-learning?elektra=home\\",rel:\\"nofollow\\",children:\\"machine learning\\"}),\\" (ML). 
While you can use pre-existing \\",(0,n.jsx)(e.a,{href:\\"https://www.elastic.co/blog/improving-information-retrieval-elastic-stack-search-relevance\\",rel:\\"nofollow\\",children:\\"ML models in Elastic\\"}),\\", \\",(0,n.jsx)(e.a,{href:\\"https://www.elastic.co/blog/aiops-automation-analytics-elastic-observability-use-cases\\",rel:\\"nofollow\\",children:\\"out-of-the-box AIOps features\\"}),\\", or your own ML models, there is a need to dig deeper into the root cause of an issue.\\"]}),`\\n`,(0,n.jsx)(e.p,{children:\\"Elastic helps reduce the operational work to support more efficient operations, but users still need a way to investigate and understand everything from the cause of an issue to the meaning of specific error messages. As an operations user, if you haven\\\\u2019t run into a particular error before or it\'s part of some runbook, you will likely go to Google and start searching for information.\\"}),`\\n`,(0,n.jsx)(e.p,{children:\\"OpenAI\\\\u2019s ChatGPT is becoming an interesting generative AI tool that helps provide more information using the models behind it. What if you could use OpenAI to obtain deeper insights (even simple semantics) for an error in your production or development environment? You can easily tie Elastic to OpenAI\\\\u2019s API to achieve this.\\"}),`\\n`,(0,n.jsx)(e.p,{children:\\"Kubernetes, a mainstay in most deployments (on-prem or in a cloud service provider) requires a significant amount of expertise \\\\u2014 even if that expertise is to manage a service like GKE, EKS, or AKS.\\"}),`\\n`,(0,n.jsxs)(e.p,{children:[\\"In this blog, I will cover how you can use \\",(0,n.jsx)(e.a,{href:\\"https://www.elastic.co/guide/en/kibana/current/watcher-ui.html\\",rel:\\"nofollow\\",children:\\"Elastic\\\\u2019s watcher\\"}),\\" capability to connect Elastic to OpenAI and ask it for more information about the error logs Elastic is ingesting from a Kubernetes cluster(s). More specifically, we will use \\",(0,n.jsx)(e.a,{href:\\"https://azure.microsoft.com/en-us/products/cognitive-services/openai-service\\",rel:\\"nofollow\\",children:\\"Azure\\\\u2019s OpenAI Service\\"}),\\". Azure OpenAI is a partnership between Microsoft and OpenAI, so the same models from OpenAI are available in the Microsoft version.\\"]}),`\\n`,(0,n.jsx)(e.p,{children:(0,n.jsx)(e.img,{src:\\"/assets/images/kubernetes-errors-observability-logs-openai/blog-elastic-azure-openai.png\\",alt:\\"elastic azure openai\\",width:\\"1999\\",height:\\"1067\\"})}),`\\n`,(0,n.jsx)(e.p,{children:\\"While this blog goes over a specific example, it can be modified for other types of errors Elastic receives in logs. 
Whether it\'s from AWS, the application, databases, etc., the configuration and script described in this blog can be modified easily.\\"}),`\\n`,(0,n.jsx)(e.h2,{id:\\"prerequisites-and-config\\",children:\\"Prerequisites and config\\"}),`\\n`,(0,n.jsx)(e.p,{children:\\"If you plan on following this blog, here are some of the components and details we used to set up the configuration:\\"}),`\\n`,(0,n.jsxs)(e.ul,{children:[`\\n`,(0,n.jsxs)(e.li,{children:[\\"Ensure you have an account on \\",(0,n.jsx)(e.a,{href:\\"https://cloud.elastic.co\\",rel:\\"nofollow\\",children:\\"Elastic Cloud\\"}),\\" and a deployed stack (\\",(0,n.jsx)(e.a,{href:\\"https://www.elastic.co/guide/en/elastic-stack/current/installing-elastic-stack.html\\",rel:\\"nofollow\\",children:\\"see instructions here\\"}),\\").\\"]}),`\\n`,(0,n.jsx)(e.li,{children:\\"We used a GCP GKE Kubernetes cluster, but you can use any Kubernetes cluster service (on-prem or cloud based) of your choice.\\"}),`\\n`,(0,n.jsxs)(e.li,{children:[\\"We\\\\u2019re also running with a version of the OpenTelemetry Demo. Directions for using Elastic with OpenTelemetry Demo are \\",(0,n.jsx)(e.a,{href:\\"https://github.com/elastic/opentelemetry-demo\\",rel:\\"nofollow\\",children:\\"here\\"}),\\".\\"]}),`\\n`,(0,n.jsxs)(e.li,{children:[\\"We also have an Azure account and \\",(0,n.jsx)(e.a,{href:\\"https://azure.microsoft.com/en-us/products/cognitive-services/openai-service\\",rel:\\"nofollow\\",children:\\"Azure OpenAI service configured\\"}),\\". You will need to get the appropriate tokens from Azure and the proper URL endpoint from Azure\\\\u2019s OpenAI service.\\"]}),`\\n`,(0,n.jsxs)(e.li,{children:[\\"We will use \\",(0,n.jsx)(e.a,{href:\\"https://www.elastic.co/guide/en/kibana/current/devtools-kibana.html\\",rel:\\"nofollow\\",children:\\"Elastic\\\\u2019s dev tools\\"}),\\", the console to be specific, to load up and run the script, which is an \\",(0,n.jsx)(e.a,{href:\\"https://www.elastic.co/guide/en/kibana/current/watcher-ui.html\\",rel:\\"nofollow\\",children:\\"Elastic watcher\\"}),\\".\\"]}),`\\n`,(0,n.jsx)(e.li,{children:\\"We will also add a new index to store the results from the OpenAI query.\\"}),`\\n`]}),`\\n`,(0,n.jsx)(e.p,{children:\\"Here is the configuration we will set up in this blog:\\"}),`\\n`,(0,n.jsx)(e.p,{children:(0,n.jsx)(e.img,{src:\\"/assets/images/kubernetes-errors-observability-logs-openai/blog-elastic-configuration.png\\",alt:\\"Configuration to analyze Kubernetes cluster errors\\",width:\\"921\\",height:\\"409\\"})}),`\\n`,(0,n.jsx)(e.p,{children:\\"As we walk through the setup, we\\\\u2019ll also provide the alternative setup with OpenAI versus Azure OpenAI Service.\\"}),`\\n`,(0,n.jsx)(e.h2,{id:\\"setting-it-all-up\\",children:\\"Setting it all up\\"}),`\\n`,(0,n.jsx)(e.p,{children:\\"Over the next few steps, I\\\\u2019ll walk through:\\"}),`\\n`,(0,n.jsxs)(e.ul,{children:[`\\n`,(0,n.jsx)(e.li,{children:\\"Getting an account on Elastic Cloud and setting up your K8S cluster and application\\"}),`\\n`,(0,n.jsx)(e.li,{children:\\"Gaining Azure OpenAI authorization (alternative option with OpenAI)\\"}),`\\n`,(0,n.jsx)(e.li,{children:\\"Identifying Kubernetes error logs\\"}),`\\n`,(0,n.jsx)(e.li,{children:\\"Configuring the watcher with the right script\\"}),`\\n`,(0,n.jsx)(e.li,{children:\\"Comparing the output from Azure OpenAI/OpenAI versus ChatGPT UI\\"}),`\\n`]}),`\\n`,(0,n.jsx)(e.h3,{id:\\"step-0-create-an-account-on-elastic-cloud\\",children:\\"Step 0: Create an account on Elastic 
Cloud\\"}),`\\n`,(0,n.jsxs)(e.p,{children:[\\"Follow the instructions to \\",(0,n.jsx)(e.a,{href:\\"https://cloud.elastic.co/registration?fromURI=/home\\",rel:\\"nofollow\\",children:\\"get started on Elastic Cloud\\"}),\\".\\"]}),`\\n`,(0,n.jsx)(e.p,{children:(0,n.jsx)(e.img,{src:\\"/assets/images/kubernetes-errors-observability-logs-openai/blog-elastic-start-cloud-trial.png\\",alt:\\"elastic start cloud trial\\",width:\\"884\\",height:\\"654\\"})}),`\\n`,(0,n.jsxs)(e.p,{children:[\\"Once you have the Elastic Cloud login, set up your Kubernetes cluster and application. A complete step-by-step instructions blog is available \\",(0,n.jsx)(e.a,{href:\\"https://www.elastic.co/blog/kubernetes-cluster-metrics-logs-monitoring\\",rel:\\"nofollow\\",children:\\"here\\"}),\\". This also provides an overview of how to see Kubernetes cluster metrics in Elastic and how to monitor them with dashboards.\\"]}),`\\n`,(0,n.jsx)(e.h3,{id:\\"step-1-azure-openai-service-and-authorization\\",children:\\"Step 1: Azure OpenAI Service and authorization\\"}),`\\n`,(0,n.jsx)(e.p,{children:\\"When you log in to your Azure subscription and set up an instance of Azure OpenAI Service, you will be able to get your keys under Manage Keys.\\"}),`\\n`,(0,n.jsx)(e.p,{children:(0,n.jsx)(e.img,{src:\\"/assets/images/kubernetes-errors-observability-logs-openai/blog-elastic-microsoft-azure-manage-keys.png\\",alt:\\"microsoft azure manage keys\\",width:\\"1999\\",height:\\"558\\"})}),`\\n`,(0,n.jsx)(e.p,{children:\\"There are two keys for your OpenAI instance, but you only need KEY 1 .\\"}),`\\n`,(0,n.jsx)(e.p,{children:(0,n.jsx)(e.img,{src:\\"/assets/images/kubernetes-errors-observability-logs-openai/blog-elastic-pme-openai-keys-and-endpoint.png\\",alt:\\"Used with permission from Microsoft.\\",width:\\"1999\\",height:\\"1211\\"})}),`\\n`,(0,n.jsx)(e.p,{children:\\"Additionally, you will need to get the service URL. See the image above with our service URL blanked out to understand where to get the KEY 1 and URL.\\"}),`\\n`,(0,n.jsx)(e.p,{children:\\"If you are not using Azure OpenAI Service and the standard OpenAI service, then you can get your keys at:\\"}),`\\n`,(0,n.jsx)(e.pre,{children:(0,n.jsx)(e.code,{className:\\"language-bash\\",children:`**https** ://platform.openai.com/account/api-keys\\n`})}),`\\n`,(0,n.jsx)(e.p,{children:(0,n.jsx)(e.img,{src:\\"/assets/images/kubernetes-errors-observability-logs-openai/blog-elastic-api-keys.png\\",alt:\\"api keys\\",width:\\"1999\\",height:\\"801\\"})}),`\\n`,(0,n.jsx)(e.p,{children:\\"You will need to create a key and save it. Once you have the key, you can go to Step 2.\\"}),`\\n`,(0,n.jsx)(e.h3,{id:\\"step-2-identifying-kubernetes-errors-in-elastic-logs\\",children:\\"Step 2: Identifying Kubernetes errors in Elastic logs\\"}),`\\n`,(0,n.jsxs)(e.p,{children:[\\"As your Kubernetes cluster is running, \\",(0,n.jsx)(e.a,{href:\\"https://docs.elastic.co/en/integrations/kubernetes\\",rel:\\"nofollow\\",children:\\"Elastic\\\\u2019s Kubernetes integration\\"}),\\" running on the Elastic agent daemon set on your cluster is sending logs and metrics to Elastic. \\",(0,n.jsx)(e.a,{href:\\"https://www.elastic.co/blog/log-monitoring-management-enterprise\\",rel:\\"nofollow\\",children:\\"The telemetry is ingested, processed, and indexed\\"}),\\". Kubernetes logs are stored in an index called .ds-logs-kubernetes.container_logs-default-* (* is for the date), and an automatic data stream logs-kubernetes.container_logs is also pre-loaded. 
So while you can use some of the out-of-the-box dashboards to investigate the metrics, you can also look at all the logs in Elastic Discover.\\"]}),`\\n`,(0,n.jsx)(e.p,{children:\\"While any error from Kubernetes can be daunting, the more nuanced issues occur with errors from the pods running in the kube-system namespace. Take the pod konnectivity agent, which is essentially a network proxy agent running on the node to help establish tunnels and is a vital component in Kubernetes. Any error will cause the cluster to have connectivity issues and lead to a cascade of issues, so it\\\\u2019s important to understand and troubleshoot these errors.\\"}),`\\n`,(0,n.jsx)(e.p,{children:\\"When we filter out for error logs from the konnectivity agent, we see a good number of errors.\\"}),`\\n`,(0,n.jsx)(e.p,{children:(0,n.jsx)(e.img,{src:\\"/assets/images/kubernetes-errors-observability-logs-openai/blog-elastic-expanded-document.png\\",alt:\\"expanded document\\",width:\\"1999\\",height:\\"1001\\"})}),`\\n`,(0,n.jsx)(e.p,{children:\\"But unfortunately, we still can\\\\u2019t understand what these errors mean.\\"}),`\\n`,(0,n.jsx)(e.p,{children:\\"Enter OpenAI to help us understand the issue better. Generally, you would take the error message from Discover and paste it with a question in ChatGPT (or run a Google search on the message).\\"}),`\\n`,(0,n.jsx)(e.p,{children:\\"One error in particular that we\\\\u2019ve run into but do not understand is:\\"}),`\\n`,(0,n.jsx)(e.pre,{children:(0,n.jsx)(e.code,{className:\\"language-bash\\",children:`E0510 02:51:47.138292 1 client.go:388] could not read stream err=rpc error: code = Unavailable desc = error reading from server: read tcp 10.120.0.8:46156->35.230.74.219:8132: read: connection timed out serverID=632d489f-9306-4851-b96b-9204b48f5587 agentID=e305f823-5b03-47d3-a898-70031d9f4768\\n`})}),`\\n`,(0,n.jsx)(e.p,{children:\\"The OpenAI output is as follows:\\"}),`\\n`,(0,n.jsx)(e.p,{children:(0,n.jsx)(e.img,{src:\\"/assets/images/kubernetes-errors-observability-logs-openai/blog-elastic-openai-output.png\\",alt:\\"openai output\\",width:\\"1530\\",height:\\"1472\\"})}),`\\n`,(0,n.jsx)(e.p,{children:\\"ChatGPT has given us a fairly nice set of ideas on why this rpc error is occurring against our konnectivity-agent.\\"}),`\\n`,(0,n.jsx)(e.p,{children:\\"So how can we get this output automatically for any error when those errors occur?\\"}),`\\n`,(0,n.jsx)(e.h3,{id:\\"step-3-configuring-the-watcher-with-the-right-script\\",children:\\"Step 3: Configuring the watcher with the right script\\"}),`\\n`,(0,n.jsxs)(e.p,{children:[(0,n.jsx)(e.a,{href:\\"https://www.elastic.co/guide/en/kibana/current/watcher-ui.html\\",rel:\\"nofollow\\",children:\\"What is an Elastic watcher?\\"}),\\" Watcher is an Elasticsearch feature that you can use to create actions based on conditions, which are periodically evaluated using queries on your data. Watchers are helpful for analyzing mission-critical and business-critical streaming data. 
For example, you might watch application logs for errors causing larger operational issues.\\"]}),`\\n`,(0,n.jsx)(e.p,{children:\\"Once a watcher is configured, it can be:\\"}),`\\n`,(0,n.jsxs)(e.ol,{children:[`\\n`,(0,n.jsx)(e.li,{children:\\"Manually triggered\\"}),`\\n`,(0,n.jsx)(e.li,{children:\\"Run periodically\\"}),`\\n`,(0,n.jsx)(e.li,{children:\\"Created using a UI or a script\\"}),`\\n`]}),`\\n`,(0,n.jsx)(e.p,{children:\\"In this scenario, we will use a script, as we can modify it easily and run it as needed.\\"}),`\\n`,(0,n.jsx)(e.p,{children:\\"We\\\\u2019re using the DevTools Console to enter the script and test it out:\\"}),`\\n`,(0,n.jsx)(e.p,{children:(0,n.jsx)(e.img,{src:\\"/assets/images/kubernetes-errors-observability-logs-openai/blog-elastic-test-script.png\\",alt:\\"test script\\",width:\\"1450\\",height:\\"930\\"})}),`\\n`,(0,n.jsxs)(e.p,{children:[\\"The script is listed at the end of the blog in the \\",(0,n.jsx)(e.strong,{children:\\"appendix\\"}),\\". It can also be downloaded \\",(0,n.jsx)(e.a,{href:\\"https://github.com/elastic/chatgpt-error-analysis\\",rel:\\"nofollow\\",children:(0,n.jsx)(e.strong,{children:\\"here\\"})}),\\" \\",(0,n.jsx)(e.strong,{children:\\".\\"})]}),`\\n`,(0,n.jsx)(e.p,{children:\\"The script does the following:\\"}),`\\n`,(0,n.jsxs)(e.ol,{children:[`\\n`,(0,n.jsx)(e.li,{children:\\"It runs continuously every five minutes.\\"}),`\\n`,(0,n.jsx)(e.li,{children:\\"It will search the logs for errors from the container konnectivity-agent.\\"}),`\\n`,(0,n.jsx)(e.li,{children:\\"It will take the first error\\\\u2019s message, transform it (re-format and clean up), and place it into a variable first_hit.\\"}),`\\n`]}),`\\n`,(0,n.jsx)(e.pre,{children:(0,n.jsx)(e.code,{className:\\"language-json\\",children:`\\"script\\": \\"return [\'first_hit\': ctx.payload.first.hits.hits.0._source.message.replace(\'\\\\\\\\\\"\', \\\\\\\\\\"\\\\\\\\\\")]\\"\\n`})}),`\\n`,(0,n.jsxs)(e.ol,{start:\\"4\\",children:[`\\n`,(0,n.jsx)(e.li,{children:\\"The error message is sent into OpenAI with a query:\\"}),`\\n`]}),`\\n`,(0,n.jsx)(e.pre,{children:(0,n.jsx)(e.code,{className:\\"language-yaml\\",children:`What are the potential reasons for the following kubernetes error:\\n { { ctx.payload.second.first_hit } }\\n`})}),`\\n`,(0,n.jsxs)(e.ol,{start:\\"5\\",children:[`\\n`,(0,n.jsx)(e.li,{children:\\"If the search yielded an error, it will proceed to then create an index and place the error message, pod.name (which is konnectivity-agent-6676d5695b-ccsmx in our setup), and OpenAI output into a new index called chatgpt_k8_analyzed.\\"}),`\\n`]}),`\\n`,(0,n.jsx)(e.p,{children:\\"To see the results, we created a new data view called chatgpt_k8_analyzed against the newly created index:\\"}),`\\n`,(0,n.jsx)(e.p,{children:(0,n.jsx)(e.img,{src:\\"/assets/images/kubernetes-errors-observability-logs-openai/blog-elastic-edit-data-view.png\\",alt:\\"edit data view\\",width:\\"1450\\",height:\\"560\\"})}),`\\n`,(0,n.jsx)(e.p,{children:\\"In Discover, the output on the data view provides us with the analysis of the errors.\\"}),`\\n`,(0,n.jsx)(e.p,{children:(0,n.jsx)(e.img,{src:\\"/assets/images/kubernetes-errors-observability-logs-openai/blog-elastic-analysis-of-errors.png\\",alt:\\"analysis of errors\\",width:\\"1453\\",height:\\"819\\"})}),`\\n`,(0,n.jsx)(e.p,{children:\\"For every error the script sees in the five minute interval, it will get an analysis of the error. We could alternatively also use a range as needed to analyze during a specific time frame. 
The script would just need to be modified accordingly.\\"}),`\\n`,(0,n.jsx)(e.h3,{id:\\"step-4-output-from-azure-openaiopenai-vs-chatgpt-ui\\",children:\\"Step 4. Output from Azure OpenAI/OpenAI vs. ChatGPT UI\\"}),`\\n`,(0,n.jsx)(e.p,{children:\\"As you noticed above, we got relatively the same result from the Azure OpenAI API call as we did by testing out our query in the ChatGPT UI. This is because we configured the API call to run the same/similar model as what was selected in the UI.\\"}),`\\n`,(0,n.jsx)(e.p,{children:\\"For the API call, we used the following parameters:\\"}),`\\n`,(0,n.jsx)(e.pre,{children:(0,n.jsx)(e.code,{className:\\"language-json\\",children:`\\"request\\": {\\n \\"method\\" : \\"POST\\",\\n \\"Url\\": \\"https://XXX.openai.azure.com/openai/deployments/pme-gpt-35-turbo/chat/completions?api-version=2023-03-15-preview\\",\\n \\"headers\\": {\\"api-key\\" : \\"XXXXXXX\\",\\n \\"content-type\\" : \\"application/json\\"\\n },\\n \\"body\\" : \\"{ \\\\\\\\\\"messages\\\\\\\\\\": [ { \\\\\\\\\\"role\\\\\\\\\\": \\\\\\\\\\"system\\\\\\\\\\", \\\\\\\\\\"content\\\\\\\\\\": \\\\\\\\\\"You are a helpful assistant.\\\\\\\\\\"}, { \\\\\\\\\\"role\\\\\\\\\\": \\\\\\\\\\"user\\\\\\\\\\", \\\\\\\\\\"content\\\\\\\\\\": \\\\\\\\\\"What are the potential reasons for the following kubernetes error: {{ctx.payload.second.first_hit}}\\\\\\\\\\"}], \\\\\\\\\\"temperature\\\\\\\\\\": 0.5, \\\\\\\\\\"max_tokens\\\\\\\\\\": 2048}\\" ,\\n \\"connection_timeout\\": \\"60s\\",\\n \\"read_timeout\\": \\"60s\\"\\n }\\n`})}),`\\n`,(0,n.jsx)(e.p,{children:\\"By setting the role: system with You are a helpful assistant and using the gpt-35-turbo url portion, we are essentially setting the API to use the davinci model, which is the same as the ChatGPT UI model set by default.\\"}),`\\n`,(0,n.jsx)(e.p,{children:\\"Additionally, for Azure OpenAI Service, you will need to set the URL to something similar the following:\\"}),`\\n`,(0,n.jsx)(e.pre,{children:(0,n.jsx)(e.code,{className:\\"language-bash\\",children:`https://YOURSERVICENAME.openai.azure.com/openai/deployments/pme-gpt-35-turbo/chat/completions?api-version=2023-03-15-preview\\n`})}),`\\n`,(0,n.jsxs)(e.p,{children:[\\"If you use OpenAI (versus Azure OpenAI Service), the request call (against \\",(0,n.jsx)(e.a,{href:\\"https://api.openai.com/v1/completions\\",rel:\\"nofollow\\",children:\\"https://api.openai.com/v1/completions\\"}),\\") would be as such:\\"]}),`\\n`,(0,n.jsx)(e.pre,{children:(0,n.jsx)(e.code,{className:\\"language-json\\",children:`\\"request\\": {\\n \\"scheme\\": \\"https\\",\\n \\"host\\": \\"api.openai.com\\",\\n \\"port\\": 443,\\n \\"method\\": \\"post\\",\\n \\"path\\": \\"\\\\\\\\/v1\\\\\\\\/completions\\",\\n \\"params\\": {},\\n \\"headers\\": {\\n \\"content-type\\": \\"application\\\\\\\\/json\\",\\n \\"authorization\\": \\"Bearer YOUR_ACCESS_TOKEN\\"\\n },\\n \\"body\\": \\"{ \\\\\\\\\\"model\\\\\\\\\\": \\\\\\\\\\"text-davinci-003\\\\\\\\\\", \\\\\\\\\\"prompt\\\\\\\\\\": \\\\\\\\\\"What are the potential reasons for the following kubernetes error: {{ctx.payload.second.first_hit}}\\\\\\\\\\", \\\\\\\\\\"temperature\\\\\\\\\\": 1, \\\\\\\\\\"max_tokens\\\\\\\\\\": 512, \\\\\\\\\\"top_p\\\\\\\\\\": 1.0, \\\\\\\\\\"frequency_penalty\\\\\\\\\\": 0.0, \\\\\\\\\\"presence_penalty\\\\\\\\\\": 0.0 }\\",\\n \\"connection_timeout_in_millis\\": 60000,\\n \\"read_timeout_millis\\": 60000\\n }\\n`})}),`\\n`,(0,n.jsxs)(e.p,{children:[\\"If you are interested in creating a more OpenAI-based version, you can 
\\",(0,n.jsx)(e.a,{href:\\"https://elastic-content-share.eu/downloads/watcher-job-to-integrate-chatgpt-in-elasticsearch/\\",rel:\\"nofollow\\",children:\\"download an alternative script\\"}),\\" and look at \\",(0,n.jsx)(e.a,{href:\\"https://mar1.hashnode.dev/unlocking-the-power-of-aiops-with-chatgpt-and-elasticsearch\\",rel:\\"nofollow\\",children:\\"another blog from an Elastic community member\\"}),\\".\\"]}),`\\n`,(0,n.jsx)(e.h2,{id:\\"gaining-other-insights-beyond-kubernetes-logs\\",children:\\"Gaining other insights beyond Kubernetes logs\\"}),`\\n`,(0,n.jsx)(e.p,{children:\\"Now that the script is up and running, you can modify it using different:\\"}),`\\n`,(0,n.jsxs)(e.ul,{children:[`\\n`,(0,n.jsx)(e.li,{children:\\"Inputs\\"}),`\\n`,(0,n.jsx)(e.li,{children:\\"Conditions\\"}),`\\n`,(0,n.jsx)(e.li,{children:\\"Actions\\"}),`\\n`,(0,n.jsx)(e.li,{children:\\"Transforms\\"}),`\\n`]}),`\\n`,(0,n.jsxs)(e.p,{children:[\\"Learn more on how to modify it \\",(0,n.jsx)(e.a,{href:\\"https://www.elastic.co/guide/en/elasticsearch/reference/current/xpack-alerting.html\\",rel:\\"nofollow\\",children:\\"here\\"}),\\". Some examples of modifications could include:\\"]}),`\\n`,(0,n.jsxs)(e.ol,{children:[`\\n`,(0,n.jsx)(e.li,{children:\\"Look for error logs from application components (e.g., cartService, frontEnd, from the OTel demo), cloud service providers (e.g., AWS/Azure/GCP logs), and even logs from components such as Kafka, databases, etc.\\"}),`\\n`,(0,n.jsxs)(e.li,{children:[\\"Vary the time frame from running continuously to running over a specific \\",(0,n.jsx)(e.a,{href:\\"https://www.elastic.co/guide/en/elasticsearch/reference/current/query-dsl-range-query.html\\",rel:\\"nofollow\\",children:\\"range\\"}),\\".\\"]}),`\\n`,(0,n.jsx)(e.li,{children:\\"Look for specific errors in the logs.\\"}),`\\n`,(0,n.jsx)(e.li,{children:\\"Query for analysis on a set of errors at once versus just one, which we demonstrated.\\"}),`\\n`]}),`\\n`,(0,n.jsx)(e.p,{children:\\"The modifications are endless, and of course you can run this with OpenAI rather than Azure OpenAI Service.\\"}),`\\n`,(0,n.jsx)(e.h2,{id:\\"conclusion\\",children:\\"Conclusion\\"}),`\\n`,(0,n.jsx)(e.p,{children:\\"I hope you\\\\u2019ve gotten an appreciation for how Elastic Observability can help you connect to OpenAI services (Azure OpenAI, as we showed, or even OpenAI) to better analyze an error log message instead of having to run several Google searches and hunt for possible insights.\\"}),`\\n`,(0,n.jsx)(e.p,{children:\\"Here\\\\u2019s a quick recap of what we covered:\\"}),`\\n`,(0,n.jsxs)(e.ul,{children:[`\\n`,(0,n.jsx)(e.li,{children:\\"Developing an Elastic watcher script that can be used to find and send Kubernetes errors into OpenAI and insert them into a new index\\"}),`\\n`,(0,n.jsx)(e.li,{children:\\"Configuring Azure OpenAI Service or OpenAI with the right authorization and request parameters\\"}),`\\n`]}),`\\n`,(0,n.jsxs)(e.p,{children:[\\"Ready to get started? 
Sign up \\",(0,n.jsx)(e.a,{href:\\"https://cloud.elastic.co/registration\\",rel:\\"nofollow\\",children:\\"for Elastic Cloud\\"}),\\" and try out the features and capabilities I\\\\u2019ve outlined above to get the most value and visibility out of your OpenTelemetry data.\\"]}),`\\n`,(0,n.jsx)(e.h2,{id:\\"appendix\\",children:\\"Appendix\\"}),`\\n`,(0,n.jsx)(e.p,{children:\\"Watcher script\\"}),`\\n`,(0,n.jsx)(e.pre,{children:(0,n.jsx)(e.code,{className:\\"language-bash\\",children:`PUT _watcher/watch/chatgpt_analysis\\n{\\n \\"trigger\\": {\\n \\"schedule\\": {\\n \\"interval\\": \\"5m\\"\\n }\\n },\\n \\"input\\": {\\n \\"chain\\": {\\n \\"inputs\\": [\\n {\\n \\"first\\": {\\n \\"search\\": {\\n \\"request\\": {\\n \\"search_type\\": \\"query_then_fetch\\",\\n \\"indices\\": [\\n \\"logs-kubernetes*\\"\\n ],\\n \\"rest_total_hits_as_int\\": true,\\n \\"body\\": {\\n \\"query\\": {\\n \\"bool\\": {\\n \\"must\\": [\\n {\\n \\"match\\": {\\n \\"kubernetes.container.name\\": \\"konnectivity-agent\\"\\n }\\n },\\n {\\n \\"match\\" : {\\n \\"message\\":\\"error\\"\\n }\\n }\\n ]\\n }\\n },\\n \\"size\\": \\"1\\"\\n }\\n }\\n }\\n }\\n },\\n {\\n \\"second\\": {\\n \\"transform\\": {\\n \\"script\\": \\"return [\'first_hit\': ctx.payload.first.hits.hits.0._source.message.replace(\'\\\\\\\\\\"\', \\\\\\\\\\"\\\\\\\\\\")]\\"\\n }\\n }\\n },\\n {\\n \\"third\\": {\\n \\"http\\": {\\n \\"request\\": {\\n \\"method\\" : \\"POST\\",\\n \\"url\\": \\"https://XXX.openai.azure.com/openai/deployments/pme-gpt-35-turbo/chat/completions?api-version=2023-03-15-preview\\",\\n \\"headers\\": {\\n \\"api-key\\" : \\"XXX\\",\\n \\"content-type\\" : \\"application/json\\"\\n },\\n \\"body\\" : \\"{ \\\\\\\\\\"messages\\\\\\\\\\": [ { \\\\\\\\\\"role\\\\\\\\\\": \\\\\\\\\\"system\\\\\\\\\\", \\\\\\\\\\"content\\\\\\\\\\": \\\\\\\\\\"You are a helpful assistant.\\\\\\\\\\"}, { \\\\\\\\\\"role\\\\\\\\\\": \\\\\\\\\\"user\\\\\\\\\\", \\\\\\\\\\"content\\\\\\\\\\": \\\\\\\\\\"What are the potential reasons for the following kubernetes error: {{ctx.payload.second.first_hit}}\\\\\\\\\\"}], \\\\\\\\\\"temperature\\\\\\\\\\": 0.5, \\\\\\\\\\"max_tokens\\\\\\\\\\": 2048}\\" ,\\n \\"connection_timeout\\": \\"60s\\",\\n \\"read_timeout\\": \\"60s\\"\\n }\\n }\\n }\\n }\\n ]\\n }\\n },\\n \\"condition\\": {\\n \\"compare\\": {\\n \\"ctx.payload.first.hits.total\\": {\\n \\"gt\\": 0\\n }\\n }\\n },\\n \\"actions\\": {\\n \\"index_payload\\" : {\\n \\"transform\\": {\\n \\"script\\": {\\n \\"source\\": \\"\\"\\"\\n def payload = [:];\\n payload.timestamp = new Date();\\n payload.pod_name = ctx.payload.first.hits.hits[0]._source.kubernetes.pod.name;\\n payload.error_message = ctx.payload.second.first_hit;\\n payload.chatgpt_analysis = ctx.payload.third.choices[0].message.content;\\n return payload;\\n \\"\\"\\"\\n }\\n },\\n \\"index\\" : {\\n \\"index\\" : \\"chatgpt_k8s_analyzed\\"\\n }\\n }\\n }\\n}\\n`})}),`\\n`,(0,n.jsx)(e.h3,{id:\\"additional-logging-resources\\",children:\\"Additional logging resources:\\"}),`\\n`,(0,n.jsxs)(e.ul,{children:[`\\n`,(0,n.jsx)(e.li,{children:(0,n.jsx)(e.a,{href:\\"https://www.elastic.co/getting-started/observability/collect-and-analyze-logs\\",rel:\\"nofollow\\",children:\\"Getting started with logging on Elastic (quickstart)\\"})}),`\\n`,(0,n.jsx)(e.li,{children:(0,n.jsx)(e.a,{href:\\"https://www.elastic.co/guide/en/observability/current/logs-metrics-get-started.html\\",rel:\\"nofollow\\",children:\\"Ingesting common known logs via integrations (compute node 
example)\\"})}),`\\n`,(0,n.jsx)(e.li,{children:(0,n.jsx)(e.a,{href:\\"https://docs.elastic.co/integrations\\",rel:\\"nofollow\\",children:\\"List of integrations\\"})}),`\\n`,(0,n.jsx)(e.li,{children:(0,n.jsx)(e.a,{href:\\"https://www.elastic.co/blog/log-monitoring-management-enterprise\\",rel:\\"nofollow\\",children:\\"Ingesting custom application logs into Elastic\\"})}),`\\n`,(0,n.jsx)(e.li,{children:(0,n.jsx)(e.a,{href:\\"https://www.elastic.co/blog/observability-logs-parsing-schema-read-write\\",rel:\\"nofollow\\",children:\\"Enriching logs in Elastic\\"})}),`\\n`,(0,n.jsxs)(e.li,{children:[\\"Analyzing Logs with \\",(0,n.jsx)(e.a,{href:\\"https://www.elastic.co/blog/reduce-mttd-ml-machine-learning-observability\\",rel:\\"nofollow\\",children:\\"Anomaly Detection (ML)\\"}),\\" and \\",(0,n.jsx)(e.a,{href:\\"https://www.elastic.co/blog/observability-logs-machine-learning-aiops\\",rel:\\"nofollow\\",children:\\"AIOps\\"})]}),`\\n`]}),`\\n`,(0,n.jsx)(e.h3,{id:\\"common-use-case-examples-with-logs\\",children:\\"Common use case examples with logs:\\"}),`\\n`,(0,n.jsxs)(e.ul,{children:[`\\n`,(0,n.jsx)(e.li,{children:(0,n.jsx)(e.a,{href:\\"https://youtu.be/ax04ZFWqVCg\\",rel:\\"nofollow\\",children:\\"Nginx log management\\"})}),`\\n`,(0,n.jsx)(e.li,{children:(0,n.jsx)(e.a,{href:\\"https://www.elastic.co/blog/vpc-flow-logs-monitoring-analytics-observability\\",rel:\\"nofollow\\",children:\\"AWS VPC Flow log management\\"})}),`\\n`,(0,n.jsx)(e.li,{children:(0,n.jsx)(e.a,{href:\\"https://www.elastic.co/blog/kubernetes-errors-observability-logs-openai\\",rel:\\"nofollow\\",children:\\"Using OpenAI to analyze Kubernetes errors\\"})}),`\\n`,(0,n.jsx)(e.li,{children:(0,n.jsx)(e.a,{href:\\"https://youtu.be/Li5TJAWbz8Q\\",rel:\\"nofollow\\",children:\\"PostgreSQL issue analysis with AIOps\\"})}),`\\n`]}),`\\n`,(0,n.jsx)(e.p,{children:(0,n.jsx)(e.em,{children:\\"In this blog post, we may have used third party generative AI tools, which are owned and operated by their respective owners. Elastic does not have any control over the third party tools and we have no responsibility or liability for their content, operation or use, nor for any loss or damage that may arise from your use of such tools. Please exercise caution when using AI tools with personal, sensitive or confidential information. Any data you submit may be used for AI training or other purposes. There is no guarantee that information you provide will be kept secure or confidential. You should familiarize yourself with the privacy practices and terms of use of any generative AI tools prior to use.\\"})}),`\\n`,(0,n.jsx)(e.p,{children:(0,n.jsx)(e.em,{children:\\"Elastic, Elasticsearch and associated marks are trademarks, logos or registered trademarks of Elasticsearch N.V. in the United States and other countries. 
All other company and product names are trademarks, logos or registered trademarks of their respective owners.\\"})}),`\\n`,(0,n.jsx)(e.p,{children:(0,n.jsx)(e.em,{children:\\"Screenshots of Microsoft products used with permission from Microsoft.\\"})})]})}function d(t={}){let{wrapper:e}=t.components||{};return e?(0,n.jsx)(e,{...t,children:(0,n.jsx)(h,{...t})}):h(t)}return v(k);})();\\n;return Component;"},"_id":"articles/kubernetes-errors-elastic-observability-logs-openai.mdx","_raw":{"sourceFilePath":"articles/kubernetes-errors-elastic-observability-logs-openai.mdx","sourceFileName":"kubernetes-errors-elastic-observability-logs-openai.mdx","sourceFileDir":"articles","contentType":"mdx","flattenedPath":"articles/kubernetes-errors-elastic-observability-logs-openai"},"type":"Article","imageUrl":"/assets/images/kubernetes-errors-observability-logs-openai/blog-elastic-configuration.png","readingTime":"25 min read","url":"/kubernetes-errors-observability-logs-openai","headings":[{"level":2,"title":"Prerequisites and config","href":"#prerequisites-and-config"},{"level":2,"title":"Setting it all up","href":"#setting-it-all-up"},{"level":3,"title":"Step 0: Create an account on Elastic Cloud","href":"#step-0-create-an-account-on-elastic-cloud"},{"level":3,"title":"Step 1: Azure OpenAI Service and authorization","href":"#step-1-azure-openai-service-and-authorization"},{"level":3,"title":"Step 2: Identifying Kubernetes errors in Elastic logs","href":"#step-2-identifying-kubernetes-errors-in-elastic-logs"},{"level":3,"title":"Step 3: Configuring the watcher with the right script","href":"#step-3-configuring-the-watcher-with-the-right-script"},{"level":3,"title":"Step 4. Output from Azure OpenAI/OpenAI vs. ChatGPT UI","href":"#step-4-output-from-azure-openaiopenai-vs-chatgpt-ui"},{"level":2,"title":"Gaining other insights beyond Kubernetes logs","href":"#gaining-other-insights-beyond-kubernetes-logs"},{"level":2,"title":"Conclusion","href":"#conclusion"},{"level":2,"title":"Appendix","href":"#appendix"},{"level":3,"title":"Additional logging resources:","href":"#additional-logging-resources"},{"level":3,"title":"Common use case examples with logs:","href":"#common-use-case-examples-with-logs"}]},{"title":"LLM Observability with the new Amazon Bedrock Integration in Elastic Observability","slug":"llm-observability-aws-bedrock","date":"2024-11-25","description":"Elastic\'s new Amazon Bedrock integration for Observability provides comprehensive insights into Amazon Bedrock LLM performance and usage. Learn about how LLM based metric and log collection in real-time with pre-built dashboards can effectively monitor and resolve LLM invocation errors and performance challenges.","image":"LLM-observability-AWS-Bedrock.jpg","author":[{"slug":"agi-thomas","type":"Author","_raw":{}}],"tags":[{"slug":"aws","type":"Tag","_raw":{}},{"slug":"genai","type":"Tag","_raw":{}},{"slug":"aws-bedrock","type":"Tag","_raw":{}}],"body":{"raw":"\\nAs organizations increasingly adopt LLMs for AI-powered applications such as content creation, Retrieval-Augmented Generation (RAG), and data analysis, SREs and developers face new challenges. Tasks like monitoring workflows, analyzing input and output, managing query latency, and controlling costs become critical. LLM observability helps address these issues by providing clear insights into how these models perform, allowing teams to quickly identify bottlenecks, optimize configurations, and improve reliability. 
With better observability, SREs can confidently scale LLM applications, especially on platforms like [Amazon Bedrock](https://aws.amazon.com/bedrock/), while minimizing downtime and keeping costs in check.\n\nElastic is expanding support for LLM Observability with Elastic Observability\'s new [Amazon Bedrock integration](https://www.elastic.co/docs/current/integrations/aws_bedrock). This new observability integration provides you with comprehensive visibility into the performance and usage of foundational models from leading AI companies and from Amazon, all available through Amazon Bedrock. The new Amazon Bedrock Observability integration offers an out-of-the-box experience by simplifying the collection of Amazon Bedrock metrics and logs, making it easier to gain actionable insights and effectively manage your models. The integration is simple to set up and comes with pre-built, out-of-the-box dashboards. With real-time insights, SREs can now monitor, optimize, and troubleshoot LLM applications that are using Amazon Bedrock.\n\nThis blog will walk through the features available to SREs, such as monitoring invocations, errors, and latency information across various models, along with the usage and performance of LLM requests. Additionally, the blog will show how easy it is to set up and what insights you can gain from Elastic for LLM Observability.\n\n## Prerequisites\n\nTo follow along with this blog, please make sure you have:\n\n- An account on [Elastic Cloud](https://cloud.elastic.co) and a deployed stack in AWS ([see instructions here](https://www.elastic.co/guide/en/elastic-stack/current/installing-elastic-stack.html)). Ensure you are using version 8.13 or higher.\n- An AWS account with permissions to pull the necessary data from AWS. [See details in our documentation](https://docs.elastic.co/en/integrations/aws#aws-permissions).\n\n## Configuring Amazon Bedrock Logs Collection\n\nTo collect Amazon Bedrock logs, you can choose from the following options:\n\n1. Amazon Simple Storage Service (Amazon S3) bucket\n2. Amazon CloudWatch logs\n\n**S3 Bucket Logs Collection**: When collecting logs from the Amazon S3 bucket, you can retrieve logs from Amazon S3 objects pointed to by Amazon S3 notification events, which are read from an SQS queue, or by directly polling a list of Amazon S3 objects in an Amazon S3 bucket. Refer to Elastic’s [Custom AWS Logs](https://www.elastic.co/docs/current/integrations/aws_logs) integration for more details.\n\n**CloudWatch Logs Collection**: In this option, you will need to create a [CloudWatch log group](https://console.aws.amazon.com/cloudwatch/). After creating the log group, be sure to note down its ARN, as you will need it both for the Amazon Bedrock settings and for the integration’s logs configuration.\n\nConfigure the Amazon Bedrock CloudWatch logs with the Log group ARN to start collecting CloudWatch logs.\n\n![](/assets/images/llm-observability-aws-bedrock/cloudwatch-logs-configuration.png)\n\nPlease visit the [AWS Console](https://aws.amazon.com/console/), navigate to the \"Settings\" section under [Amazon Bedrock](https://aws.amazon.com/bedrock/), and select your preferred method of collecting logs. 
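If you manage this setting with the AWS CLI rather than the console, the equivalent call is the PutModelInvocationLoggingConfiguration API. Here is a sketch for the CloudWatch option; the region, log group name, and role ARN are placeholders, and the role must allow Amazon Bedrock to write to the log group:

```bash
# Enable Bedrock model invocation logging to a CloudWatch log group
aws bedrock put-model-invocation-logging-configuration \
  --region us-east-1 \
  --logging-config '{
    "cloudWatchConfig": {
      "logGroupName": "/aws/bedrock/model-invocations",
      "roleArn": "arn:aws:iam::123456789012:role/BedrockLoggingRole"
    },
    "textDataDeliveryEnabled": true,
    "imageDataDeliveryEnabled": true,
    "embeddingDataDeliveryEnabled": true
  }'
```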
Based on the value you select from the Logging Destination in the Amazon Bedrock settings, you will need to enter either the Amazon S3 location or the CloudWatch log group ARN.\\n\\n![](/assets/images/llm-observability-aws-bedrock/aws-bedrock-logs-configuration.png)\\n\\n## Configuring Amazon Bedrock Metrics Collection\\n\\nConfigure Elastic\'s Amazon Bedrock integration to collect Amazon Bedrock metrics from your chosen AWS region at the specified collection interval.\\n\\n![](/assets/images/llm-observability-aws-bedrock/cloudwatch-metrics-configuration.png)\\n\\n## Maximize Visibility with Out-of-the-Box Dashboards\\n\\nThe Amazon Bedrock integration offers rich out-of-the-box visibility into the performance and usage information of models in Amazon Bedrock, including text and image models. The **Amazon Bedrock Overview** dashboard provides a summarized view of the invocations, errors, and latency information across various models. \\n\\n![](/assets/images/llm-observability-aws-bedrock/aws-bedrock-dashboard-metric-summary.png)\\n\\nThe **Text / Chat metrics** section in the **Amazon Bedrock Overview** dashboard provides insights into token usage for Text models in Amazon Bedrock. This includes use cases such as text content generation, summarization, translation, code generation, question answering, and sentiment analysis.\\n\\n![](/assets/images/llm-observability-aws-bedrock/aws-bedrock-dashboard-text-metrics.png)\\n\\nThe **Image metrics** section in the **Amazon Bedrock Overview** dashboard offers valuable insights into the usage of Image models in Amazon Bedrock.\\n\\n![](/assets/images/llm-observability-aws-bedrock/aws-bedrock-dashboard-image-metrics.png)\\n\\nThe **Logs** section of the **Amazon Bedrock Overview** dashboard in Elastic provides detailed insights into the usage and performance of LLM requests. It enables you to monitor key details such as model name, version, LLM prompt and response, usage tokens, request size, completion tokens, response size, and any error codes tied to specific LLM requests.\\n\\nThe detailed logs provide full visibility into raw model interactions, capturing both the inputs (prompts) and the outputs (responses) generated by the models. This transparency enables you to analyze and optimize how your LLM handles different requests, allowing for more precise fine-tuning of both the prompt structure and the resulting model responses. By closely monitoring these interactions, you can refine prompt strategies and enhance the quality and reliability of model outputs.\\n\\n![](/assets/images/llm-observability-aws-bedrock/aws-bedrock-dashboard-logs-details.png)\\n\\nThe **Amazon Bedrock Overview** dashboard provides a comprehensive view of the initial and final response times. 
It includes a percentage comparison graph that highlights the performance differences between these response stages, enabling you to quickly identify efficiency improvements or potential bottlenecks in your LLM interactions.\\n\\n![](/assets/images/llm-observability-aws-bedrock/aws-bedrock-dashboard-performance.png)\\n\\n## Creating Alerts and SLOs to Monitor Amazon Bedrock\\n\\nAs with any Elastic integration, Amazon Bedrock [logs](https://www.elastic.co/docs/current/integrations/aws_bedrock#collecting-bedrock-model-invocation-logs-from-s3-bucket) and [metrics](https://www.elastic.co/docs/current/integrations/aws_bedrock#metrics) are fully integrated into Elastic Observability, allowing you to leverage features like SLOs, alerting, custom dashboards, and detailed logs exploration.\\n\\nTo create an alert that, for example, monitors LLM invocation latency in Amazon Bedrock, you can apply a Custom Threshold rule on the Amazon Bedrock datastream. Set the rule to trigger an alert when the LLM invocation latency exceeds a defined threshold. This ensures proactive monitoring of model performance, allowing you to detect and address latency issues before they impact the user experience.\\n\\n![](/assets/images/llm-observability-aws-bedrock/aws-bedrock-alert-invocation-latency.png)\\n\\nWhen a violation occurs, the Alert Details view linked in the notification provides detailed context, including when the issue began, its current status, and any history of similar violations. This rich information enables rapid triaging, investigation, and root cause analysis to resolve issues efficiently.\\n\\nSimilarly, to create an SLO for monitoring Amazon Bedrock invocation performance, you can define a custom query SLI where good events are those Amazon Bedrock invocations that do not result in client errors or server errors and have latency less than 10 seconds. Set an appropriate SLO target, such as 99%. This will help you identify errors and latency issues in applications using LLMs, allowing you to take timely corrective actions before they affect the overall user experience.\\n\\n![](/assets/images/llm-observability-aws-bedrock/aws-bedrock-slo-configuration.png)\\n\\nThe image below highlights the SLOs, SLIs, and the remaining error budget for Amazon Bedrock models. The observed violations are a result of deliberately crafted long text generation prompts, which led to extended response times. This example demonstrates how the system tracks performance against defined targets, helping you quickly identify latency issues and performance bottlenecks. By monitoring these metrics, you gain valuable insights for proactive issue triaging, allowing for timely corrective actions and an improved user experience for applications using LLMs.\\n\\n![](/assets/images/llm-observability-aws-bedrock/aws-bedrock-slo-rundata.png)\\n\\n## Try it out today\\n\\nThe Amazon Bedrock playgrounds provide a console environment to experiment with running inference on different models and configurations before deciding to use them in an application. 
Start your own 7-day free trial by signing up via AWS Marketplace and quickly spin up a deployment in minutes on any of the Elastic Cloud regions on AWS around the world.\\n\\nDeploy a cluster on our [Elasticsearch Service](https://www.elastic.co/cloud/elasticsearch-service), [download](https://www.elastic.co/downloads/) the Elasticsearch stack, or run [Elastic from AWS Marketplace](https://aws.amazon.com/marketplace/seller-profile?id=d8f59038-c24c-4a9d-a66d-6711d35d7305) then spin up the new technical preview of Amazon Bedrock integration, open the curated dashboards in Kibana and start monitoring your Amazon Bedrock service!\\n","code":"var Component=(()=>{var g=Object.create;var a=Object.defineProperty;var m=Object.getOwnPropertyDescriptor;var u=Object.getOwnPropertyNames;var p=Object.getPrototypeOf,f=Object.prototype.hasOwnProperty;var w=(i,e)=>()=>(e||i((e={exports:{}}).exports,e),e.exports),b=(i,e)=>{for(var n in e)a(i,n,{get:e[n],enumerable:!0})},s=(i,e,n,r)=>{if(e&&typeof e==\\"object\\"||typeof e==\\"function\\")for(let t of u(e))!f.call(i,t)&&t!==n&&a(i,t,{get:()=>e[t],enumerable:!(r=m(e,t))||r.enumerable});return i};var y=(i,e,n)=>(n=i!=null?g(p(i)):{},s(e||!i||!i.__esModule?a(n,\\"default\\",{value:i,enumerable:!0}):n,i)),v=i=>s(a({},\\"__esModule\\",{value:!0}),i);var c=w((L,l)=>{l.exports=_jsx_runtime});var A={};b(A,{default:()=>h,frontmatter:()=>k});var o=y(c()),k={title:\\"LLM Observability with the new Amazon Bedrock Integration in Elastic Observability\\",slug:\\"llm-observability-aws-bedrock\\",date:\\"2024-11-25\\",description:\\"Elastic\'s new Amazon Bedrock integration for Observability provides comprehensive insights into Amazon Bedrock LLM performance and usage. Learn about how LLM based metric and log collection in real-time with pre-built dashboards can effectively monitor and resolve LLM invocation errors and performance challenges.\\",author:[{slug:\\"agi-thomas\\"}],image:\\"LLM-observability-AWS-Bedrock.jpg\\",tags:[{slug:\\"aws\\"},{slug:\\"genai\\"},{slug:\\"aws-bedrock\\"}]};function d(i){let e={a:\\"a\\",h2:\\"h2\\",img:\\"img\\",li:\\"li\\",ol:\\"ol\\",p:\\"p\\",strong:\\"strong\\",ul:\\"ul\\",...i.components};return(0,o.jsxs)(o.Fragment,{children:[(0,o.jsxs)(e.p,{children:[\\"As organizations increasingly adopt LLMs for AI-powered applications such as content creation, Retrieval-Augmented Generation (RAG), and data analysis, SREs and developers face new challenges. Tasks like monitoring workflows, analyzing input and output, managing query latency, and controlling costs become critical. LLM observability helps address these issues by providing clear insights into how these models perform, allowing teams to quickly identify bottlenecks, optimize configurations, and improve reliability. With better observability, SREs can confidently scale LLM applications, especially on platforms like \\",(0,o.jsx)(e.a,{href:\\"https://aws.amazon.com/bedrock/\\",rel:\\"nofollow\\",children:\\"Amazon Bedrock\\"}),\\", while minimizing downtime and keeping costs in check.\\"]}),`\\n`,(0,o.jsxs)(e.p,{children:[\\"Elastic is expanding support for LLM Observability with Elastic Observability\'s new \\",(0,o.jsx)(e.a,{href:\\"https://www.elastic.co/docs/current/integrations/aws_bedrock\\",rel:\\"nofollow\\",children:\\"Amazon Bedrock integration\\"}),\\". This new observability integration provides you with comprehensive visibility into the performance and usage of foundational models from leading AI companies and from Amazon available through Amazon Bedrock. 
The new Amazon Bedrock Observability integration offers an out-of-the-box experience by simplifying the collection of Amazon Bedrock metrics and logs, making it easier to gain actionable insights and effectively manage your models. The integration is simple to set up and comes with pre-built, out-of-the-box dashboards. With real-time insights, SREs can now monitor, optimize and troubleshoot LLM applications that are using Amazon Bedrock.\\"]}),`\\n`,(0,o.jsx)(e.p,{children:\\"This blog will walk through the features available to SREs, such as monitoring invocations, errors, and latency information across various models, along with the usage and performance of LLM requests. Additionally, the blog will show how easy it is to set up and what insights you can gain from Elastic for LLM Observability.\\"}),`\\n`,(0,o.jsx)(e.h2,{id:\\"prerequisites\\",children:\\"Prerequisites\\"}),`\\n`,(0,o.jsx)(e.p,{children:\\"To follow along with this blog, please make sure you have:\\"}),`\\n`,(0,o.jsxs)(e.ul,{children:[`\\n`,(0,o.jsxs)(e.li,{children:[\\"An account on \\",(0,o.jsx)(e.a,{href:\\"https://cloud.elastic.co\\",rel:\\"nofollow\\",children:\\"Elastic Cloud\\"}),\\" and a deployed stack in AWS (\\",(0,o.jsx)(e.a,{href:\\"https://www.elastic.co/guide/en/elastic-stack/current/installing-elastic-stack.html\\",rel:\\"nofollow\\",children:\\"see instructions here\\"}),\\"). Ensure you are using version 8.13 or higher.\\"]}),`\\n`,(0,o.jsxs)(e.li,{children:[\\"An AWS account with permissions to pull the necessary data from AWS. \\",(0,o.jsx)(e.a,{href:\\"https://docs.elastic.co/en/integrations/aws#aws-permissions\\",rel:\\"nofollow\\",children:\\"See details in our documentation\\"}),\\".\\"]}),`\\n`]}),`\\n`,(0,o.jsx)(e.h2,{id:\\"configuring-amazon-bedrock-logs-collection\\",children:\\"Configuring Amazon Bedrock Logs Collection\\"}),`\\n`,(0,o.jsx)(e.p,{children:\\"To collect Amazon Bedrock logs, you can choose from the following options:\\"}),`\\n`,(0,o.jsxs)(e.ol,{children:[`\\n`,(0,o.jsx)(e.li,{children:\\"Amazon Simple Storage Service (Amazon S3) bucket\\"}),`\\n`,(0,o.jsx)(e.li,{children:\\"Amazon CloudWatch logs\\"}),`\\n`]}),`\\n`,(0,o.jsxs)(e.p,{children:[(0,o.jsx)(e.strong,{children:\\"S3 Bucket Logs Collection\\"}),\\": When collecting logs from the Amazon S3 bucket, you can retrieve logs from Amazon S3 objects pointed to by Amazon S3 notification events, which are read from an SQS queue, or by directly polling a list of Amazon S3 objects in an Amazon S3 bucket. Refer to Elastic\\\\u2019s \\",(0,o.jsx)(e.a,{href:\\"https://www.elastic.co/docs/current/integrations/aws_logs\\",rel:\\"nofollow\\",children:\\"Custom AWS Logs\\"}),\\" integration for more details.\\"]}),`\\n`,(0,o.jsxs)(e.p,{children:[(0,o.jsx)(e.strong,{children:\\"CloudWatch Logs Collection\\"}),\\": In this option, you will need to create a \\",(0,o.jsx)(e.a,{href:\\"https://console.aws.amazon.com/cloudwatch/\\",rel:\\"nofollow\\",children:\\"CloudWatch log group\\"}),\\". 
After creating the log group, be sure to note down the ARN of the newly created log group, as you will need it for the Amazon Bedrock settings configuration and Amazon Bedrock integration configuration for logs.\\"]}),`\\n`,(0,o.jsx)(e.p,{children:\\"Configure the Amazon Bedrock CloudWatch logs with the Log group ARN to start collecting CloudWatch logs.\\"}),`\\n`,(0,o.jsx)(e.p,{children:(0,o.jsx)(e.img,{src:\\"/assets/images/llm-observability-aws-bedrock/cloudwatch-logs-configuration.png\\",alt:\\"\\",width:\\"1516\\",height:\\"920\\"})}),`\\n`,(0,o.jsxs)(e.p,{children:[\\"Please visit the \\",(0,o.jsx)(e.a,{href:\\"https://aws.amazon.com/console/\\",rel:\\"nofollow\\",children:\\"AWS Console\\"}),\' and navigate to the \\"Settings\\" section under \',(0,o.jsx)(e.a,{href:\\"https://aws.amazon.com/bedrock/\\",rel:\\"nofollow\\",children:\\"Amazon Bedrock\\"}),\\" and select your preferred method of collecting logs. Based on the value you select from the Logging Destination in the Amazon Bedrock settings, you will need to enter either the Amazon S3 location or the CloudWatch log group ARN.\\"]}),`\\n`,(0,o.jsx)(e.p,{children:(0,o.jsx)(e.img,{src:\\"/assets/images/llm-observability-aws-bedrock/aws-bedrock-logs-configuration.png\\",alt:\\"\\",width:\\"4520\\",height:\\"1994\\"})}),`\\n`,(0,o.jsx)(e.h2,{id:\\"configuring-amazon-bedrock-metrics-collection\\",children:\\"Configuring Amazon Bedrock Metrics Collection\\"}),`\\n`,(0,o.jsx)(e.p,{children:\\"Configure Elastic\'s Amazon Bedrock integration to collect Amazon Bedrock metrics from your chosen AWS region at the specified collection interval.\\"}),`\\n`,(0,o.jsx)(e.p,{children:(0,o.jsx)(e.img,{src:\\"/assets/images/llm-observability-aws-bedrock/cloudwatch-metrics-configuration.png\\",alt:\\"\\",width:\\"1502\\",height:\\"528\\"})}),`\\n`,(0,o.jsx)(e.h2,{id:\\"maximize-visibility-with-out-of-the-box-dashboards\\",children:\\"Maximize Visibility with Out-of-the-Box Dashboards\\"}),`\\n`,(0,o.jsxs)(e.p,{children:[\\"Amazon Bedrock integration offers rich out-of-the-box visibility into the performance and usage information of models in Amazon Bedrock, including text and image models. The \\",(0,o.jsx)(e.strong,{children:\\"Amazon Bedrock Overview\\"}),\\" dashboard provides a summarized view of the invocations, errors and latency information across various models.\\"]}),`\\n`,(0,o.jsx)(e.p,{children:(0,o.jsx)(e.img,{src:\\"/assets/images/llm-observability-aws-bedrock/aws-bedrock-dashboard-metric-summary.png\\",alt:\\"\\",width:\\"3456\\",height:\\"1826\\"})}),`\\n`,(0,o.jsxs)(e.p,{children:[\\"The \\",(0,o.jsx)(e.strong,{children:\\"Text / Chat metrics\\"}),\\" section in the \\",(0,o.jsx)(e.strong,{children:\\"Amazon Bedrock Overview\\"}),\\" dashboard provides insights into token usage for Text models in Amazon Bedrock. 
This includes use cases such as text content generation, summarization, translation, code generation, question answering, and sentiment analysis.\\"]}),`\\n`,(0,o.jsx)(e.p,{children:(0,o.jsx)(e.img,{src:\\"/assets/images/llm-observability-aws-bedrock/aws-bedrock-dashboard-text-metrics.png\\",alt:\\"\\",width:\\"3440\\",height:\\"660\\"})}),`\\n`,(0,o.jsxs)(e.p,{children:[\\"The \\",(0,o.jsx)(e.strong,{children:\\"Image metrics\\"}),\\" section in the \\",(0,o.jsx)(e.strong,{children:\\"Amazon Bedrock Overview\\"}),\\" dashboard offers valuable insights into the usage of Image models in Amazon Bedrock.\\"]}),`\\n`,(0,o.jsx)(e.p,{children:(0,o.jsx)(e.img,{src:\\"/assets/images/llm-observability-aws-bedrock/aws-bedrock-dashboard-image-metrics.png\\",alt:\\"\\",width:\\"3440\\",height:\\"660\\"})}),`\\n`,(0,o.jsxs)(e.p,{children:[\\"The \\",(0,o.jsx)(e.strong,{children:\\"Logs\\"}),\\" section of the \\",(0,o.jsx)(e.strong,{children:\\"Amazon Bedrock Overview\\"}),\\" dashboard in Elastic provides detailed insights into the usage and performance of LLM requests. It enables you to monitor key details such as model name, version, LLM prompt and response, usage tokens, request size, completion tokens, response size, and any error codes tied to specific LLM requests.\\"]}),`\\n`,(0,o.jsx)(e.p,{children:\\"The detailed logs provide full visibility into raw model interactions, capturing both the inputs (prompts) and the outputs (responses) generated by the models. This transparency enables you to analyze and optimize how your LLM handles different requests, allowing for more precise fine-tuning of both the prompt structure and the resulting model responses. By closely monitoring these interactions, you can refine prompt strategies and enhance the quality and reliability of model outputs.\\"}),`\\n`,(0,o.jsx)(e.p,{children:(0,o.jsx)(e.img,{src:\\"/assets/images/llm-observability-aws-bedrock/aws-bedrock-dashboard-logs-details.png\\",alt:\\"\\",width:\\"3456\\",height:\\"1524\\"})}),`\\n`,(0,o.jsxs)(e.p,{children:[(0,o.jsx)(e.strong,{children:\\"Amazon Bedrock Overview\\"}),\\" dashboard provides a comprehensive view of the initial and final response times. It includes a percentage comparison graph that highlights the performance differences between these response stages, enabling you to quickly identify efficiency improvements or potential bottlenecks in your LLM interactions.\\"]}),`\\n`,(0,o.jsx)(e.p,{children:(0,o.jsx)(e.img,{src:\\"/assets/images/llm-observability-aws-bedrock/aws-bedrock-dashboard-performance.png\\",alt:\\"\\",width:\\"3454\\",height:\\"592\\"})}),`\\n`,(0,o.jsx)(e.h2,{id:\\"creating-alerts-and-slos-to-monitor-amazon-bedrock\\",children:\\"Creating Alerts and SLOs to Monitor Amazon Bedrock\\"}),`\\n`,(0,o.jsxs)(e.p,{children:[\\"As with any Elastic integration, Amazon Bedrock \\",(0,o.jsx)(e.a,{href:\\"https://www.elastic.co/docs/current/integrations/aws_bedrock#collecting-bedrock-model-invocation-logs-from-s3-bucket\\",rel:\\"nofollow\\",children:\\"logs\\"}),\\" and \\",(0,o.jsx)(e.a,{href:\\"https://www.elastic.co/docs/current/integrations/aws_bedrock#metrics\\",rel:\\"nofollow\\",children:\\"metrics\\"}),\\" are fully integrated into Elastic Observability, allowing you to leverage features like SLOs, alerting, custom dashboards, and detailed logs exploration.\\"]}),`\\n`,(0,o.jsx)(e.p,{children:\\"To create an alert, for example to monitor LLM invocation latency in Amazon Bedrock, you can apply a Custom Threshold rule on the Amazon Bedrock datastream. 
Set the rule to trigger an alert when the LLM invocation latency exceeds a defined threshold. This ensures proactive monitoring of model performance, allowing you to detect and address latency issues before they impact the user experience.\\"}),`\\n`,(0,o.jsx)(e.p,{children:(0,o.jsx)(e.img,{src:\\"/assets/images/llm-observability-aws-bedrock/aws-bedrock-alert-invocation-latency.png\\",alt:\\"\\",width:\\"1082\\",height:\\"1186\\"})}),`\\n`,(0,o.jsx)(e.p,{children:\\"When a violation occurs, the Alert Details view linked in the notification provides detailed context, including when the issue began, its current status, and any history of similar violations. This rich information enables rapid triaging, investigation, and root cause analysis to resolve issues efficiently.\\"}),`\\n`,(0,o.jsx)(e.p,{children:\\"Similarly, to create an SLO for monitoring Amazon Bedrock invocation performance for instance, you can define a custom query SLI where good events are those Amazon Bedrock invocations that do not result in client errors or server errors and have latency less than 10 seconds. Set an appropriate SLO target, such as 99%. This will help you identify errors and latency issues in applications using LLMs, allowing you to take timely corrective actions before they affect the overall user experience.\\"}),`\\n`,(0,o.jsx)(e.p,{children:(0,o.jsx)(e.img,{src:\\"/assets/images/llm-observability-aws-bedrock/aws-bedrock-slo-configuration.png\\",alt:\\"\\",width:\\"1584\\",height:\\"2174\\"})}),`\\n`,(0,o.jsx)(e.p,{children:\\"The image below highlights the SLOs, SLIs, and the remaining error budget for Amazon Bedrock models. The observed violations are a result of deliberately crafted long text generation prompts, which led to extended response times. This example demonstrates how the system tracks performance against defined targets, helping you quickly identify latency issues and performance bottlenecks. By monitoring these metrics, you gain valuable insights for proactive issue triaging, allowing for timely corrective actions and improved user experience of applications using LLM.\\"}),`\\n`,(0,o.jsx)(e.p,{children:(0,o.jsx)(e.img,{src:\\"/assets/images/llm-observability-aws-bedrock/aws-bedrock-slo-rundata.png\\",alt:\\"\\",width:\\"4636\\",height:\\"1020\\"})}),`\\n`,(0,o.jsx)(e.h2,{id:\\"try-it-out-today\\",children:\\"Try it out today\\"}),`\\n`,(0,o.jsx)(e.p,{children:\\"The Amazon Bedrock playgrounds provide a console environment to experiment with running inference on different models and configurations before deciding to use them in an application. 
Start your own 7-day free trial by signing up via AWS Marketplace and quickly spin up a deployment in minutes on any of the Elastic Cloud regions on AWS around the world.\\"}),`\\n`,(0,o.jsxs)(e.p,{children:[\\"Deploy a cluster on our \\",(0,o.jsx)(e.a,{href:\\"https://www.elastic.co/cloud/elasticsearch-service\\",rel:\\"nofollow\\",children:\\"Elasticsearch Service\\"}),\\", \\",(0,o.jsx)(e.a,{href:\\"https://www.elastic.co/downloads/\\",rel:\\"nofollow\\",children:\\"download\\"}),\\" the Elasticsearch stack, or run \\",(0,o.jsx)(e.a,{href:\\"https://aws.amazon.com/marketplace/seller-profile?id=d8f59038-c24c-4a9d-a66d-6711d35d7305\\",rel:\\"nofollow\\",children:\\"Elastic from AWS Marketplace\\"}),\\" then spin up the new technical preview of Amazon Bedrock integration, open the curated dashboards in Kibana and start monitoring your Amazon Bedrock service!\\"]})]})}function h(i={}){let{wrapper:e}=i.components||{};return e?(0,o.jsx)(e,{...i,children:(0,o.jsx)(d,{...i})}):d(i)}return v(A);})();\\n;return Component;"},"_id":"articles/llm-observability-aws-bedrock.mdx","_raw":{"sourceFilePath":"articles/llm-observability-aws-bedrock.mdx","sourceFileName":"llm-observability-aws-bedrock.mdx","sourceFileDir":"articles","contentType":"mdx","flattenedPath":"articles/llm-observability-aws-bedrock"},"type":"Article","imageUrl":"/assets/images/llm-observability-aws-bedrock/LLM-observability-AWS-Bedrock.jpg","readingTime":"7 min read","url":"/llm-observability-aws-bedrock","headings":[{"level":2,"title":"Prerequisites ","href":"#prerequisites-"},{"level":2,"title":"Configuring Amazon Bedrock Logs Collection","href":"#configuring-amazon-bedrock-logs-collection"},{"level":2,"title":"Configuring Amazon Bedrock Metrics Collection","href":"#configuring-amazon-bedrock-metrics-collection"},{"level":2,"title":"Maximize Visibility with Out-of-the-Box Dashboards","href":"#maximize-visibility-with-out-of-the-box-dashboards"},{"level":2,"title":"Creating Alerts and SLOs to Monitor Amazon Bedrock","href":"#creating-alerts-and-slos-to-monitor-amazon-bedrock"},{"level":2,"title":"Try it out today","href":"#try-it-out-today"}]},{"title":"LLM Observability with Elastic: Azure OpenAI Part 2","slug":"llm-observability-azure-openai-v2","date":"2024-08-23","description":"We have added further capabilities to the Azure OpenAI GA package, which now offer prompt and response monitoring, PTU deployment performance tracking, and billing insights!","image":"LLM-observability.jpg","author":[{"slug":"muthukumar-paramasivam","type":"Author","_raw":{}},{"slug":"lalit-satapathy","type":"Author","_raw":{}}],"tags":[{"slug":"azure","type":"Tag","_raw":{}},{"slug":"genai","type":"Tag","_raw":{}}],"body":{"raw":"\\nWe recently announced GA of the Azure OpenAI integration. You can find details in our previous blog [LLM Observability: Azure OpenAI](https://www.elastic.co/observability-labs/blog/llm-observability-azure-openai).\\n\\nSince then, we have added further capabilities to the Azure OpenAI GA package, which now offer prompt and response monitoring, PTU deployment performance tracking, and billing insights. Read on to learn more!\\n\\n## Advanced Logging and Monitoring\\nThe initial GA release of the integration focused mainly on the native logs, to track the telemetry of the service by using **cognitive services logging**. 
This version of the Azure OpenAI integration allows you to process the advanced logs, which give a more holistic view of OpenAI resource usage.\\n\\nTo achieve this, you have to set up API Management services in Azure. The API Management service is a centralized place where you can put all OpenAI service endpoints to manage all of them end-to-end. Enable the API Management services and configure the Azure event hub to stream the logs.\\n\\nTo learn more about setting up the API Management service to access Azure OpenAI, please refer to the [Azure documentation](https://learn.microsoft.com/en-us/azure/architecture/ai-ml/openai/architecture/log-monitor-azure-openai).\\n\\nBy using advanced logging, you can collect the following log data:\\n\\n- Request input text\\n- Response output text\\n- Content filter results\\n- Usage Information\\n - Input prompt tokens\\n - Output completion tokens\\n - Total tokens\\n\\nThe Azure OpenAI integration now collects the API Management Gateway logs. When a question from the user goes to the API Management, it logs the questions and the responses from the GPT models.\\n\\n![LLM Observability: Azure OpenAI Logs Overview](/assets/images/llm-observability-azure-openai-v2/llm-observability-azure-openai-log-categories.png)\\n\\nHere’s what a sample log looks like:\\n![LLM Observability: Azure OpenAI Advanced Logs](/assets/images/llm-observability-azure-openai-v2/llm-observability-advance-log-monitoring.png)\\n\\n### Content filtered results\\nAzure OpenAI’s content filtering system detects and takes action on specific categories of potentially harmful content in both input prompts and output completions. With Azure OpenAI model deployments, you can use the default content filter or create your own content filter.\\n\\nNow, the integration collects the content filtered result logs. 
In this example, let\'s create a custom filter in the Azure OpenAI Studio that generates an error log.\\n\\nBy leveraging the **Azure Content Filters**, you can create your own custom lists of terms or phrases to block or flag.\\n![LLM Observability: Azure OpenAI Set Content Filter](/assets/images/llm-observability-azure-openai-v2/llm-observability-azure-content-filters.png)\\n\\nAnd the document ingested in Elastic would look like this:\\n![LLM Observability: Azure OpenAI Content Filter Logs](/assets/images/llm-observability-azure-openai-v2/llm-observability-content-filter-logs.png)\\nThis screenshot provides insights into the content filtered request.\\n\\n## PTU Deployment Monitoring\\n[Provisioned throughput units (PTU)](https://learn.microsoft.com/en-us/azure/ai-services/openai/concepts/provisioned-throughput) are units of model processing capacity that you can reserve and deploy for processing prompts and generating completions.\\n\\nThe curated dashboard for PTU Deployment gives comprehensive visibility into metrics such as request latency, active token usage, PTU utilization, and fine-tuning activities, offering a quick snapshot of your deployment\'s health and performance.\\n\\nHere are the essential PTU metrics captured by default:\\n\\n- **Time to Response:** Time taken for the first response to appear after a user sends a prompt.\\n- **Active Tokens:** Use this metric to understand your TPS- or TPM-based utilization for PTUs and compare it to the benchmarks for target TPS or TPM scenarios.\\n- **Provision-managed Utilization V2:** Provides insights into utilization percentages, helping prevent overuse and ensuring efficient resource allocation.\\n- **Prompt Token Cache Match Rate:** The prompt token cache hit ratio expressed as a percentage.\\n\\n![LLM Observability: Azure OpenAI PTU Deployment Metrics Monitoring](/assets/images/llm-observability-azure-openai-v2/llm-observability-azure_open_ai_ptu_deployment.png)\\n\\n## Using Billing for cost\\nUsing the curated overview dashboard, you can now monitor the actual usage cost for the AI applications. You are one step away from processing the billing information.\\n\\nYou need to configure and install the [Azure billing metrics integration](https://www.elastic.co/docs/current/integrations/azure_billing). 
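\\n\\nAs a minimal sketch, an Azure integration of this kind is typically configured with service principal credentials along these lines (the setting names are illustrative assumptions, not the exact schema):\\n\\n```yaml\\n# Hypothetical sketch of the Azure billing metrics integration settings\\nclient_id: 11111111-2222-3333-4444-555555555555 # service principal application ID\\nclient_secret: ${AZURE_CLIENT_SECRET} # keep the secret out of plain-text configuration\\ntenant_id: 66666666-7777-8888-9999-000000000000\\nsubscription_id: aaaaaaaa-bbbb-cccc-dddd-eeeeeeeeeeee\\n```\\n\\n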
Once the installation is complete the usage cost is visualized for the cognitive services in the Azure OpenAI overview dashboard.\\n\\n![LLM Observability: Azure OpenAI Usage Cost Monitoring](/assets/images/llm-observability-azure-openai-v2/llm-observability-azure_openai_billing_overview.png)\\n\\n## Try it out today\\nDeploy a cluster on our [Elasticsearch Service](https://www.elastic.co/cloud/elasticsearch-service) or [download](https://www.elastic.co/downloads/) the stack, spin up the new Azure OpenAI integration, open the curated dashboards in Kibana and start monitoring your Azure OpenAI service!","code":"var Component=(()=>{var p=Object.create;var r=Object.defineProperty;var u=Object.getOwnPropertyDescriptor;var g=Object.getOwnPropertyNames;var m=Object.getPrototypeOf,v=Object.prototype.hasOwnProperty;var f=(t,e)=>()=>(e||t((e={exports:{}}).exports,e),e.exports),b=(t,e)=>{for(var i in e)r(t,i,{get:e[i],enumerable:!0})},s=(t,e,i,a)=>{if(e&&typeof e==\\"object\\"||typeof e==\\"function\\")for(let o of g(e))!v.call(t,o)&&o!==i&&r(t,o,{get:()=>e[o],enumerable:!(a=u(e,o))||a.enumerable});return t};var y=(t,e,i)=>(i=t!=null?p(m(t)):{},s(e||!t||!t.__esModule?r(i,\\"default\\",{value:t,enumerable:!0}):i,t)),A=t=>s(r({},\\"__esModule\\",{value:!0}),t);var c=f((I,l)=>{l.exports=_jsx_runtime});var z={};b(z,{default:()=>d,frontmatter:()=>w});var n=y(c()),w={title:\\"LLM Observability with Elastic: Azure OpenAI Part 2\\",slug:\\"llm-observability-azure-openai-v2\\",date:\\"2024-08-23\\",description:\\"We have added further capabilities to the Azure OpenAI GA package, which now offer prompt and response monitoring, PTU deployment performance tracking, and billing insights!\\",author:[{slug:\\"muthukumar-paramasivam\\"},{slug:\\"lalit-satapathy\\"}],image:\\"LLM-observability.jpg\\",tags:[{slug:\\"azure\\"},{slug:\\"genai\\"}]};function h(t){let e={a:\\"a\\",h2:\\"h2\\",h3:\\"h3\\",img:\\"img\\",li:\\"li\\",p:\\"p\\",strong:\\"strong\\",ul:\\"ul\\",...t.components};return(0,n.jsxs)(n.Fragment,{children:[(0,n.jsxs)(e.p,{children:[\\"We recently announced GA of the Azure OpenAI integration. You can find details in our previous blog \\",(0,n.jsx)(e.a,{href:\\"https://www.elastic.co/observability-labs/blog/llm-observability-azure-openai\\",rel:\\"nofollow\\",children:\\"LLM Observability: Azure OpenAI\\"}),\\".\\"]}),`\\n`,(0,n.jsx)(e.p,{children:\\"Since then, we have added further capabilities to the Azure OpenAI GA package, which now offer prompt and response monitoring, PTU deployment performance tracking, and billing insights. Read on to learn more!\\"}),`\\n`,(0,n.jsx)(e.h2,{id:\\"advanced-logging-and-monitoring\\",children:\\"Advanced Logging and Monitoring\\"}),`\\n`,(0,n.jsxs)(e.p,{children:[\\"The initial GA release of the integration focused mainly on the native logs, to track the telemetry of the service by using \\",(0,n.jsx)(e.strong,{children:\\"cognitive services logging\\"}),\\". This version of the Azure OpenAI integration allows you to process the advanced logs which gives a more holistic view of OpenAI resource usage.\\"]}),`\\n`,(0,n.jsx)(e.p,{children:\\"To achieve this, you have to setup API Management services in Azure. The API Management service is a centralized place where you can put all OpenAI services endpoints to manage all of them end-to-end. 
Enable the API Management services and configure the Azure event hub to stream the logs.\\"}),`\\n`,(0,n.jsxs)(e.p,{children:[\\"To learn more about setting up the API Management service to access Azure OpenAI, please refer to the \\",(0,n.jsx)(e.a,{href:\\"https://learn.microsoft.com/en-us/azure/architecture/ai-ml/openai/architecture/log-monitor-azure-openai\\",rel:\\"nofollow\\",children:\\"Azure documentation\\"}),\\".\\"]}),`\\n`,(0,n.jsx)(e.p,{children:\\"By using advanced logging, you can collect the following log data:\\"}),`\\n`,(0,n.jsxs)(e.ul,{children:[`\\n`,(0,n.jsx)(e.li,{children:\\"Request input text\\"}),`\\n`,(0,n.jsx)(e.li,{children:\\"Response output text\\"}),`\\n`,(0,n.jsx)(e.li,{children:\\"Content filter results\\"}),`\\n`,(0,n.jsxs)(e.li,{children:[\\"Usage Information\\",`\\n`,(0,n.jsxs)(e.ul,{children:[`\\n`,(0,n.jsx)(e.li,{children:\\"Input prompt tokens\\"}),`\\n`,(0,n.jsx)(e.li,{children:\\"Output completion tokens\\"}),`\\n`,(0,n.jsx)(e.li,{children:\\"Total tokens\\"}),`\\n`]}),`\\n`]}),`\\n`]}),`\\n`,(0,n.jsx)(e.p,{children:\\"Azure OpenAI integration now collects the API Management Gateway logs. When a question from the user goes to the API Management, it logs the questions and the responses from the GPT models.\\"}),`\\n`,(0,n.jsx)(e.p,{children:(0,n.jsx)(e.img,{src:\\"/assets/images/llm-observability-azure-openai-v2/llm-observability-azure-openai-log-categories.png\\",alt:\\"LLM Observability: Azure OpenAI Logs Overview\\",width:\\"1564\\",height:\\"849\\"})}),`\\n`,(0,n.jsxs)(e.p,{children:[`Here\\\\u2019s what a sample log looks like,\\n`,(0,n.jsx)(e.img,{src:\\"/assets/images/llm-observability-azure-openai-v2/llm-observability-advance-log-monitoring.png\\",alt:\\"LLM Observability: Azure OpenAI Advanced Logs\\",width:\\"3449\\",height:\\"1226\\"})]}),`\\n`,(0,n.jsx)(e.h3,{id:\\"content-filtered-results\\",children:\\"Content filtered results\\"}),`\\n`,(0,n.jsx)(e.p,{children:\\"Azure OpenAI\\\\u2019s content filtering system detects and takes action on specific categories of potentially harmful content in both input prompts and output completions. With Azure OpenAI model deployments, you can use the default content filter or create your own content filter.\\"}),`\\n`,(0,n.jsx)(e.p,{children:\\"Now, The integration collects the content filtered result logs. 
In this example let\'s create a custom filter in the Azure OpenAI Studio that generates an error log.\\"}),`\\n`,(0,n.jsxs)(e.p,{children:[\\"By leveraging the \\",(0,n.jsx)(e.strong,{children:\\"Azure Content Filters\\"}),`, you can create your own custom lists of terms or phrases to block or flag.\\n`,(0,n.jsx)(e.img,{src:\\"/assets/images/llm-observability-azure-openai-v2/llm-observability-azure-content-filters.png\\",alt:\\"LLM Observability: Azure OpenAI Set Content Filter\\",width:\\"3419\\",height:\\"1310\\"})]}),`\\n`,(0,n.jsxs)(e.p,{children:[`And the document ingested in Elastic would look like this:\\n`,(0,n.jsx)(e.img,{src:\\"/assets/images/llm-observability-azure-openai-v2/llm-observability-content-filter-logs.png\\",alt:\\"LLM Observability: Azure OpenAI Content Filter Logs\\",width:\\"3370\\",height:\\"1258\\"}),`\\nThis screenshot provides insights into the content filtered request.`]}),`\\n`,(0,n.jsx)(e.h2,{id:\\"ptu-deployment-monitoring\\",children:\\"PTU Deployment Monitoring\\"}),`\\n`,(0,n.jsxs)(e.p,{children:[(0,n.jsx)(e.a,{href:\\"https://learn.microsoft.com/en-us/azure/ai-services/openai/concepts/provisioned-throughput\\",rel:\\"nofollow\\",children:\\"Provisioned throughput units (PTU)\\"}),\\" are units of model processing capacity that you can reserve and deploy for processing prompts and generating completions.\\"]}),`\\n`,(0,n.jsx)(e.p,{children:\\"The curated dashboard for PTU Deployment gives comprehensive visibility into metrics such as request latency, active token usage, PTU utilization, and fine-tuning activities, offering a quick snapshot of your deployment\'s health and performance.\\"}),`\\n`,(0,n.jsx)(e.p,{children:\\"Here are the essential PTU metrics captured by default:\\"}),`\\n`,(0,n.jsxs)(e.ul,{children:[`\\n`,(0,n.jsxs)(e.li,{children:[(0,n.jsx)(e.strong,{children:\\"Time to Response:\\"}),\\" Time taken for the first response to appear after a user send a prompt.\\"]}),`\\n`,(0,n.jsxs)(e.li,{children:[(0,n.jsx)(e.strong,{children:\\"Active Tokens:\\"}),\\" Use this metric to understand your TPS or TPM based utilization for PTUs and compare to the benchmarks for target TPS or TPM scenarios.\\"]}),`\\n`,(0,n.jsxs)(e.li,{children:[(0,n.jsx)(e.strong,{children:\\"Provision-managed Utilization V2:\\"}),\\" Provides insights into utilization percentages, helping prevent overuse and ensuring efficient resource allocation.\\"]}),`\\n`,(0,n.jsxs)(e.li,{children:[(0,n.jsx)(e.strong,{children:\\"Prompt Token Cache Match Rate:\\"}),\\" The prompt token cache hit ratio expressed as a percentage.\\"]}),`\\n`]}),`\\n`,(0,n.jsx)(e.p,{children:(0,n.jsx)(e.img,{src:\\"/assets/images/llm-observability-azure-openai-v2/llm-observability-azure_open_ai_ptu_deployment.png\\",alt:\\"LLM Observability: Azure OpenAI PTU Deployment Metrics Monitoring\\",width:\\"3456\\",height:\\"1462\\"})}),`\\n`,(0,n.jsx)(e.h2,{id:\\"using-billing-for-cost\\",children:\\"Using Billing for cost\\"}),`\\n`,(0,n.jsx)(e.p,{children:\\"Using the curated overview dashboard you can now monitor the actual usage cost for the AI applications. You are one step away from processing the billing information.\\"}),`\\n`,(0,n.jsxs)(e.p,{children:[\\"You need to configure and install the \\",(0,n.jsx)(e.a,{href:\\"https://www.elastic.co/docs/current/integrations/azure_billing\\",rel:\\"nofollow\\",children:\\"Azure billing metrics integration\\"}),\\". 
Once the installation is complete the usage cost is visualized for the cognitive services in the Azure OpenAI overview dashboard.\\"]}),`\\n`,(0,n.jsx)(e.p,{children:(0,n.jsx)(e.img,{src:\\"/assets/images/llm-observability-azure-openai-v2/llm-observability-azure_openai_billing_overview.png\\",alt:\\"LLM Observability: Azure OpenAI Usage Cost Monitoring\\",width:\\"2539\\",height:\\"2446\\"})}),`\\n`,(0,n.jsx)(e.h2,{id:\\"try-it-out-today\\",children:\\"Try it out today\\"}),`\\n`,(0,n.jsxs)(e.p,{children:[\\"Deploy a cluster on our \\",(0,n.jsx)(e.a,{href:\\"https://www.elastic.co/cloud/elasticsearch-service\\",rel:\\"nofollow\\",children:\\"Elasticsearch Service\\"}),\\" or \\",(0,n.jsx)(e.a,{href:\\"https://www.elastic.co/downloads/\\",rel:\\"nofollow\\",children:\\"download\\"}),\\" the stack, spin up the new Azure OpenAI integration, open the curated dashboards in Kibana and start monitoring your Azure OpenAI service!\\"]})]})}function d(t={}){let{wrapper:e}=t.components||{};return e?(0,n.jsx)(e,{...t,children:(0,n.jsx)(h,{...t})}):h(t)}return A(z);})();\\n;return Component;"},"_id":"articles/llm-observability-azure-openai-v2.mdx","_raw":{"sourceFilePath":"articles/llm-observability-azure-openai-v2.mdx","sourceFileName":"llm-observability-azure-openai-v2.mdx","sourceFileDir":"articles","contentType":"mdx","flattenedPath":"articles/llm-observability-azure-openai-v2"},"type":"Article","imageUrl":"/assets/images/llm-observability-azure-openai-v2/LLM-observability.jpg","readingTime":"4 min read","url":"/llm-observability-azure-openai-v2","headings":[{"level":2,"title":"Advanced Logging and Monitoring","href":"#advanced-logging-and-monitoring"},{"level":3,"title":"Content filtered results","href":"#content-filtered-results"},{"level":2,"title":"PTU Deployment Monitoring","href":"#ptu-deployment-monitoring"},{"level":2,"title":"Using Billing for cost","href":"#using-billing-for-cost"},{"level":2,"title":"Try it out today","href":"#try-it-out-today"}]},{"title":"LLM Observability: Azure OpenAI","slug":"llm-observability-azure-openai","date":"2024-06-24","description":"We are excited to announce the general availability of the Azure OpenAI Integration that provides comprehensive Observability into the performance and usage of the Azure OpenAI Service!","image":"AI_fingertip_touching_human_fingertip.jpg","author":[{"slug":"vinay-chandrasekhar","type":"Author","_raw":{}},{"slug":"andres-rodriguez","type":"Author","_raw":{}}],"tags":[{"slug":"azure-openai","type":"Tag","_raw":{}},{"slug":"azure","type":"Tag","_raw":{}},{"slug":"genai","type":"Tag","_raw":{}}],"body":{"raw":"\\nWe are excited to announce the general availability of the [Azure OpenAI Integration](https://www.elastic.co/integrations/data-integrations?solution=all-solutions&category=azure) that provides comprehensive Observability into the performance and usage of the [Azure OpenAI Service](https://azure.microsoft.com/en-us/products/ai-services/openai-service)! 
Also look at [Part 2 of this blog](https://www.elastic.co/observability-labs/blog/llm-observability-azure-openai-v2).\\n\\nWhile we have offered [visibility into LLM environments](https://www.elastic.co/observability-labs/blog/monitor-openai-api-gpt-models-opentelemetry) for a while now, the addition of our Azure OpenAI integration enables richer out-of-the-box visibility into the performance and usage of your Azure OpenAI-based applications, further enhancing LLM Observability.\\n\\n![LLM Observability: Azure OpenAI Monitoring](/assets/images/llm-observability-azure-openai/llm-observability-azure-openai-monitoring.png)\\n\\nThe Azure OpenAI integration leverages [Elastic Agent](https://www.elastic.co/elastic-agent)’s Azure integration capabilities to collect both logs (using [Azure EventHub](https://learn.microsoft.com/en-us/azure/azure-monitor/essentials/stream-monitoring-data-event-hubs)) and metrics (using [Azure Monitor](https://learn.microsoft.com/en-us/azure/azure-monitor/reference/supported-metrics/metrics-index)) to provide deep visibility on the usage of the [Azure OpenAI Service](https://azure.microsoft.com/en-us/products/ai-services/openai-service).\\n\\nThe integration includes an out-of-the-box dashboard that summarizes the most relevant aspects of the service usage, including request and error rates, token usage, and chat completion latency.\\n\\n![LLM Observability: Azure OpenAI Monitoring Overview](/assets/images/llm-observability-azure-openai/llm-observability-azure-openai-monitoring-overview.png)\\n\\n## Creating Alerts and SLOs to monitor Azure OpenAI\\n\\nAs with every other Elastic integration, all the [logs](https://www.elastic.co/docs/current/integrations/azure_openai#logs) and [metrics](https://www.elastic.co/docs/current/integrations/azure_openai#metrics) information is fully available to leverage in every capability in [Elastic Observability](https://www.elastic.co/observability), including [SLOs](https://www.elastic.co/guide/en/observability/current/slo.html), [alerting](https://www.elastic.co/guide/en/observability/current/create-alerts.html), custom [dashboards](https://www.elastic.co/guide/en/kibana/current/dashboard.html), in-depth [logs exploration](https://www.elastic.co/guide/en/observability/current/monitor-logs.html), etc. \\n\\nTo create an alert to monitor token usage, for example, start with the Custom Threshold rule on the Azure OpenAI datastream and set an aggregation condition to track and report violations of token usage past a certain threshold. \\n\\n![LLM Observability: Azure OpenAI Monitoring Alert Creation](/assets/images/llm-observability-azure-openai/llm-observability-azure-openai-create-alert.png)\\n\\nWhen a violation occurs, the Alert Details view linked in the alert notification provides rich context surrounding the violation, such as when the violation started, its current status, and any previous history of such violations, enabling quick triaging, investigation, and root cause analysis. \\n\\nSimilarly, to create an SLO to monitor error rates in Azure OpenAI calls, start with the custom query SLI definition, defining good events as any response with a result signature below 400 over a total that includes all responses. Then, by setting an appropriate SLO target such as 99%, start monitoring your Azure OpenAI error rate SLO over a period of 7, 30, or 90 days to track degradation and take action before it becomes a pervasive problem. 
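\\n\\nTo make the shape of such a definition concrete, here is a sketch of the good and total queries behind that SLI (the field name is an assumption for illustration, not the exact mapping used by the integration):\\n\\n```yaml\\n# Hypothetical sketch of the custom query SLI for an Azure OpenAI error-rate SLO\\ngood_query: azure.open_ai.result_signature < 400 # responses that did not error (field name assumed)\\ntotal_query: azure.open_ai.result_signature : * # every response counts toward the total\\ntarget: 0.99 # 99% of calls should be good\\ntime_window: 30d # rolling window; 7d or 90d work equally well\\n```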
\\n\\n![LLM Observability: Azure OpenAI Monitoring SLO Creation](/assets/images/llm-observability-azure-openai/llm-observability-azure-openai-create-slo.png)\\n\\nPlease refer to the [User Guide](https://www.elastic.co/guide/en/observability/current/monitor-azure-openai.html) to learn more and to get started!\\n","code":"var Component=(()=>{var p=Object.create;var n=Object.defineProperty;var d=Object.getOwnPropertyDescriptor;var g=Object.getOwnPropertyNames;var m=Object.getPrototypeOf,b=Object.prototype.hasOwnProperty;var v=(i,e)=>()=>(e||i((e={exports:{}}).exports,e),e.exports),f=(i,e)=>{for(var r in e)n(i,r,{get:e[r],enumerable:!0})},l=(i,e,r,a)=>{if(e&&typeof e==\\"object\\"||typeof e==\\"function\\")for(let o of g(e))!b.call(i,o)&&o!==r&&n(i,o,{get:()=>e[o],enumerable:!(a=d(e,o))||a.enumerable});return i};var w=(i,e,r)=>(r=i!=null?p(m(i)):{},l(e||!i||!i.__esModule?n(r,\\"default\\",{value:i,enumerable:!0}):r,i)),y=i=>l(n({},\\"__esModule\\",{value:!0}),i);var c=v((L,s)=>{s.exports=_jsx_runtime});var A={};f(A,{default:()=>u,frontmatter:()=>z});var t=w(c()),z={title:\\"LLM Observability: Azure OpenAI\\",slug:\\"llm-observability-azure-openai\\",date:\\"2024-06-24\\",description:\\"We are excited to announce the general availability of the Azure OpenAI Integration that provides comprehensive Observability into the performance and usage of the Azure OpenAI Service!\\",author:[{slug:\\"vinay-chandrasekhar\\"},{slug:\\"andres-rodriguez\\"}],image:\\"AI_fingertip_touching_human_fingertip.jpg\\",tags:[{slug:\\"azure-openai\\"},{slug:\\"azure\\"},{slug:\\"genai\\"}]};function h(i){let e={a:\\"a\\",h2:\\"h2\\",img:\\"img\\",p:\\"p\\",...i.components};return(0,t.jsxs)(t.Fragment,{children:[(0,t.jsxs)(e.p,{children:[\\"We are excited to announce the general availability of the \\",(0,t.jsx)(e.a,{href:\\"https://www.elastic.co/integrations/data-integrations?solution=all-solutions&category=azure\\",rel:\\"nofollow\\",children:\\"Azure OpenAI Integration\\"}),\\" that provides comprehensive Observability into the performance and usage of the \\",(0,t.jsx)(e.a,{href:\\"https://azure.microsoft.com/en-us/products/ai-services/openai-service\\",rel:\\"nofollow\\",children:\\"Azure OpenAI Service\\"}),\\"! 
Also look at \\",(0,t.jsx)(e.a,{href:\\"https://www.elastic.co/observability-labs/blog/llm-observability-azure-openai-v2\\",rel:\\"nofollow\\",children:\\"Part 2 of this blog\\"})]}),`\\n`,(0,t.jsxs)(e.p,{children:[\\"While we have offered \\",(0,t.jsx)(e.a,{href:\\"https://www.elastic.co/observability-labs/blog/monitor-openai-api-gpt-models-opentelemetry\\",rel:\\"nofollow\\",children:\\"visibility into LLM environments\\"}),\\" for a while now, the addition of our Azure OpenAI integration enables richer out-of-the-box visibility into the performance and usage of your Azure OpenAI based applications, further enhancing LLM Observability.\\"]}),`\\n`,(0,t.jsx)(e.p,{children:(0,t.jsx)(e.img,{src:\\"/assets/images/llm-observability-azure-openai/llm-observability-azure-openai-monitoring.png\\",alt:\\"LLM Observability: Azure OpenAI Monitoring\\",width:\\"1604\\",height:\\"634\\"})}),`\\n`,(0,t.jsxs)(e.p,{children:[\\"The Azure OpenAI integration leverages \\",(0,t.jsx)(e.a,{href:\\"https://www.elastic.co/elastic-agent\\",rel:\\"nofollow\\",children:\\"Elastic Agent\\"}),\\"\\\\u2019s Azure integration capabilities to collect both logs (using \\",(0,t.jsx)(e.a,{href:\\"https://learn.microsoft.com/en-us/azure/azure-monitor/essentials/stream-monitoring-data-event-hubs\\",rel:\\"nofollow\\",children:\\"Azure EventHub\\"}),\\") and metrics (using \\",(0,t.jsx)(e.a,{href:\\"https://learn.microsoft.com/en-us/azure/azure-monitor/reference/supported-metrics/metrics-index\\",rel:\\"nofollow\\",children:\\"Azure Monitor\\"}),\\") to provide deep visibility on the usage of the \\",(0,t.jsx)(e.a,{href:\\"https://azure.microsoft.com/en-us/products/ai-services/openai-service\\",rel:\\"nofollow\\",children:\\"Azure OpenAI Service\\"}),\\".\\"]}),`\\n`,(0,t.jsx)(e.p,{children:\\"The integration includes an out-of-the-box dashboard that summarizes the most relevant aspects of the service usage, including request and error rates, token usage and chat completion latency.\\"}),`\\n`,(0,t.jsx)(e.p,{children:(0,t.jsx)(e.img,{src:\\"/assets/images/llm-observability-azure-openai/llm-observability-azure-openai-monitoring-overview.png\\",alt:\\"LLM Observability: Azure OpenAI Monitoring Overview\\",width:\\"1984\\",height:\\"1472\\"})}),`\\n`,(0,t.jsx)(e.h2,{id:\\"creating-alerts-and-slos-to-monitor-azure-openai\\",children:\\"Creating Alerts and SLOs to monitor Azure OpenAI\\"}),`\\n`,(0,t.jsxs)(e.p,{children:[\\"As with every other Elastic integration, all the \\",(0,t.jsx)(e.a,{href:\\"https://www.elastic.co/docs/current/integrations/azure_openai#logs\\",rel:\\"nofollow\\",children:\\"logs\\"}),\\" and \\",(0,t.jsx)(e.a,{href:\\"https://www.elastic.co/docs/current/integrations/azure_openai#metrics\\",rel:\\"nofollow\\",children:\\"metrics\\"}),\\" information is fully available to leverage in every capability in \\",(0,t.jsx)(e.a,{href:\\"https://www.elastic.co/observability\\",rel:\\"nofollow\\",children:\\"Elastic Observability\\"}),\\", including \\",(0,t.jsx)(e.a,{href:\\"https://www.elastic.co/guide/en/observability/current/slo.html\\",rel:\\"nofollow\\",children:\\"SLOs\\"}),\\", \\",(0,t.jsx)(e.a,{href:\\"https://www.elastic.co/guide/en/observability/current/create-alerts.html\\",rel:\\"nofollow\\",children:\\"alerting\\"}),\\", custom \\",(0,t.jsx)(e.a,{href:\\"https://www.elastic.co/guide/en/kibana/current/dashboard.html\\",rel:\\"nofollow\\",children:\\"dashboards\\"}),\\", in-depth 
\\",(0,t.jsx)(e.a,{href:\\"https://www.elastic.co/guide/en/observability/current/monitor-logs.html\\",rel:\\"nofollow\\",children:\\"logs exploration\\"}),\\", etc.\\"]}),`\\n`,(0,t.jsx)(e.p,{children:\\"To create an alert to monitor token usage, for example, start with the Custom Threshold rule on the Azure OpenAI datastream and set an aggregation condition to track and report violations of token usage past a certain threshold.\\"}),`\\n`,(0,t.jsx)(e.p,{children:(0,t.jsx)(e.img,{src:\\"/assets/images/llm-observability-azure-openai/llm-observability-azure-openai-create-alert.png\\",alt:\\"LLM Observability: Azure OpenAI Monitoring Alert Creation\\",width:\\"413\\",height:\\"530\\"})}),`\\n`,(0,t.jsx)(e.p,{children:\\"When a violation occurs, the Alert Details view linked in the alert notification for that alert provides rich context surrounding the violation, such as when the violation started, its current status, and any previous history of such violations, enabling quick triaging, investigation and root cause analysis.\\"}),`\\n`,(0,t.jsx)(e.p,{children:\\"Similarly, to create an SLO to monitor error rates in Azure OpenAI calls, start with the custom query SLI definition adding in the good events to be any result signature at or above 400 over a total value that includes all responses. Then, by setting an appropriate SLO target such as 99%, start monitoring your Azure OpenAI error rate SLO over a period of 7, 30, or 90 days to track degradation and take action before it becomes a pervasive problem.\\"}),`\\n`,(0,t.jsx)(e.p,{children:(0,t.jsx)(e.img,{src:\\"/assets/images/llm-observability-azure-openai/llm-observability-azure-openai-create-slo.png\\",alt:\\"LLM Observability: Azure OpenAI Monitoring SLO Creation\\",width:\\"624\\",height:\\"389\\"})}),`\\n`,(0,t.jsxs)(e.p,{children:[\\"Please refer to the \\",(0,t.jsx)(e.a,{href:\\"https://www.elastic.co/guide/en/observability/current/monitor-azure-openai.html\\",rel:\\"nofollow\\",children:\\"User Guide\\"}),\\" to learn more and to get started!\\"]})]})}function u(i={}){let{wrapper:e}=i.components||{};return e?(0,t.jsx)(e,{...i,children:(0,t.jsx)(h,{...i})}):h(i)}return y(A);})();\\n;return Component;"},"_id":"articles/llm-observability-azure-openai.mdx","_raw":{"sourceFilePath":"articles/llm-observability-azure-openai.mdx","sourceFileName":"llm-observability-azure-openai.mdx","sourceFileDir":"articles","contentType":"mdx","flattenedPath":"articles/llm-observability-azure-openai"},"type":"Article","imageUrl":"/assets/images/llm-observability-azure-openai/AI_fingertip_touching_human_fingertip.jpg","readingTime":"2 min read","url":"/llm-observability-azure-openai","headings":[{"level":2,"title":"Creating Alerts and SLOs to monitor Azure OpenAI","href":"#creating-alerts-and-slos-to-monitor-azure-openai"}]},{"title":"Convert Logstash pipelines to OpenTelemetry Collector Pipelines","slug":"logstash-to-otel","date":"2024-10-25","description":"This guide helps Logstash users transition to OpenTelemetry by demonstrating how to convert common Logstash pipelines into equivalent OpenTelemetry Collector configurations. 
We will focus on the log signal.","image":"logstash-otel.jpg","author":[{"slug":"mirko-bez","type":"Author","_raw":{}},{"slug":"taha-derouiche","type":"Author","_raw":{}}],"tags":[{"slug":"opentelemetry","type":"Tag","_raw":{}},{"slug":"logstash","type":"Tag","_raw":{}},{"slug":"log-analytics","type":"Tag","_raw":{}}],"body":{"raw":"\\n# Convert Logstash pipelines to OpenTelemetry Collector Pipelines\\n\\n## Introduction\\n\\nElastic’s observability strategy is increasingly aligned with OpenTelemetry. With the recent launch of [Elastic Distributions of OpenTelemetry](https://www.elastic.co/observability-labs/blog/elastic-distributions-opentelemetry), we’re expanding our offering to make it easier to use OpenTelemetry. The Elastic Agent now offers an [\\"otel\\" mode](https://www.elastic.co/guide/en/fleet/current/otel-agent.html), enabling it to run a custom distribution of the OpenTelemetry Collector, seamlessly enhancing your observability onboarding and experience with Elastic.\\n\\nThis post is designed to assist users familiar with Logstash transitioning to OpenTelemetry by demonstrating how to convert some standard Logstash pipelines into corresponding OpenTelemetry Collector configurations. \\n\\n## What is OpenTelemetry Collector and why should I care?\\n\\n[OpenTelemetry](https://opentelemetry.io/) is an open-source framework that ensures vendor-agnostic data collection, providing a standardized approach for the collection, processing, and ingestion of observability data. Elastic is fully committed to this principle, aiming to make observability truly vendor-agnostic and eliminating the need for users to re-instrument their observability when switching platforms. \\n\\nBy embracing OpenTelemetry, you have access to these benefits:\\n\\n* **Unified Observability**: By using the OpenTelemetry Collector, you can collect and manage logs, metrics, and traces from a single tool, providing holistic observability into your system\'s performance and behavior. This simplifies monitoring and debugging in complex, distributed environments like microservices. \\n* **Flexibility and Scalability**: Whether you\'re running a small service or a large distributed system, the OpenTelemetry Collector can be scaled to handle the amount of data generated, offering the flexibility to deploy as an agent (running alongside applications) or as a gateway (a centralized hub). \\n* **Open Standards**: Since OpenTelemetry is an open-source project under the Cloud Native Computing Foundation (CNCF), it ensures that you\'re working with widely accepted standards, contributing to the long-term sustainability and compatibility of your observability stack. \\n* **Simplified Telemetry Pipelines**: The ability to build pipelines using receivers, processors, and exporters simplifies telemetry management by centralizing data flows and minimizing the need for multiple agents.\\n\\nIn the next sections, we will explain how OTEL Collector and Logstash pipelines are structured, and clarify how their building blocks correspond to each other.\\n\\n## OTEL Collector Configuration\\n\\nAn OpenTelemetry Collector [Configuration](https://opentelemetry.io/docs/collector/configuration/) has different sections:\\n\\n* **Receivers**: Collect data from different sources. 
\\n* **Processors**: Transform the data collected by receivers\\n* **Exporters**: Send data to different destinations \\n* **Connectors**: Link two pipelines together \\n* **Service**: Defines which components are active \\n * **Pipelines**: Combine the defined receivers, processors, exporters, and connectors to process the data \\n * **Extensions**: Optional components that expand the capabilities of the Collector to accomplish tasks not directly involved with processing telemetry data (e.g., health monitoring) \\n * **Telemetry**: Where you can set up observability for the collector itself (e.g., logging and monitoring)\\n\\nWe can visualize it schematically as follows:\\n\\n![otel-config-schema](/assets/images/logstash-to-otel/otel-config-schema.png)\\n\\nWe refer to the official documentation [Configuration | OpenTelemetry](https://opentelemetry.io/docs/collector/configuration/) for an in-depth introduction to the components. \\n\\n## Logstash pipeline definition\\n\\nA [Logstash pipeline](https://www.elastic.co/guide/en/logstash/current/configuration-file-structure.html) is composed of three main components:\\n\\n* Input Plugins: Allow us to read data from different sources \\n* Filter Plugins: Allow us to transform and filter the data \\n* Output Plugins: Allow us to send the data\\n\\nLogstash also has a special input and a special output that allow pipeline-to-pipeline communication; we can consider this a concept similar to an OpenTelemetry connector.\\n\\n## Logstash pipeline compared to Otel Collector components\\n\\nWe can schematize how Logstash Pipeline and OTEL Collector pipeline components can relate to each other as follows:\\n\\n![logstash-pipeline-to-otel-pipeline](/assets/images/logstash-to-otel/logstash-pipeline-to-otel-pipeline.png)\\n\\nEnough theory! Let us dive into some examples.\\n\\n## Convert a Logstash Pipeline into OpenTelemetry Collector Pipeline\\n\\n### Example 1: Parse and transform a log line\\n\\nLet\'s consider the line below:\\n\\n```\\n2024-09-20T08:33:27: user frank accessed from 89.66.167.22:10592 path /blog with error 404\\n```\\n\\nWe will apply the following steps:\\n\\n1. Read the line from the file `/tmp/demo-line.log`.\\n2. Define the output to be an Elasticsearch datastream `logs-access-default`.\\n3. Extract the `@timestamp`, `user.name`, `client.ip`, `client.port`, `url.path`, and `http.status.code`. \\n4. Drop log messages related to the `SYSTEM` user. \\n5. Parse the date timestamp with the relevant date format and store it in `@timestamp`. \\n6. Add a `http.status.code_description` field based on known codes\' descriptions. \\n7. 
\\n\\n## Logstash pipeline compared to Otel Collector components\\n\\nWe can schematize how Logstash Pipeline and OTEL Collector pipeline components can relate to each other as follows:\\n\\n![logstash-pipeline-to-otel-pipeline](/assets/images/logstash-to-otel/logstash-pipeline-to-otel-pipeline.png)\\n\\nEnough theory! Let us dive into some examples.\\n\\n## Convert a Logstash Pipeline into OpenTelemetry Collector Pipeline\\n\\n### Example 1: Parse and transform log line\\n\\nLet\'s consider the following line:\\n\\n```\\n2024-09-20T08:33:27: user frank accessed from 89.66.167.22:10592 path /blog with error 404\\n```\\n\\nWe will apply the following steps:\\n\\n1. Read the line from the file \`/tmp/demo-line.log\`.\\n2. Define the output to be an Elasticsearch datastream \`logs-access-default\`.\\n3. Extract the \`@timestamp\`, \`user.name\`, \`client.ip\`, \`client.port\`, \`url.path\` and \`http.status.code\`. \\n4. Drop log messages related to the \`SYSTEM\` user. \\n5. Parse the date timestamp with the relevant date format and store it in \`@timestamp\`. \\n6. Add a field \`http.status.code_description\` based on known codes\' descriptions. \\n7. Send data to Elasticsearch.\\n\\n**Logstash pipeline**\\n\\n```ruby\\ninput {\\n file {\\n path => \\"/tmp/demo-line.log\\" #[1]\\n start_position => \\"beginning\\"\\n add_field => { #[2]\\n \\"[data_stream][type]\\" => \\"logs\\"\\n \\"[data_stream][dataset]\\" => \\"access_log\\"\\n \\"[data_stream][namespace]\\" => \\"default\\"\\n }\\n }\\n}\\n\\nfilter {\\n grok { #[3]\\n match => {\\n \\"message\\" => \\"%{TIMESTAMP_ISO8601:[date]}: user %{WORD:[user][name]} accessed from %{IP:[client][ip]}:%{NUMBER:[client][port]:int} path %{URIPATH:[url][path]} with error %{NUMBER:[http][status][code]}\\"\\n }\\n }\\n if \\"_grokparsefailure\\" not in [tags] {\\n if [user][name] == \\"SYSTEM\\" { #[4]\\n drop {}\\n }\\n date { #[5]\\n match => [\\"[date]\\", \\"ISO8601\\"]\\n target => \\"[@timestamp]\\"\\n timezone => \\"UTC\\"\\n remove_field => [ \\"date\\" ]\\n }\\n translate { #[6]\\n source => \\"[http][status][code]\\"\\n target => \\"[http][status][code_description]\\"\\n dictionary => {\\n \\"200\\" => \\"OK\\"\\n \\"403\\" => \\"Permission denied\\"\\n \\"404\\" => \\"Not Found\\"\\n \\"500\\" => \\"Server Error\\"\\n }\\n fallback => \\"Unknown error\\"\\n }\\n }\\n}\\n\\noutput {\\n elasticsearch { #[7]\\n hosts => \\"elasticsearch-endpoint:443\\"\\n api_key => \\"${ES_API_KEY}\\"\\n }\\n}\\n```\\n\\n**OpenTelemetry Collector configuration**\\n\\n```yaml\\nreceivers:\\n filelog: #[1]\\n start_at: beginning\\n include:\\n - /tmp/demo-line.log\\n include_file_name: false\\n include_file_path: true\\n storage: file_storage \\n operators:\\n # Copy the raw message into event.original (this is done OOTB by Logstash in ECS mode)\\n - type: copy\\n from: body\\n to: attributes[\'event.original\']\\n - type: add #[2]\\n field: attributes[\\"data_stream.type\\"]\\n value: \\"logs\\"\\n - type: add #[2]\\n field: attributes[\\"data_stream.dataset\\"]\\n value: \\"access_log\\" \\n - type: add #[2]\\n field: attributes[\\"data_stream.namespace\\"]\\n value: \\"default\\"\\n\\nextensions:\\n file_storage:\\n directory: /var/lib/otelcol/file_storage\\n\\nprocessors:\\n # Adding host.name (this is done OOTB by Logstash)\\n resourcedetection/system:\\n detectors: [\\"system\\"]\\n system:\\n hostname_sources: [\\"os\\"]\\n resource_attributes:\\n os.type:\\n enabled: false\\n\\n transform/grok: #[3]\\n log_statements:\\n - context: log\\n statements:\\n - \'merge_maps(attributes, ExtractGrokPatterns(attributes[\\"event.original\\"], \\"%{TIMESTAMP_ISO8601:date}: user %{WORD:user.name} accessed from %{IP:client.ip}:%{NUMBER:client.port:int} path %{URIPATH:url.path} with error %{NUMBER:http.status.code}\\", true), \\"insert\\")\'\\n\\n filter/exclude_system_user: #[4]\\n error_mode: ignore\\n logs:\\n log_record:\\n - attributes[\\"user.name\\"] == \\"SYSTEM\\"\\n\\n transform/parse_date: #[5]\\n log_statements:\\n - context: log\\n statements:\\n - set(time, Time(attributes[\\"date\\"], \\"%Y-%m-%dT%H:%M:%S\\"))\\n - delete_key(attributes, \\"date\\")\\n conditions:\\n - attributes[\\"date\\"] != nil\\n\\n transform/translate_status_code: #[6]\\n log_statements:\\n - context: log\\n conditions:\\n - attributes[\\"http.status.code\\"] != nil\\n statements:\\n - set(attributes[\\"http.status.code_description\\"], \\"OK\\") where attributes[\\"http.status.code\\"] == \\"200\\"\\n - set(attributes[\\"http.status.code_description\\"], \\"Permission Denied\\") where attributes[\\"http.status.code\\"] == \\"403\\"\\n - set(attributes[\\"http.status.code_description\\"], \\"Not Found\\") where attributes[\\"http.status.code\\"] == \\"404\\"\\n - set(attributes[\\"http.status.code_description\\"], \\"Server Error\\") where attributes[\\"http.status.code\\"] == \\"500\\"\\n - set(attributes[\\"http.status.code_description\\"], \\"Unknown Error\\") where attributes[\\"http.status.code_description\\"] == nil\\n\\nexporters:\\n elasticsearch: #[7]\\n endpoints: [\\"elasticsearch-endpoint:443\\"]\\n api_key: ${env:ES_API_KEY}\\n tls:\\n logs_dynamic_index:\\n enabled: true\\n mapping:\\n mode: ecs\\n\\nservice:\\n extensions: [file_storage]\\n pipelines:\\n logs:\\n receivers:\\n - filelog\\n processors:\\n - resourcedetection/system\\n - transform/grok\\n - filter/exclude_system_user\\n - transform/parse_date\\n - transform/translate_status_code\\n exporters:\\n - elasticsearch\\n```
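\\n\\nWhile iterating on a conversion like this, it can help to temporarily add the Collector\'s \`debug\` exporter next to the Elasticsearch exporter, so every processed record is also printed to the Collector\'s own log (a troubleshooting sketch, not part of the original pipeline):\\n\\n```yaml\\nexporters:\\n debug:\\n verbosity: detailed\\n\\nservice:\\n pipelines:\\n logs:\\n receivers: [filelog]\\n processors: [resourcedetection/system, transform/grok, filter/exclude_system_user, transform/parse_date, transform/translate_status_code]\\n exporters: [elasticsearch, debug]\\n```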
\\n\\nThis will generate the following document in Elasticsearch:\\n\\n```json\\n{\\n \\"@timestamp\\": \\"2024-09-20T08:33:27.000Z\\",\\n \\"client\\": {\\n \\"ip\\": \\"89.66.167.22\\",\\n \\"port\\": 10592\\n },\\n \\"data_stream\\": {\\n \\"dataset\\": \\"access_log\\",\\n \\"namespace\\": \\"default\\",\\n \\"type\\": \\"logs\\"\\n },\\n \\"event\\": {\\n \\"original\\": \\"2024-09-20T08:33:27: user frank accessed from 89.66.167.22:10592 path /blog with error 404\\"\\n },\\n \\"host\\": {\\n \\"hostname\\": \\"my-laptop\\",\\n \\"name\\": \\"my-laptop\\"\\n },\\n \\"http\\": {\\n \\"status\\": {\\n \\"code\\": \\"404\\",\\n \\"code_description\\": \\"Not Found\\"\\n }\\n },\\n \\"log\\": {\\n \\"file\\": {\\n \\"path\\": \\"/tmp/demo-line.log\\"\\n }\\n },\\n \\"message\\": \\"2024-09-20T08:33:27: user frank accessed from 89.66.167.22:10592 path /blog with error 404\\",\\n \\"url\\": {\\n \\"path\\": \\"/blog\\"\\n },\\n \\"user\\": {\\n \\"name\\": \\"frank\\"\\n }\\n}\\n```\\n\\n### Example 2: Parse and transform a NDJSON-formatted log file\\n\\nLet\'s consider the following JSON line:\\n\\n```json\\n{\\"log_level\\":\\"INFO\\",\\"message\\":\\"User login successful\\",\\"service\\":\\"auth-service\\",\\"timestamp\\":\\"2024-10-11 12:34:56.123 +0100\\",\\"user\\":{\\"id\\":\\"A1230\\",\\"name\\":\\"john_doe\\"}}\\n```\\n\\nWe will apply the following steps:\\n\\n1. Read a line from the file \`/tmp/demo.ndjson\`. \\n2. Define the output to be an Elasticsearch datastream \`logs-json-default\`. \\n3. Parse the JSON and assign relevant keys and values. \\n4. Parse the date. \\n5. Override the message field. \\n6. Rename fields to follow ECS conventions. \\n7. Send data to Elasticsearch.
\\n\\n**Logstash pipeline**\\n\\n```ruby\\ninput {\\n file {\\n path => \\"/tmp/demo.ndjson\\" #[1]\\n start_position => \\"beginning\\"\\n add_field => { #[2]\\n \\"[data_stream][type]\\" => \\"logs\\"\\n \\"[data_stream][dataset]\\" => \\"json\\"\\n \\"[data_stream][namespace]\\" => \\"default\\"\\n }\\n }\\n}\\n\\nfilter {\\n if [message] =~ /^\\\\{.*/ {\\n json { #[3] & #[5]\\n source => \\"message\\"\\n }\\n }\\n date { #[4]\\n match => [\\"[timestamp]\\", \\"yyyy-MM-dd HH:mm:ss.SSS Z\\"]\\n remove_field => \\"[timestamp]\\"\\n }\\n mutate {\\n rename => { #[6]\\n \\"service\\" => \\"[service][name]\\"\\n \\"log_level\\" => \\"[log][level]\\"\\n }\\n }\\n}\\n\\n\\noutput {\\n elasticsearch { # [7]\\n hosts => \\"elasticsearch-endpoint:443\\"\\n api_key => \\"${ES_API_KEY}\\"\\n }\\n}\\n```\\n\\n**OpenTelemetry Collector configuration**\\n\\n```yaml\\nreceivers:\\n filelog/json: # [1]\\n include: \\n - /tmp/demo.ndjson\\n retry_on_failure:\\n enabled: true\\n start_at: beginning\\n storage: file_storage \\n operators:\\n # Copy the raw message into event.original (this is done OOTB by Logstash in ECS mode)\\n - type: copy\\n from: body\\n to: attributes[\'event.original\']\\n - type: add #[2]\\n field: attributes[\\"data_stream.type\\"]\\n value: \\"logs\\" \\n - type: add #[2]\\n field: attributes[\\"data_stream.dataset\\"]\\n value: \\"otel\\" \\n - type: add #[2]\\n field: attributes[\\"data_stream.namespace\\"]\\n value: \\"default\\" \\n\\n\\nextensions:\\n file_storage:\\n directory: /var/lib/otelcol/file_storage\\n\\nprocessors:\\n # Adding host.name (this is done OOTB by Logstash)\\n resourcedetection/system:\\n detectors: [\\"system\\"]\\n system:\\n hostname_sources: [\\"os\\"]\\n resource_attributes:\\n os.type:\\n enabled: false\\n\\n transform/json_parse: #[3]\\n error_mode: ignore\\n log_statements:\\n - context: log\\n statements:\\n - merge_maps(attributes, ParseJSON(body), \\"upsert\\")\\n conditions: \\n - IsMatch(body, \\"^\\\\\\\\{\\")\\n \\n\\n transform/parse_date: #[4]\\n error_mode: ignore\\n log_statements:\\n - context: log\\n statements:\\n - set(time, Time(attributes[\\"timestamp\\"], \\"%Y-%m-%d %H:%M:%S.%L %z\\"))\\n - delete_key(attributes, \\"timestamp\\")\\n conditions: \\n - attributes[\\"timestamp\\"] != nil\\n\\n transform/override_message_field: #[5]\\n error_mode: ignore\\n log_statements:\\n - context: log\\n statements:\\n - set(body, attributes[\\"message\\"])\\n - delete_key(attributes, \\"message\\")\\n\\n transform/set_log_severity: # [6]\\n error_mode: ignore\\n log_statements:\\n - context: log\\n statements:\\n - set(severity_text, attributes[\\"log_level\\"]) \\n\\n attributes/rename_attributes: #[6]\\n actions:\\n - key: service.name\\n from_attribute: service\\n action: insert\\n - key: service\\n action: delete\\n - key: log_level\\n action: delete\\n\\nexporters:\\n elasticsearch: #[7]\\n endpoints: [\\"elasticsearch-endpoint:443\\"]\\n api_key: ${env:ES_API_KEY}\\n tls:\\n logs_dynamic_index:\\n enabled: true\\n mapping:\\n mode: ecs\\n\\nservice:\\n extensions: [file_storage]\\n pipelines:\\n logs/json:\\n receivers: \\n - filelog/json\\n processors:\\n - resourcedetection/system \\n - transform/json_parse\\n - transform/parse_date \\n - transform/override_message_field\\n - transform/set_log_severity\\n - attributes/rename_attributes\\n exporters: \\n - elasticsearch\\n\\n```
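\\n\\nAs a side note, the JSON decoding could also be done inside the receiver itself using the filelog \`json_parser\` operator instead of the \`transform/json_parse\` processor; a rough sketch, assuming the same file and timestamp layout:\\n\\n```yaml\\nreceivers:\\n filelog/json:\\n include:\\n - /tmp/demo.ndjson\\n operators:\\n - type: json_parser\\n parse_from: body\\n parse_to: attributes\\n timestamp:\\n parse_from: attributes.timestamp\\n layout: \'%Y-%m-%d %H:%M:%S.%L %z\'\\n```\\n\\nWhich variant to prefer is mostly a matter of taste: the operator keeps parsing close to the source, while the \`transform\` processor keeps all mutations in one place.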
\\n\\nThis will generate the following document in Elasticsearch:\\n\\n```json\\n{\\n \\"@timestamp\\": \\"2024-10-11T12:34:56.123000000Z\\",\\n \\"data_stream\\": {\\n \\"dataset\\": \\"otel\\",\\n \\"namespace\\": \\"default\\",\\n \\"type\\": \\"logs\\"\\n },\\n \\"event\\": {\\n \\"original\\": \\"{\\\\\\"log_level\\\\\\":\\\\\\"INFO\\\\\\",\\\\\\"message\\\\\\":\\\\\\"User login successful\\\\\\",\\\\\\"service\\\\\\":\\\\\\"auth-service\\\\\\",\\\\\\"timestamp\\\\\\":\\\\\\"2024-10-11 12:34:56.123 +0100\\\\\\",\\\\\\"user\\\\\\":{\\\\\\"id\\\\\\":\\\\\\"A1230\\\\\\",\\\\\\"name\\\\\\":\\\\\\"john_doe\\\\\\"}}\\"\\n },\\n \\"host\\": {\\n \\"hostname\\": \\"my-laptop\\",\\n \\"name\\": \\"my-laptop\\"\\n },\\n \\"log\\": {\\n \\"file\\": {\\n \\"name\\": \\"demo.ndjson\\"\\n },\\n \\"level\\": \\"INFO\\"\\n },\\n \\"message\\": \\"User login successful\\",\\n \\"service\\": {\\n \\"name\\": \\"auth-service\\"\\n },\\n \\"user\\": {\\n \\"id\\": \\"A1230\\",\\n \\"name\\": \\"john_doe\\"\\n }\\n}\\n\\n```\\n\\n## Conclusion\\n\\nIn this post, we showed examples of how to convert a typical Logstash pipeline into an OpenTelemetry Collector pipeline for logs. While OpenTelemetry provides powerful tools for collecting and exporting logs, if your pipeline relies on complex transformations or scripting, Logstash remains a superior choice, because it offers a broader range of built-in features and a more flexible approach to handling advanced data manipulation tasks.\\n\\n## What\'s Next?\\n\\nNow that you\'ve seen basic (but realistic) examples of converting a Logstash pipeline to OpenTelemetry, it\'s your turn to dive deeper. Depending on your needs, you can explore further and find more detailed resources in the following repositories:\\n\\n* [OpenTelemetry Collector](https://github.com/open-telemetry/opentelemetry-collector): Learn about the core OpenTelemetry components, from receivers to exporters. \\n* [OpenTelemetry Collector Contrib](https://github.com/open-telemetry/opentelemetry-collector-contrib): Find community-contributed components for a wider range of integrations and features. \\n* [Elastic\'s opentelemetry-collector-components](https://github.com/elastic/opentelemetry-collector-components): Dive into Elastic\'s extensions for the OpenTelemetry Collector, offering more tailored features for Elastic Stack users.\\n\\nIf you encounter specific challenges or need to handle more advanced use cases, these repositories will be an excellent resource for discovering additional components or integrations that can enhance your pipeline. All these repositories have a similar structure with folders named \`receiver\`, \`processor\`, \`exporter\`, \`connector\`, which should be familiar after reading this blog.
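\\n\\nAs a loose illustration of that layout, each folder name corresponds to a component type that you reference by name in a configuration; for the components used in this post:\\n\\n```yaml\\nreceivers:\\n filelog: # receiver/filelogreceiver (contrib)\\nprocessors:\\n transform: # processor/transformprocessor (contrib)\\n filter: # processor/filterprocessor (contrib)\\n resourcedetection: # processor/resourcedetectionprocessor (contrib)\\nexporters:\\n elasticsearch: # exporter/elasticsearchexporter (contrib)\\n```\\n\\n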
Whether you are migrating a simple Logstash pipeline or tackling more complex data transformations, these tools and communities will provide the support you need for a successful OpenTelemetry implementation.","code":"var Component=(()=>{var p=Object.create;var s=Object.defineProperty;var m=Object.getOwnPropertyDescriptor;var g=Object.getOwnPropertyNames;var u=Object.getPrototypeOf,f=Object.prototype.hasOwnProperty;var y=(t,e)=>()=>(e||t((e={exports:{}}).exports,e),e.exports),b=(t,e)=>{for(var o in e)s(t,o,{get:e[o],enumerable:!0})},l=(t,e,o,r)=>{if(e&&typeof e==\\"object\\"||typeof e==\\"function\\")for(let i of g(e))!f.call(t,i)&&i!==o&&s(t,i,{get:()=>e[i],enumerable:!(r=m(e,i))||r.enumerable});return t};var _=(t,e,o)=>(o=t!=null?p(u(t)):{},l(e||!t||!t.__esModule?s(o,\\"default\\",{value:t,enumerable:!0}):o,t)),v=t=>l(s({},\\"__esModule\\",{value:!0}),t);var c=y((O,a)=>{a.exports=_jsx_runtime});var T={};b(T,{default:()=>h,frontmatter:()=>w});var n=_(c()),w={title:\\"Convert Logstash pipelines to OpenTelemetry Collector Pipelines\\",slug:\\"logstash-to-otel\\",date:\\"2024-10-25\\",description:\\"This guide helps Logstash users transition to OpenTelemetry by demonstrating how to convert common Logstash pipelines into equivalent OpenTelemetry Collector configurations. We will focus on the log signal.\\",author:[{slug:\\"mirko-bez\\"},{slug:\\"taha-derouiche\\"}],image:\\"logstash-otel.jpg\\",tags:[{slug:\\"opentelemetry\\"},{slug:\\"logstash\\"},{slug:\\"log-analytics\\"}]};function d(t){let e={a:\\"a\\",code:\\"code\\",h1:\\"h1\\",h2:\\"h2\\",h3:\\"h3\\",img:\\"img\\",li:\\"li\\",ol:\\"ol\\",p:\\"p\\",pre:\\"pre\\",strong:\\"strong\\",ul:\\"ul\\",...t.components};return(0,n.jsxs)(n.Fragment,{children:[(0,n.jsx)(e.h1,{id:\\"convert-logstash-pipelines-to-opentelemetry-collector-pipelines\\",children:\\"Convert Logstash pipelines to OpenTelemetry Collector Pipelines\\"}),`\\n`,(0,n.jsx)(e.h2,{id:\\"introduction\\",children:\\"Introduction\\"}),`\\n`,(0,n.jsxs)(e.p,{children:[\\"Elastic observability strategy is increasingly aligned with OpenTelemetry. With the recent launch of \\",(0,n.jsx)(e.a,{href:\\"https://www.elastic.co/observability-labs/blog/elastic-distributions-opentelemetry\\",rel:\\"nofollow\\",children:\\"Elastic Distributions of OpenTelemetry\\"}),\\" we\\\\u2019re expanding our offering to make it easier to use OpenTelemetry, the Elastic Agent now offers an \\",(0,n.jsx)(e.a,{href:\\"https://www.elastic.co/guide/en/fleet/current/otel-agent.html\\",rel:\\"nofollow\\",children:\'\\"otel\\" mode\'}),\\", enabling it to run a custom distribution of the OpenTelemetry Collector, seamlessly enhancing your observability onboarding and experience with Elastic.\\"]}),`\\n`,(0,n.jsx)(e.p,{children:\\"This post is designed to assist users familiar with Logstash transitioning to OpenTelemetry by demonstrating how to convert some standard Logstash pipelines into corresponding OpenTelemetry Collector configurations.\\"}),`\\n`,(0,n.jsx)(e.h2,{id:\\"what-is-opentelemetry-collector-and-why-should-i-care\\",children:\\"What is OpenTelemetry Collector and why should I care?\\"}),`\\n`,(0,n.jsxs)(e.p,{children:[(0,n.jsx)(e.a,{href:\\"https://opentelemetry.io/\\",rel:\\"nofollow\\",children:\\"OpenTelemetry\\"}),\\" is an open-source framework that ensures vendor-agnostic data collection, providing a standardized approach for the collection, processing, and ingestion of observability data. 
Elastic is fully committed to this principle, aiming to make observability truly vendor-agnostic and eliminating the need for users to re-instrument their observability when switching platforms.\\"]}),`\\n`,(0,n.jsx)(e.p,{children:\\"By embracing OpenTelemetry, you have access to these benefits:\\"}),`\\n`,(0,n.jsxs)(e.ul,{children:[`\\n`,(0,n.jsxs)(e.li,{children:[(0,n.jsx)(e.strong,{children:\\"Unified Observability\\"}),\\": By using the OpenTelemetry Collector, you can collect and manage logs, metrics, and traces from a single tool, providing holistic observability into your system\'s performance and behavior. This simplifies monitoring and debugging in complex, distributed environments like microservices.\\"]}),`\\n`,(0,n.jsxs)(e.li,{children:[(0,n.jsx)(e.strong,{children:\\"Flexibility and Scalability\\"}),\\": Whether you\'re running a small service or a large distributed system, the OpenTelemetry Collector can be scaled to handle the amount of data generated, offering the flexibility to deploy as an agent (running alongside applications) or as a gateway (a centralized hub).\\"]}),`\\n`,(0,n.jsxs)(e.li,{children:[(0,n.jsx)(e.strong,{children:\\"Open Standards\\"}),\\": Since OpenTelemetry is an open-source project under the Cloud Native Computing Foundation (CNCF), it ensures that you\'re working with widely accepted standards, contributing to the long-term sustainability and compatibility of your observability stack.\\"]}),`\\n`,(0,n.jsxs)(e.li,{children:[(0,n.jsx)(e.strong,{children:\\"Simplified Telemetry Pipelines\\"}),\\": The ability to build pipelines using receivers, processors, and exporters simplifies telemetry management by centralizing data flows and minimizing the need for multiple agents.\\"]}),`\\n`]}),`\\n`,(0,n.jsx)(e.p,{children:\\"In the next sections, we will explain how OTEL Collector and Logstash pipelines are structured, and we will clarify how the steps for each option are used.\\"}),`\\n`,(0,n.jsx)(e.h2,{id:\\"otel-collector-configuration\\",children:\\"OTEL Collector Configuration\\"}),`\\n`,(0,n.jsxs)(e.p,{children:[\\"An OpenTelemetry Collector \\",(0,n.jsx)(e.a,{href:\\"https://opentelemetry.io/docs/collector/configuration/\\",rel:\\"nofollow\\",children:\\"Configuration\\"}),\\" has different sections:\\"]}),`\\n`,(0,n.jsxs)(e.ul,{children:[`\\n`,(0,n.jsxs)(e.li,{children:[(0,n.jsx)(e.strong,{children:\\"Receivers\\"}),\\": Collect data from different sources.\\"]}),`\\n`,(0,n.jsxs)(e.li,{children:[(0,n.jsx)(e.strong,{children:\\"Processors\\"}),\\": Transform the data collected by receivers\\"]}),`\\n`,(0,n.jsxs)(e.li,{children:[(0,n.jsx)(e.strong,{children:\\"Exporters\\"}),\\": Send data to different collectors\\"]}),`\\n`,(0,n.jsxs)(e.li,{children:[(0,n.jsx)(e.strong,{children:\\"Connectors\\"}),\\": Link two pipelines together\\"]}),`\\n`,(0,n.jsxs)(e.li,{children:[(0,n.jsx)(e.strong,{children:\\"Service\\"}),\\": defines which components are active\\",`\\n`,(0,n.jsxs)(e.ul,{children:[`\\n`,(0,n.jsxs)(e.li,{children:[(0,n.jsx)(e.strong,{children:\\"Pipelines\\"}),\\": Combine the defined receivers, processors, exporters, and connectors to process the data\\"]}),`\\n`,(0,n.jsxs)(e.li,{children:[(0,n.jsx)(e.strong,{children:\\"Extensions\\"}),\\" are optional components that expand the capabilities of the Collector to accomplish tasks not directly involved with processing telemetry data (e.g., health monitoring)\\"]}),`\\n`,(0,n.jsxs)(e.li,{children:[(0,n.jsx)(e.strong,{children:\\"Telemetry\\"}),\\" where you can set observability for the collector 
itself (e.g., logging and monitoring)\\"]}),`\\n`]}),`\\n`]}),`\\n`]}),`\\n`,(0,n.jsx)(e.p,{children:\\"We can visualize it schematically as follows:\\"}),`\\n`,(0,n.jsx)(e.p,{children:(0,n.jsx)(e.img,{src:\\"/assets/images/logstash-to-otel/otel-config-schema.png\\",alt:\\"otel-config-schema\\",width:\\"459\\",height:\\"1273\\"})}),`\\n`,(0,n.jsxs)(e.p,{children:[\\"We refer to the official documentation \\",(0,n.jsx)(e.a,{href:\\"https://opentelemetry.io/docs/collector/configuration/\\",rel:\\"nofollow\\",children:\\"Configuration | OpenTelemetry\\"}),\\" for an in-depth introduction in the components.\\"]}),`\\n`,(0,n.jsx)(e.h2,{id:\\"logstash-pipeline-definition\\",children:\\"Logstash pipeline definition\\"}),`\\n`,(0,n.jsxs)(e.p,{children:[\\"A \\",(0,n.jsx)(e.a,{href:\\"https://www.elastic.co/guide/en/logstash/current/configuration-file-structure.html\\",rel:\\"nofollow\\",children:\\"Logstash pipeline\\"}),\\" is composed of three main components:\\"]}),`\\n`,(0,n.jsxs)(e.ul,{children:[`\\n`,(0,n.jsx)(e.li,{children:\\"Input Plugins: Allow us to read data from different sources\\"}),`\\n`,(0,n.jsx)(e.li,{children:\\"Filters Plugins: Allow us to transform and filter the data\\"}),`\\n`,(0,n.jsx)(e.li,{children:\\"Output Plugins: Allow us to send the data\\"}),`\\n`]}),`\\n`,(0,n.jsx)(e.p,{children:\\"Logstash also has a special input and a special output that allow the pipeline-to-pipeline communication, we can consider this as a similar concept to an OpenTelemetry connector.\\"}),`\\n`,(0,n.jsx)(e.h2,{id:\\"logstash-pipeline-compared-to-otel-collector-components\\",children:\\"Logstash pipeline compared to Otel Collector components\\"}),`\\n`,(0,n.jsx)(e.p,{children:\\"We can schematize how Logstash Pipeline and OTEL Collector pipeline components can relate to each other as follows:\\"}),`\\n`,(0,n.jsx)(e.p,{children:(0,n.jsx)(e.img,{src:\\"/assets/images/logstash-to-otel/logstash-pipeline-to-otel-pipeline.png\\",alt:\\"logstash-pipeline-to-otel-pipeline\\",width:\\"922\\",height:\\"901\\"})}),`\\n`,(0,n.jsx)(e.p,{children:\\"Enough theory! 
Let us dive into some examples.\\"}),`\\n`,(0,n.jsx)(e.h2,{id:\\"convert-a-logstash-pipeline-into-opentelemetry-collector-pipeline\\",children:\\"Convert a Logstash Pipeline into OpenTelemetry Collector Pipeline\\"}),`\\n`,(0,n.jsx)(e.h3,{id:\\"example-1-parse-and-transform-log-line\\",children:\\"Example 1: Parse and transform log line\\"}),`\\n`,(0,n.jsx)(e.p,{children:\\"Let\'s consider the below line:\\"}),`\\n`,(0,n.jsx)(e.pre,{children:(0,n.jsx)(e.code,{children:`2024-09-20T08:33:27: user frank accessed from 89.66.167.22:10592 path /blog with error 404\\n`})}),`\\n`,(0,n.jsx)(e.p,{children:\\"We will apply the following steps:\\"}),`\\n`,(0,n.jsxs)(e.ol,{children:[`\\n`,(0,n.jsxs)(e.li,{children:[\\"Read the line from the file \\",(0,n.jsx)(e.code,{children:\\"/tmp/demo-line.log\\"}),\\".\\"]}),`\\n`,(0,n.jsxs)(e.li,{children:[\\"Define the output to be an Elasticsearch datastream \\",(0,n.jsx)(e.code,{children:\\"logs-access-default\\"}),\\".\\"]}),`\\n`,(0,n.jsxs)(e.li,{children:[\\"Extract the \\",(0,n.jsx)(e.code,{children:\\"@timestamp\\"}),\\", \\",(0,n.jsx)(e.code,{children:\\"user.name\\"}),\\", \\",(0,n.jsx)(e.code,{children:\\"client.ip\\"}),\\", \\",(0,n.jsx)(e.code,{children:\\"client.port\\"}),\\", \\",(0,n.jsx)(e.code,{children:\\"url.path\\"}),\\" and \\",(0,n.jsx)(e.code,{children:\\"http.status.code\\"}),\\".\\"]}),`\\n`,(0,n.jsxs)(e.li,{children:[\\"Drop log messages related to the \\",(0,n.jsx)(e.code,{children:\\"SYSTEM\\"}),\\" user.\\"]}),`\\n`,(0,n.jsxs)(e.li,{children:[\\"Parse the date timestamp with the relevant date format and store it in \\",(0,n.jsx)(e.code,{children:\\"@timestamp\\"}),\\".\\"]}),`\\n`,(0,n.jsxs)(e.li,{children:[\\"Add a code \\",(0,n.jsx)(e.code,{children:\\"http.status.code_description\\"}),\\" based on known codes\' descriptions.\\"]}),`\\n`,(0,n.jsx)(e.li,{children:\\"Send data to Elasticsearch.\\"}),`\\n`]}),`\\n`,(0,n.jsx)(e.p,{children:(0,n.jsx)(e.strong,{children:\\"Logstash pipeline\\"})}),`\\n`,(0,n.jsx)(e.pre,{children:(0,n.jsx)(e.code,{className:\\"language-ruby\\",children:`input {\\n file {\\n path => \\"/tmp/demo-line.log\\" #[1]\\n start_position => \\"beginning\\"\\n add_field => { #[2]\\n \\"[data_stream][type]\\" => \\"logs\\"\\n \\"[data_stream][dataset]\\" => \\"access_log\\"\\n \\"[data_stream][namespace]\\" => \\"default\\"\\n }\\n }\\n}\\n\\nfilter {\\n grok { #[3]\\n match => {\\n \\"message\\" => \\"%{TIMESTAMP_ISO8601:[date]}: user %{WORD:[user][name]} accessed from %{IP:[client][ip]}:%{NUMBER:[client][port]:int} path %{URIPATH:[url][path]} with error %{NUMBER:[http][status][code]}\\"\\n }\\n }\\n if \\"_grokparsefailure\\" not in [tags] {\\n if [user][name] == \\"SYSTEM\\" { #[4]\\n drop {}\\n }\\n date { #[5]\\n match => [\\"[date]\\", \\"ISO8601\\"]\\n target => \\"[@timestamp]\\"\\n timezone => \\"UTC\\"\\n remove_field => [ \\"date\\" ]\\n }\\n translate { #[6]\\n source => \\"[http][status][code]\\"\\n target => \\"[http][status][code_description]\\"\\n dictionary => {\\n \\"200\\" => \\"OK\\"\\n \\"403\\" => \\"Permission denied\\"\\n \\"404\\" => \\"Not Found\\"\\n \\"500\\" => \\"Server Error\\"\\n }\\n fallback => \\"Unknown error\\"\\n }\\n }\\n}\\n\\noutput {\\n elasticsearch { #[7]\\n hosts => \\"elasticsearch-enpoint:443\\"\\n api_key => \\"\\\\${ES_API_KEY}\\"\\n }\\n}\\n`})}),`\\n`,(0,n.jsx)(e.p,{children:(0,n.jsx)(e.strong,{children:\\"OpenTelemtry Collector configuration\\"})}),`\\n`,(0,n.jsx)(e.pre,{children:(0,n.jsx)(e.code,{className:\\"language-yaml\\",children:`receivers:\\n filelog: 
#[1]\\n start_at: beginning\\n include:\\n - /tmp/demo-line.log\\n include_file_name: false\\n include_file_path: true\\n storage: file_storage \\n operators:\\n # Copy the raw message into event.original (this is done OOTB by Logstash in ECS mode)\\n - type: copy\\n from: body\\n to: attributes[\'event.original\']\\n - type: add #[2]\\n field: attributes[\\"data_stream.type\\"]\\n value: \\"logs\\"\\n - type: add #[2]\\n field: attributes[\\"data_stream.dataset\\"]\\n value: \\"access_log_otel\\" \\n - type: add #[2]\\n field: attributes[\\"data_stream.namespace\\"]\\n value: \\"default\\"\\n\\nextensions:\\n file_storage:\\n directory: /var/lib/otelcol/file_storage\\n\\nprocessors:\\n # Adding host.name (this is done OOTB by Logstash)\\n resourcedetection/system:\\n detectors: [\\"system\\"]\\n system:\\n hostname_sources: [\\"os\\"]\\n resource_attributes:\\n os.type:\\n enabled: false\\n\\n transform/grok: #[3]\\n log_statements:\\n - context: log\\n statements:\\n - \'merge_maps(attributes, ExtractGrokPatterns(attributes[\\"event.original\\"], \\"%{TIMESTAMP_ISO8601:date}: user %{WORD:user.name} accessed from %{IP:client.ip}:%{NUMBER:client.port:int} path %{URIPATH:url.path} with error %{NUMBER:http.status.code}\\", true), \\"insert\\")\'\\n\\n filter/exclude_system_user: #[4]\\n error_mode: ignore\\n logs:\\n log_record:\\n - attributes[\\"user.name\\"] == \\"SYSTEM\\"\\n\\n transform/parse_date: #[5]\\n log_statements:\\n - context: log\\n statements:\\n - set(time, Time(attributes[\\"date\\"], \\"%Y-%m-%dT%H:%M:%S\\"))\\n - delete_key(attributes, \\"date\\")\\n conditions:\\n - attributes[\\"date\\"] != nil\\n\\n transform/translate_status_code: #[6]\\n log_statements:\\n - context: log\\n conditions:\\n - attributes[\\"http.status.code\\"] != nil\\n statements:\\n - set(attributes[\\"http.status.code_description\\"], \\"OK\\") where attributes[\\"http.status.code\\"] == \\"200\\"\\n - set(attributes[\\"http.status.code_description\\"], \\"Permission Denied\\") where attributes[\\"http.status.code\\"] == \\"403\\"\\n - set(attributes[\\"http.status.code_description\\"], \\"Not Found\\") where attributes[\\"http.status.code\\"] == \\"404\\"\\n - set(attributes[\\"http.status.code_description\\"], \\"Server Error\\") where attributes[\\"http.status.code\\"] == \\"500\\"\\n - set(attributes[\\"http.status.code_description\\"], \\"Unknown Error\\") where attributes[\\"http.status.code_description\\"] == nil\\n\\nexporters:\\n elasticsearch: #[7]\\n endpoints: [\\"elasticsearch-enpoint:443\\"]\\n api_key: \\\\${env:ES_API_KEY}\\n tls:\\n logs_dynamic_index:\\n enabled: true\\n mapping:\\n mode: ecs\\n\\nservice:\\n extensions: [file_storage]\\n pipelines:\\n logs:\\n receivers:\\n - filelog\\n processors:\\n - resourcedetection/system\\n - transform/grok\\n - filter/exclude_system_user\\n - transform/parse_date\\n - transform/translate_status_code\\n exporters:\\n - elasticsearch\\n`})}),`\\n`,(0,n.jsx)(e.p,{children:\\"These will generate the following document in Elasticsearch\\"}),`\\n`,(0,n.jsx)(e.pre,{children:(0,n.jsx)(e.code,{className:\\"language-json\\",children:`{\\n \\"@timestamp\\": \\"2024-09-20T08:33:27.000Z\\",\\n \\"client\\": {\\n \\"ip\\": \\"89.66.167.22\\",\\n \\"port\\": 10592\\n },\\n \\"data_stream\\": {\\n \\"dataset\\": \\"access_log\\",\\n \\"namespace\\": \\"default\\",\\n \\"type\\": \\"logs\\"\\n },\\n \\"event\\": {\\n \\"original\\": \\"2024-09-20T08:33:27: user frank accessed from 89.66.167.22:10592 path /blog with error 404\\"\\n },\\n \\"host\\": {\\n 
\\"hostname\\": \\"my-laptop\\",\\n \\"name\\": \\"my-laptop\\",\\n },\\n \\"http\\": {\\n \\"status\\": {\\n \\"code\\": \\"404\\",\\n \\"code_description\\": \\"Not Found\\"\\n }\\n },\\n \\"log\\": {\\n \\"file\\": {\\n \\"path\\": \\"/tmp/demo-line.log\\"\\n }\\n },\\n \\"message\\": \\"2024-09-20T08:33:27: user frank accessed from 89.66.167.22:10592 path /blog with error 404\\",\\n \\"url\\": {\\n \\"path\\": \\"/blog\\"\\n },\\n \\"user\\": {\\n \\"name\\": \\"frank\\"\\n }\\n}\\n`})}),`\\n`,(0,n.jsx)(e.h3,{id:\\"example-2-parse-and-transform-a-ndjson-formatted-log-file\\",children:\\"Example 2: Parse and transform a NDJSON-formatted log file\\"}),`\\n`,(0,n.jsx)(e.p,{children:\\"Let\'s consider the below json line:\\"}),`\\n`,(0,n.jsx)(e.pre,{children:(0,n.jsx)(e.code,{className:\\"language-json\\",children:`{\\"log_level\\":\\"INFO\\",\\"message\\":\\"User login successful\\",\\"service\\":\\"auth-service\\",\\"timestamp\\":\\"2024-10-11 12:34:56.123 +0100\\",\\"user\\":{\\"id\\":\\"A1230\\",\\"name\\":\\"john_doe\\"}}\\n`})}),`\\n`,(0,n.jsx)(e.p,{children:\\"We will apply the following steps:\\"}),`\\n`,(0,n.jsxs)(e.ol,{children:[`\\n`,(0,n.jsxs)(e.li,{children:[\\"Read a line from the file \\",(0,n.jsx)(e.code,{children:\\"/tmp/demo.ndjson\\"}),\\".\\"]}),`\\n`,(0,n.jsxs)(e.li,{children:[\\"Define the output to be an Elasticsearch datastream \\",(0,n.jsx)(e.code,{children:\\"logs-json-default\\"})]}),`\\n`,(0,n.jsx)(e.li,{children:\\"Parse the JSON and assign relevant keys and values.\\"}),`\\n`,(0,n.jsx)(e.li,{children:\\"Parse the date.\\"}),`\\n`,(0,n.jsx)(e.li,{children:\\"Override the message field.\\"}),`\\n`,(0,n.jsx)(e.li,{children:\\"Rename fields to follow ECS conventions.\\"}),`\\n`,(0,n.jsx)(e.li,{children:\\"Send data to Elasticsearch.\\"}),`\\n`]}),`\\n`,(0,n.jsx)(e.p,{children:(0,n.jsx)(e.strong,{children:\\"Logstash pipeline\\"})}),`\\n`,(0,n.jsx)(e.pre,{children:(0,n.jsx)(e.code,{className:\\"language-ruby\\",children:`input {\\n file {\\n path => \\"/tmp/demo.ndjson\\" #[1]\\n start_position => \\"beginning\\"\\n add_field => { #[2]\\n \\"[data_stream][type]\\" => \\"logs\\"\\n \\"[data_stream][dataset]\\" => \\"json\\"\\n \\"[data_stream][namespace]\\" => \\"default\\"\\n }\\n }\\n}\\n\\nfilter {\\n if [message] =~ /^\\\\\\\\{.*/ {\\n json { #[3] & #[5]\\n source => \\"message\\"\\n }\\n }\\n date { #[4]\\n match => [\\"[timestamp]\\", \\"yyyy-MM-dd HH:mm:ss.SSS Z\\"]\\n remove_field => \\"[timestamp]\\"\\n }\\n mutate {\\n rename => { #[6]\\n \\"service\\" => \\"[service][name]\\"\\n \\"log_level\\" => \\"[log][level]\\"\\n }\\n }\\n}\\n\\n\\noutput {\\n elasticsearch { # [7]\\n hosts => \\"elasticsearch-enpoint:443\\"\\n api_key => \\"\\\\${ES_API_KEY}\\"\\n }\\n}\\n`})}),`\\n`,(0,n.jsx)(e.p,{children:(0,n.jsx)(e.strong,{children:\\"OpenTelemtry Collector configuration\\"})}),`\\n`,(0,n.jsx)(e.pre,{children:(0,n.jsx)(e.code,{className:\\"language-yaml\\",children:`receivers:\\n filelog/json: # [1]\\n include: \\n - /tmp/demo.ndjson\\n retry_on_failure:\\n enabled: true\\n start_at: beginning\\n storage: file_storage \\n operators:\\n # Copy the raw message into event.original (this is done OOTB by Logstash in ECS mode)\\n - type: copy\\n from: body\\n to: attributes[\'event.original\']\\n - type: add #[2]\\n field: attributes[\\"data_stream.type\\"]\\n value: \\"logs\\" \\n - type: add #[2]\\n field: attributes[\\"data_stream.dataset\\"]\\n value: \\"otel\\" #[2]\\n - type: add\\n field: attributes[\\"data_stream.namespace\\"]\\n value: \\"default\\" 
\\n\\n\\nextensions:\\n file_storage:\\n directory: /var/lib/otelcol/file_storage\\n\\nprocessors:\\n # Adding host.name (this is done OOTB by Logstash)\\n resourcedetection/system:\\n detectors: [\\"system\\"]\\n system:\\n hostname_sources: [\\"os\\"]\\n resource_attributes:\\n os.type:\\n enabled: false\\n\\n transform/json_parse: #[3]\\n error_mode: ignore\\n log_statements:\\n - context: log\\n statements:\\n - merge_maps(attributes, ParseJSON(body), \\"upsert\\")\\n conditions: \\n - IsMatch(body, \\"^\\\\\\\\\\\\\\\\{\\")\\n \\n\\n transform/parse_date: #[4]\\n error_mode: ignore\\n log_statements:\\n - context: log\\n statements:\\n - set(time, Time(attributes[\\"timestamp\\"], \\"%Y-%m-%d %H:%M:%S.%L %z\\"))\\n - delete_key(attributes, \\"timestamp\\")\\n conditions: \\n - attributes[\\"timestamp\\"] != nil\\n\\n transform/override_message_field: [5]\\n error_mode: ignore\\n log_statements:\\n - context: log\\n statements:\\n - set(body, attributes[\\"message\\"])\\n - delete_key(attributes, \\"message\\")\\n\\n transform/set_log_severity: # [6]\\n error_mode: ignore\\n log_statements:\\n - context: log\\n statements:\\n - set(severity_text, attributes[\\"log_level\\"]) \\n\\n attributes/rename_attributes: #[6]\\n actions:\\n - key: service.name\\n from_attribute: service\\n action: insert\\n - key: service\\n action: delete\\n - key: log_level\\n action: delete\\n\\nexporters:\\n elasticsearch: #[7]\\n endpoints: [\\"elasticsearch-enpoint:443\\"]\\n api_key: \\\\${env:ES_API_KEY}\\n tls:\\n logs_dynamic_index:\\n enabled: true\\n mapping:\\n mode: ecs\\n\\nservice:\\n extensions: [file_storage]\\n pipelines:\\n logs/json:\\n receivers: \\n - filelog/json\\n processors:\\n - resourcedetection/system \\n - transform/json_parse\\n - transform/parse_date \\n - transform/override_message_field\\n - transform/set_log_severity\\n - attributes/rename_attributes\\n exporters: \\n - elasticsearch\\n\\n`})}),`\\n`,(0,n.jsx)(e.p,{children:\\"These will generate the following document in Elasticsearch\\"}),`\\n`,(0,n.jsx)(e.pre,{children:(0,n.jsx)(e.code,{className:\\"language-json\\",children:`{\\n \\"@timestamp\\": \\"2024-10-11T12:34:56.123000000Z\\",\\n \\"data_stream\\": {\\n \\"dataset\\": \\"otel\\",\\n \\"namespace\\": \\"default\\",\\n \\"type\\": \\"logs\\"\\n },\\n \\"event\\": {\\n \\"original\\": \\"{\\\\\\\\\\"log_level\\\\\\\\\\":\\\\\\\\\\"WARNING\\\\\\\\\\",\\\\\\\\\\"message\\\\\\\\\\":\\\\\\\\\\"User login successful\\\\\\\\\\",\\\\\\\\\\"service\\\\\\\\\\":\\\\\\\\\\"auth-service\\\\\\\\\\",\\\\\\\\\\"timestamp\\\\\\\\\\":\\\\\\\\\\"2024-10-11 12:34:56.123 +0100\\\\\\\\\\",\\\\\\\\\\"user\\\\\\\\\\":{\\\\\\\\\\"id\\\\\\\\\\":\\\\\\\\\\"A1230\\\\\\\\\\",\\\\\\\\\\"name\\\\\\\\\\":\\\\\\\\\\"john_doe\\\\\\\\\\"}}\\"\\n },\\n \\"host\\": {\\n \\"hostname\\": \\"my-laptop\\",\\n \\"name\\": \\"my-laptop\\",\\n },\\n \\"log\\": {\\n \\"file\\": {\\n \\"name\\": \\"json.log\\"\\n },\\n \\"level\\": \\"WARNING\\"\\n },\\n \\"message\\": \\"User login successful\\",\\n \\"service\\": {\\n \\"name\\": \\"auth-service\\"\\n },\\n \\"user\\": {\\n \\"id\\": \\"A1230\\",\\n \\"name\\": \\"john_doe\\"\\n }\\n}\\n\\n`})}),`\\n`,(0,n.jsx)(e.h2,{id:\\"conclusion\\",children:\\"Conclusion\\"}),`\\n`,(0,n.jsx)(e.p,{children:\\"In this post, we showed examples of how to convert a typical Logstash pipeline into an OpenTelemetry Collector pipeline for logs. 
While OpenTelemetry provides powerful tools for collecting and exporting logs, if your pipeline relies on complex transformations or scripting, Logstash remains a superior choice. This is because Logstash offers a broader range of built-in features and a more flexible approach to handling advanced data manipulation tasks.\\"}),`\\n`,(0,n.jsx)(e.h2,{id:\\"whats-next\\",children:\\"What\'s Next?\\"}),`\\n`,(0,n.jsx)(e.p,{children:\\"Now that you\'ve seen basic (but realistic) examples of converting a Logstash pipeline to OpenTelemetry, it\'s your turn to dive deeper. Depending on your needs, you can explore further and find more detailed resources in the following repositories:\\"}),`\\n`,(0,n.jsxs)(e.ul,{children:[`\\n`,(0,n.jsxs)(e.li,{children:[(0,n.jsx)(e.a,{href:\\"https://github.com/open-telemetry/opentelemetry-collector\\",rel:\\"nofollow\\",children:\\"OpenTelemetry Collector\\"}),\\": Learn about the core OpenTelemetry components, from receivers to exporters.\\"]}),`\\n`,(0,n.jsxs)(e.li,{children:[(0,n.jsx)(e.a,{href:\\"https://github.com/open-telemetry/opentelemetry-collector-contrib\\",rel:\\"nofollow\\",children:\\"OpenTelemetry Collector Contrib\\"}),\\": Find community-contributed components for a wider range of integrations and features.\\"]}),`\\n`,(0,n.jsxs)(e.li,{children:[(0,n.jsx)(e.a,{href:\\"https://github.com/elastic/opentelemetry-collector-components\\",rel:\\"nofollow\\",children:\\"Elastic\'s opentelemetry-collector-components\\"}),\\": Dive into Elastic\'s extensions for the OpenTelemetry Collector, offering more tailored features for Elastic Stack users.\\"]}),`\\n`]}),`\\n`,(0,n.jsxs)(e.p,{children:[\\"If you encounter specific challenges or need to handle more advanced use cases, these repositories will be an excellent resource for discovering additional components or integrations that can enhance your pipeline. All these repositories have a similar structure with folders named \\",(0,n.jsx)(e.code,{children:\\"receiver\\"}),\\", \\",(0,n.jsx)(e.code,{children:\\"processor\\"}),\\", \\",(0,n.jsx)(e.code,{children:\\"exporter\\"}),\\", \\",(0,n.jsx)(e.code,{children:\\"connector\\"}),\\", which should be familiar after reading this blog. 
Whether you are migrating a simple Logstash pipeline or tackling more complex data transformations, these tools and communities will provide the support you need for a successful OpenTelemetry implementation.\\"]})]})}function h(t={}){let{wrapper:e}=t.components||{};return e?(0,n.jsx)(e,{...t,children:(0,n.jsx)(d,{...t})}):d(t)}return v(T);})();\\n;return Component;"},"_id":"articles/logstash-to-otel.mdx","_raw":{"sourceFilePath":"articles/logstash-to-otel.mdx","sourceFileName":"logstash-to-otel.mdx","sourceFileDir":"articles","contentType":"mdx","flattenedPath":"articles/logstash-to-otel"},"type":"Article","imageUrl":"/assets/images/logstash-to-otel/logstash-otel.jpg","readingTime":"20 min read","url":"/logstash-to-otel","headings":[{"level":2,"title":"Introduction","href":"#introduction"},{"level":2,"title":"What is OpenTelemetry Collector and why should I care?","href":"#what-is-opentelemetry-collector-and-why-should-i-care"},{"level":2,"title":"OTEL Collector Configuration","href":"#otel-collector-configuration"},{"level":2,"title":"Logstash pipeline definition","href":"#logstash-pipeline-definition"},{"level":2,"title":"Logstash pipeline compared to Otel Collector components","href":"#logstash-pipeline-compared-to-otel-collector-components"},{"level":2,"title":"Convert a Logstash Pipeline into OpenTelemetry Collector Pipeline","href":"#convert-a-logstash-pipeline-into-opentelemetry-collector-pipeline"},{"level":3,"title":"Example 1: Parse and transform log line","href":"#example-1-parse-and-transform-log-line"},{"level":3,"title":"Example 2: Parse and transform a NDJSON-formatted log file","href":"#example-2-parse-and-transform-a-ndjson-formatted-log-file"},{"level":2,"title":"Conclusion","href":"#conclusion"},{"level":2,"title":"What\'s Next?","href":"#whats-next"}]},{"title":"Managing your applications on Amazon ECS EC2-based clusters with Elastic Observability","slug":"manage-applications-amazon-ecs-ec2-clusters-observability","date":"2023-08-15","description":"Learn how to manage applications on Amazon ECS clusters based on EC2 instances and how simple it is to use Elastic agents with the AWS and docker integrations to provide a complete picture of your apps, ECS service, and corresponding EC2 instances.","image":"library-branding-elastic-observability-midnight-1680x980.png","author":[{"slug":"bahubali-shetti","type":"Author","_raw":{}}],"tags":[{"slug":"cloud-monitoring","type":"Tag","_raw":{}},{"slug":"aws","type":"Tag","_raw":{}},{"slug":"elastic-agent","type":"Tag","_raw":{}},{"slug":"containers","type":"Tag","_raw":{}},{"slug":"amazon-ecs","type":"Tag","_raw":{}}],"body":{"raw":"\\nIn previous blogs, we explored how Elastic Observability can help you monitor various AWS services and analyze them effectively:\\n\\n- [Managing fundamental AWS services such as Amazon EC2, Amazon RDS, Amazon VPC, and NAT gateway](https://www.elastic.co/blog/aws-service-metrics-monitor-observability-easy)\\n- [Data can be ingested into Elastic observability using a serverless forwarder or Amazon Kinesis Data Firehose](https://www.elastic.co/blog/aws-kinesis-data-firehose-elastic-observability-analytics)\\n- [Ingesting and analyzing AWS VPC Flow logs](https://www.elastic.co/blog/vpc-flow-logs-monitoring-analytics-observability)\\n\\nOne of the more heavily used AWS container services is Amazon ECS (Elastic Container Service). While there is a trend toward using Fargate to simplify the setup and management of ECS clusters, many users still prefer using Amazon ECS with EC2 instances. 
It may not be as straightforward or efficient as AWS Fargate, but it offers more control over the underlying infrastructure.\\n\\nIn the most recent blog, we explored how [Elastic Observability helps manage Amazon ECS with Fargate](https://www.elastic.co/blog/elastic-agent-monitor-ecs-aws-fargate-elastic-observability). However, this blog will review how to manage an Amazon ECS cluster with EC2 instances using Elastic Observability instead.\\n\\nIn general, when setting up Amazon ECS-based clusters with EC2, you may or may not have access to the EC2 instances. This determines what you can use with Elastic Observability in monitoring your EC2-based ECS cluster. Hence, there are two components you can use in monitoring the EC2-based ECS cluster with Elastic Observability:\\n\\n![amazon ecs](/assets/images/manage-applications-amazon-ecs-ec2-clusters-observability/elastic-blog-1-amazon-ecs.png)\\n\\nAs you can see in the diagram above, the two components are:\\n\\n1. **Baseline setup:** The Elastic Agent running the AWS integration is configured to obtain ECS metrics and logs from CloudWatch. This agent runs on an instance that is not part of the ECS cluster because it allows you to see ALL ECS clusters and other AWS services, such as EKS, RDS, and EC2.\\n\\n2. **Additional setup:** If you have access to the EC2 instances in the ECS cluster, then you can run Elastic’s Docker integration on each EC2 instance. This gives you significantly more details on the containers than AWS Container Insights, and it does not require AWS CloudWatch, which can be fairly costly.\\n\\nWith either just the baseline setup or the additional setup, you will have to set up AWS CloudWatch Container Insights for the ECS cluster. However, the Docker integration in the additional setup can provide information beyond what AWS CloudWatch Container Insights offers.\\n\\nHence, we will review how you can monitor the various components of an EC2-based ECS cluster:\\n\\n- EC2 instances in the Auto Scaling group (ASG)\\n- ECS services running in the ECS cluster\\n- ECS tasks (containers)\\n\\nAlso, we will review how you can obtain metrics and logs from the ECS cluster with and without AWS CloudWatch. 
We’ll show you how to use:\\n\\n- AWS CloudWatch Container Insights (from CloudWatch)\\n- Docker metrics (non-CloudWatch)\\n- Amazon ECS logs via CloudWatch\\n\\n## Prerequisites and configuration\\n\\nIf you plan on following this blog, here are some of the components and details we used to set up the configuration:\\n\\n- An account on [Elastic Cloud](https://cloud.elastic.co) and a deployed stack ([see instructions here](https://www.elastic.co/guide/en/elastic-stack/current/installing-elastic-stack.html)) — ensure that you have both.\\n- An [nginx](https://hub.docker.com/_/nginx) container and a [stress container](https://github.com/containerstack/alpine-stress) — we will use these two basic containers to help highlight the load on the ECS cluster.\\n- An ECS EC2 cluster in an Auto Scaling Group — ensure you have access in order to load up the Elastic Agent on the EC2 instances, or you can create an AMI and use that as the baseline image for your ECS cluster.\\n- An EC2 instance anywhere in your account that is not part of the ECS cluster and has public access (to send metrics and logs)\\n\\n## What will you see in Elastic Observability once it\'s all set up?\\n\\nIf you utilize the baseline configuration, with the ECS EC2 cluster configured for AWS CloudWatch Container Insights and the Elastic Agent configured with the following integrations:\\n\\n- ECS integration\\n- EC2 integration\\n- AWS CloudWatch integration with metrics and logging\\n\\nThen you will be able to get the following information in Elastic dashboards:\\n\\n- Containers in the cluster (AWS CloudWatch Container Insights via Elastic Agent and AWS CloudWatch integration)\\n- Services in the cluster (AWS CloudWatch Container Insights via Elastic Agent and AWS CloudWatch integration)\\n- CPU and memory utilization of the ECS cluster (Elastic Agent with ECS integration)\\n- EC2 CPU and memory utilization of the instance in the cluster (Elastic Agent with EC2 integration)\\n- CPU and memory utilization per container (AWS CloudWatch Container Insights via Elastic Agent and AWS CloudWatch integration)\\n\\n![containers in cluster](/assets/images/manage-applications-amazon-ecs-ec2-clusters-observability/elastic-blog-2-containers-in-cluster.png)\\n\\nIf you use the additional configuration, with an Elastic Agent running the Docker integration on each ECS EC2 instance, you will be able to get a direct feed of metrics via Docker. 
The following metrics can be viewed:\\n\\n![metrics graphs](/assets/images/manage-applications-amazon-ecs-ec2-clusters-observability/elastic-blog-3-metrics-graphs.png)\\n\\nLet’s see how to set this all up.\\n\\n## Setting it all up\\n\\nOver the next few steps, I’ll walk through:\\n\\n- Getting an account on Elastic Cloud\\n- Bringing up an ECS EC2 cluster and potentially setting up your own AMI\\n- Setting up the containers [nginx](https://hub.docker.com/_/nginx) and a [stress container](https://github.com/containerstack/alpine-stress)\\n- Setting up the Elastic Agent with the Docker container integration on the ECS EC2 instances\\n- Setting up the Elastic Agent with the AWS, CloudWatch, and ECS integrations on an independent EC2 instance\\n\\n### Step 1: Create an account on Elastic Cloud\\n\\nFollow the instructions to [get started on Elastic Cloud](https://cloud.elastic.co/registration?fromURI=/home).\\n\\n![free trial](/assets/images/manage-applications-amazon-ecs-ec2-clusters-observability/elastic-blog-4-free-trial.png)\\n\\n### Step 2: Set up an ECS Cluster with EC2 instances\\n\\nWhen creating a cluster, you have two options when setting it up using the console:\\n\\n- Create a new ASG, where you will only be allowed to use the preloaded set of Amazon Linux (2 or 2023) based AMIs\\n- Set up your own ASG prior to setting up the ECS cluster and select it from the options. This option will give you more control over the Linux version and the ability to bake things like Elastic Agents into the AMI used for the instances in the ASG.\\n\\nWith either option, you will need to turn on **Container Insights** (see the bottom part of the image below).\\n\\n![infrastructure](/assets/images/manage-applications-amazon-ecs-ec2-clusters-observability/elastic-blog-5-infrastructure.png)\\n\\nOnce the cluster is set up, you can go to AWS CloudWatch, where you should see Container Insights for your cluster:\\n\\n![container insights](/assets/images/manage-applications-amazon-ecs-ec2-clusters-observability/elastic-blog-6-container-insights.png)\\n\\n### Step 3: Set up Elastic agent with docker integration\\n\\nNext, you will need to add an Elastic Agent to each one of the instances. In Elastic Cloud, set up an agent policy with the Docker and System integrations, as shown:\\n\\n![cluster policy](/assets/images/manage-applications-amazon-ecs-ec2-clusters-observability/elastic-blog-7-ecs-ec2-cluster-policy.png)\\n\\nNext, add an agent for the policy, then copy the appropriate install script (in our case it was Linux since we were running Amazon Linux 2), and run it on every EC2 instance in the cluster:\\n\\n![add agent](/assets/images/manage-applications-amazon-ecs-ec2-clusters-observability/elastic-blog-8-add-agent.png)\\n\\nOnce this is added, you should see agents in Fleet. Each agent will be on each EC2 instance:\\n\\n![fleet](/assets/images/manage-applications-amazon-ecs-ec2-clusters-observability/elastic-blog-9-fleet.png)
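\\n\\nFor reference, the policy that the UI generates for the Docker integration corresponds roughly to a standalone agent configuration like the following sketch (the stream list is abbreviated and the values are illustrative; when agents are enrolled in Fleet, this policy is managed for you):\\n\\n```yaml\\ninputs:\\n - type: docker/metrics\\n data_stream:\\n namespace: default\\n use_output: default\\n streams:\\n - metricsets: [cpu]\\n data_stream:\\n dataset: docker.cpu\\n type: metrics\\n hosts: [\\"unix:///var/run/docker.sock\\"]\\n period: 10s\\n - metricsets: [memory]\\n data_stream:\\n dataset: docker.memory\\n type: metrics\\n hosts: [\\"unix:///var/run/docker.sock\\"]\\n period: 10s\\n```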
\\n\\nIf you decide to set up an ECS EC2 cluster with your own ASG and don’t use Amazon Linux AMIs (2 or 2023), you will have to:\\n\\n- Pick your base image to base an AMI on\\n- [Add an ECS agent and register each instance to the AMI base image manually](https://docs.aws.amazon.com/AmazonECS/latest/developerguide/ecs-agent-install.html)\\n- [Add the Elastic agent — standalone version](https://www.elastic.co/guide/en/fleet/current/install-standalone-elastic-agent.html) — this step will require you to configure your Elastic endpoint and API key (or simply add the script in the “add agent” part of the configuration above when using the UI)\\n- Create the AMI once all the above components are added\\n- Use the newly created AMI when creating the ASG for the ECS cluster\\n\\n### Step 4: Set up an Elastic agent with the AWS integration\\n\\nFrom the integrations tab in Elastic Cloud, select the AWS integration and select **Add agent**. You will then have to walk through the configuration of the AWS integration.\\n\\nAt a minimum, ensure that you have the following configuration options turned on:\\n\\n![toggles](/assets/images/manage-applications-amazon-ecs-ec2-clusters-observability/elastic-blog-10-toggles.png)\\n\\nThis will ensure that not only EC2 metrics and logs are ingested but that all CloudWatch metrics and logs are also ingested. ECS metrics and logs are stored in CloudWatch.\\n\\nIf you want to ensure only logs from the specific ECS cluster are ingested, you can also restrict what to ingest by several parameters. In our setup, we are collecting only logs from the log group with the prefix \`/aws/ecs/containerinsights/EC2BasedCluster/\`.\\n\\n![cloudwatch](/assets/images/manage-applications-amazon-ecs-ec2-clusters-observability/elastic-blog-11-cloudwatch.png)\\n\\nOnce this policy is set up, add an agent as in Step 3.\\n\\n![add agent testing aws](/assets/images/manage-applications-amazon-ecs-ec2-clusters-observability/elastic-blog-12-add-agent-testing-aws.png)\\n\\nHowever, this agent needs to be added to an EC2 instance that is independent of the ECS cluster.\\n\\nOnce installed, this agent will help pull in:\\n\\n- All EC2 instance metrics across your account (which can be adjusted in the integration policy)\\n- AWS CloudWatch Container Insights from ECS\\n- ECS metrics such as:\\n - aws.ecs.metrics.CPUReservation.avg\\n - aws.ecs.metrics.CPUUtilization.avg\\n - aws.ecs.metrics.GPUReservation.avg\\n - aws.ecs.metrics.MemoryReservation.avg\\n - aws.ecs.metrics.MemoryUtilization.avg\\n - [More - see the full list here](https://docs.elastic.co/integrations/aws/ecs)\\n\\n### Step 5: Setting up services and containers\\n\\nIn this configuration, we used [nginx](https://hub.docker.com/_/nginx) and a [stress container](https://github.com/containerstack/alpine-stress).\\n\\nIn order to initiate services and containers on ECS, you will need to set up a task definition for each of these containers. 
But more importantly, you will need to ensure that both of the following roles:\\n\\n\`\\"taskRoleArn\\": \\"arn:aws:iam::xxxxx:role/ecsTaskExecutionRole\\"\`\\n\\n\`\\"executionRoleArn\\": \\"arn:aws:iam::xxxxx:role/ecsTaskExecutionRole\\"\`\\n\\nhave the following permissions:\\n\\n![permissions](/assets/images/manage-applications-amazon-ecs-ec2-clusters-observability/elastic-blog-13-permissions.png)\\n\\nMost importantly, you should ensure that this permission is added:\\n\\n\`AmazonEC2ContainerServiceforEC2Role\`\\n\\nIt will ensure containers can be brought up on the EC2 instances in the cluster.\\n\\nOnce you have the right permissions, set up the following tasks.\\n\\nHere is the task JSON for NGINX:\\n\\n```json\\n{\\n \\"family\\": \\"NGINX\\",\\n \\"containerDefinitions\\": [\\n {\\n \\"name\\": \\"nginx\\",\\n \\"image\\": \\"nginx:latest\\",\\n \\"cpu\\": 0,\\n \\"portMappings\\": [\\n {\\n \\"name\\": \\"nginx-80-tcp\\",\\n \\"containerPort\\": 80,\\n \\"hostPort\\": 80,\\n \\"protocol\\": \\"tcp\\",\\n \\"appProtocol\\": \\"http\\"\\n }\\n ],\\n \\"essential\\": true,\\n \\"environment\\": [],\\n \\"environmentFiles\\": [],\\n \\"mountPoints\\": [],\\n \\"volumesFrom\\": [],\\n \\"ulimits\\": [],\\n \\"logConfiguration\\": {\\n \\"logDriver\\": \\"awslogs\\",\\n \\"options\\": {\\n \\"awslogs-create-group\\": \\"true\\",\\n \\"awslogs-group\\": \\"/ecs/\\",\\n \\"awslogs-region\\": \\"us-west-2\\",\\n \\"awslogs-stream-prefix\\": \\"ecs\\"\\n },\\n \\"secretOptions\\": []\\n }\\n }\\n ],\\n \\"taskRoleArn\\": \\"arn:aws:iam::xxxxxx:role/ecsTaskExecutionRole\\",\\n \\"executionRoleArn\\": \\"arn:aws:iam::xxxxx:role/ecsTaskExecutionRole\\",\\n \\"networkMode\\": \\"awsvpc\\",\\n \\"requiresCompatibilities\\": [\\"EC2\\"],\\n \\"cpu\\": \\"256\\",\\n \\"memory\\": \\"512\\",\\n \\"runtimePlatform\\": {\\n \\"cpuArchitecture\\": \\"X86_64\\",\\n \\"operatingSystemFamily\\": \\"LINUX\\"\\n }\\n}\\n```\\n\\nHere is the task JSON for the stress container:\\n\\n```json\\n{\\n \\"family\\": \\"stressLoad\\",\\n \\"containerDefinitions\\": [\\n {\\n \\"name\\": \\"stressLoad\\",\\n \\"image\\": \\"containerstack/alpine-stress\\",\\n \\"cpu\\": 0,\\n \\"memory\\": 512,\\n \\"memoryReservation\\": 512,\\n \\"portMappings\\": [],\\n \\"essential\\": true,\\n \\"entryPoint\\": [\\"sh\\", \\"-c\\"],\\n \\"command\\": [\\n \\"/usr/local/bin/stress --cpu 2 --io 2 --vm 1 --vm-bytes 128M --timeout 6000s\\"\\n ],\\n \\"environment\\": [],\\n \\"mountPoints\\": [],\\n \\"volumesFrom\\": [],\\n \\"logConfiguration\\": {\\n \\"logDriver\\": \\"awslogs\\",\\n \\"options\\": {\\n \\"awslogs-create-group\\": \\"true\\",\\n \\"awslogs-group\\": \\"/ecs/\\",\\n \\"awslogs-region\\": \\"us-west-2\\",\\n \\"awslogs-stream-prefix\\": \\"ecs\\"\\n }\\n }\\n }\\n ],\\n \\"taskRoleArn\\": \\"arn:aws:iam::xxxxx:role/ecsTaskExecutionRole\\",\\n \\"executionRoleArn\\": \\"arn:aws:iam::xxxxx:role/ecsTaskExecutionRole\\",\\n \\"networkMode\\": \\"awsvpc\\",\\n \\"requiresCompatibilities\\": [\\"EC2\\"],\\n \\"cpu\\": \\"256\\",\\n \\"memory\\": \\"512\\",\\n \\"runtimePlatform\\": {\\n \\"cpuArchitecture\\": \\"X86_64\\",\\n \\"operatingSystemFamily\\": \\"LINUX\\"\\n }\\n}\\n```\\n\\nOnce you have defined the tasks, ensure you bring up each service (one for each task) with the launch type of EC2:\\n\\n![environment](/assets/images/manage-applications-amazon-ecs-ec2-clusters-observability/elastic-blog-14-environment.png)\\n\\nYou should have two services running 
now.\\n\\n![ec2basedcluster](/assets/images/manage-applications-amazon-ecs-ec2-clusters-observability/elastic-blog-15-ec2basedcluster.png)\\n\\n### Step 6: Check on metrics and logs in Elastic Cloud\\n\\nGo to Elastic Cloud and ensure that you are getting metrics and logs from the ECS cluster. First, check to see if you are receiving metrics by viewing the built-in dashboard called [Metrics Docker] Overview.\\n\\n![Docker image](/assets/images/manage-applications-amazon-ecs-ec2-clusters-observability/elastic-blog-16-docker.png)\\n\\n**With some work on this dashboard, adding in Container Insights metrics and Docker metrics, you should be able to see:**\\n\\n![graphs](/assets/images/manage-applications-amazon-ecs-ec2-clusters-observability/elastic-blog-3-metrics-graphs.png)\\n\\nIf you only have the ECS integration and the Elastic Agent from Step 4, then you will need to create a new dashboard:\\n\\n![cluster](/assets/images/manage-applications-amazon-ecs-ec2-clusters-observability/elastic-blog-2-containers-in-cluster.png)\\n\\nThis dashboard can be set up with the following metrics:\\n\\n- Containers in the cluster (Container Insights via Elastic Agent and AWS CloudWatch integration). Set up a TSVB panel using the following metric: \`aws.dimensions.ClusterName : \\"EC2BasedCluster\\"\` with \`aws.containerinsights.metrics.TaskCount.max\`\\n- Services in the cluster (Container Insights via Elastic Agent and AWS CloudWatch integration). Use the following configuration to set up the chart:\\n\\n![table](/assets/images/manage-applications-amazon-ecs-ec2-clusters-observability/elastic-blog-17-table.png)\\n\\n- CPU and memory utilization of the ECS cluster (Elastic Agent with ECS integration). Use the following configuration to set up both CPU and memory utilization charts:\\n\\n![line](/assets/images/manage-applications-amazon-ecs-ec2-clusters-observability/elastic-blog-18-line.png)\\n\\n- EC2 CPU and memory utilization of the instance in the cluster (Elastic Agent with EC2 integration). Use the following configuration to set up both CPU and memory utilization charts:\\n\\n![bar vertical stacked](/assets/images/manage-applications-amazon-ecs-ec2-clusters-observability/elastic-blog-19-bar-vertical-stacked.png)\\n\\n- (Not shown): CPU and memory utilization per container (Container Insights via Elastic Agent and AWS CloudWatch integration)\\n\\n### Step 7: Look at logs from your ECS cluster\\n\\nSince we set up AWS CloudWatch logs collection in Step 4, we can view these logs in Discover by filtering on the log group ARN \`/aws/ecs/containerinsights/EC2BasedCluster/\`.\\n\\n![logs](/assets/images/manage-applications-amazon-ecs-ec2-clusters-observability/elastic-blog-20-logs.png)\\n\\n## Summary\\n\\nI hope you’ve gotten an appreciation for how Elastic Observability can help with your [AWS monitoring](https://www.elastic.co/observability/aws-monitoring) of ECS service metrics. Here’s a quick recap of what you learned:\\n\\n- Elastic Observability supports ingesting and analyzing AWS ECS service metrics and the corresponding EC2 metrics through the AWS integration on the Elastic Agent. It’s easy to set up ingest from AWS services via the Elastic Agent.\\n- Elastic Observability can also get container metrics via the Docker integration running on Elastic Agents on each of the EC2 instances in the ECS EC2 auto scaling group.\\n- Elastic has multiple out-of-the-box (OOTB) AWS service dashboards that can be used as baselines to get your own customized view.\\n\\nReady to get started? 
Start your own [7-day free trial](https://aws.amazon.com/marketplace/pp/prodview-voru33wi6xs7k?trk=5fbc596b-6d2a-433a-8333-0bd1f28e84da%E2%89%BBchannel=el) by signing up via [AWS Marketplace](https://aws.amazon.com/marketplace/pp/prodview-voru33wi6xs7k?trk=d54b31eb-671c-49ba-88bb-7a1106421dfa%E2%89%BBchannel=el) and quickly spin up a deployment in minutes on any of the [Elastic Cloud regions on AWS](https://www.elastic.co/guide/en/cloud/current/ec-reference-regions.html#ec_amazon_web_services_aws_regions) around the world. Your AWS Marketplace purchase of Elastic will be included in your monthly consolidated billing statement and will draw against your committed spend with AWS.\\n\\n_The release and timing of any features or functionality described in this post remain at Elastic\'s sole discretion. Any features or functionality not currently available may not be delivered on time or at all._\\n","code":"var Component=(()=>{var g=Object.create;var s=Object.defineProperty;var u=Object.getOwnPropertyDescriptor;var p=Object.getOwnPropertyNames;var m=Object.getPrototypeOf,w=Object.prototype.hasOwnProperty;var b=(t,e)=>()=>(e||t((e={exports:{}}).exports,e),e.exports),y=(t,e)=>{for(var i in e)s(t,i,{get:e[i],enumerable:!0})},l=(t,e,i,o)=>{if(e&&typeof e==\\"object\\"||typeof e==\\"function\\")for(let a of p(e))!w.call(t,a)&&a!==i&&s(t,a,{get:()=>e[a],enumerable:!(o=u(e,a))||o.enumerable});return t};var f=(t,e,i)=>(i=t!=null?g(m(t)):{},l(e||!t||!t.__esModule?s(i,\\"default\\",{value:t,enumerable:!0}):i,t)),C=t=>l(s({},\\"__esModule\\",{value:!0}),t);var c=b((A,r)=>{r.exports=_jsx_runtime});var v={};y(v,{default:()=>d,frontmatter:()=>E});var n=f(c()),E={title:\\"Managing your applications on Amazon ECS EC2-based clusters with Elastic Observability\\",slug:\\"manage-applications-amazon-ecs-ec2-clusters-observability\\",date:\\"2023-08-15\\",description:\\"Learn how to manage applications on Amazon ECS clusters based on EC2 instances and how simple it is to use Elastic agents with the AWS and docker integrations to provide a complete picture of your apps, ECS service, and corresponding EC2 instances.\\",author:[{slug:\\"bahubali-shetti\\"}],image:\\"library-branding-elastic-observability-midnight-1680x980.png\\",tags:[{slug:\\"cloud-monitoring\\"},{slug:\\"aws\\"},{slug:\\"elastic-agent\\"},{slug:\\"containers\\"},{slug:\\"amazon-ecs\\"}]};function h(t){let e={a:\\"a\\",code:\\"code\\",em:\\"em\\",h2:\\"h2\\",h3:\\"h3\\",img:\\"img\\",li:\\"li\\",ol:\\"ol\\",p:\\"p\\",pre:\\"pre\\",strong:\\"strong\\",ul:\\"ul\\",...t.components};return(0,n.jsxs)(n.Fragment,{children:[(0,n.jsx)(e.p,{children:\\"In previous blogs, we explored how Elastic Observability can help you monitor various AWS services and analyze them effectively:\\"}),`\\n`,(0,n.jsxs)(e.ul,{children:[`\\n`,(0,n.jsx)(e.li,{children:(0,n.jsx)(e.a,{href:\\"https://www.elastic.co/blog/aws-service-metrics-monitor-observability-easy\\",rel:\\"nofollow\\",children:\\"Managing fundamental AWS services such as Amazon EC2, Amazon RDS, Amazon VPC, and NAT gateway\\"})}),`\\n`,(0,n.jsx)(e.li,{children:(0,n.jsx)(e.a,{href:\\"https://www.elastic.co/blog/aws-kinesis-data-firehose-elastic-observability-analytics\\",rel:\\"nofollow\\",children:\\"Data can be ingested into Elastic observability using a serverless forwarder or Amazon Kinesis Data Firehose\\"})}),`\\n`,(0,n.jsx)(e.li,{children:(0,n.jsx)(e.a,{href:\\"https://www.elastic.co/blog/vpc-flow-logs-monitoring-analytics-observability\\",rel:\\"nofollow\\",children:\\"Ingesting and analyzing AWS VPC Flow 
logs\\"})}),`\\n`]}),`\\n`,(0,n.jsx)(e.p,{children:\\"One of the more heavily used AWS container services is Amazon ECS (Elastic Container Service). While there is a trend toward using Fargate to simplify the setup and management of ECS clusters, many users still prefer using Amazon ECS with EC2 instances. It may not be as straightforward or efficient as AWS Fargate, but it offers more control over the underlying infrastructure.\\"}),`\\n`,(0,n.jsxs)(e.p,{children:[\\"In the most recent blog, we explored how \\",(0,n.jsx)(e.a,{href:\\"https://www.elastic.co/blog/elastic-agent-monitor-ecs-aws-fargate-elastic-observability\\",rel:\\"nofollow\\",children:\\"Elastic Observability helps manage Amazon ECS with Fargate\\"}),\\". However, this blog will review how to manage an Amazon ECS cluster with EC2 instances using Elastic Observability instead.\\"]}),`\\n`,(0,n.jsx)(e.p,{children:\\"In general, when setting up Amazon ECS-based clusters with EC2, you may or may not have access to the EC2 instances. This determines what you can use with Elastic Observability in monitoring your EC2-based ECS cluster. Hence, there are two components you can use in monitoring the EC2-based ECS cluster with Elastic Observability:\\"}),`\\n`,(0,n.jsx)(e.p,{children:(0,n.jsx)(e.img,{src:\\"/assets/images/manage-applications-amazon-ecs-ec2-clusters-observability/elastic-blog-1-amazon-ecs.png\\",alt:\\"amazon ecs\\",width:\\"1934\\",height:\\"1032\\"})}),`\\n`,(0,n.jsx)(e.p,{children:\\"As you can see in the diagram above, the two components are:\\"}),`\\n`,(0,n.jsxs)(e.ol,{children:[`\\n`,(0,n.jsxs)(e.li,{children:[`\\n`,(0,n.jsxs)(e.p,{children:[\\"_ \\",(0,n.jsx)(e.strong,{children:\\"Baseline setup\\"}),\\" __ \\",(0,n.jsx)(e.strong,{children:\\":\\"}),\\" _ The elastic agent running the AWS integration is configured to obtain ECS metrics and logs from cloud watch. This agent runs on an instance that is not part of the ECS cluster because it allows you to see ALL ECS clusters and other AWS Services, such as EKS, RDS, and EC2.\\"]}),`\\n`]}),`\\n`,(0,n.jsxs)(e.li,{children:[`\\n`,(0,n.jsxs)(e.p,{children:[\\"_ \\",(0,n.jsx)(e.strong,{children:\\"Additional setup:\\"}),\\" _ If you have access to the EC2 instances in the ECS cluster, then you can run Elastic\\\\u2019s docker integration in each EC2 instance. This gives you significantly more details on the containers than AWS container insights. And it does not require AWS Cloudwatch, which can be fairly costly.\\"]}),`\\n`]}),`\\n`]}),`\\n`,(0,n.jsx)(e.p,{children:\\"Using either just the baseline or the additional setup, you will have to set up AWS CloudWatch Container Insights for the ECS cluster. However, the docker integration with the additional setup can provide additional information to the AWS CloudWatch Container Insights.\\"}),`\\n`,(0,n.jsx)(e.p,{children:\\"Hence, we will review how you can monitor the various components of an EC2-based ECS cluster:\\"}),`\\n`,(0,n.jsxs)(e.ul,{children:[`\\n`,(0,n.jsx)(e.li,{children:\\"EC2 instances in the ASG group\\"}),`\\n`,(0,n.jsx)(e.li,{children:\\"ECS services running in the ECS cluster\\"}),`\\n`,(0,n.jsx)(e.li,{children:\\"ECS tasks (containers)\\"}),`\\n`]}),`\\n`,(0,n.jsx)(e.p,{children:\\"Also, we will review how you can obtain metrics and logs from the ECS cluster with and without AWS Cloudwatch. 
We\\\\u2019ll show you how to use:\\"}),`\\n`,(0,n.jsxs)(e.ul,{children:[`\\n`,(0,n.jsx)(e.li,{children:\\"AWS CloudWatch Container Insights (from Cloudwatch)\\"}),`\\n`,(0,n.jsx)(e.li,{children:\\"Docker metrics (non-Cloudwatch)\\"}),`\\n`,(0,n.jsx)(e.li,{children:\\"Amazon ECS logs via Cloudwatch\\"}),`\\n`]}),`\\n`,(0,n.jsx)(e.h2,{id:\\"prerequisites-and-configuration\\",children:\\"Prerequisites and configuration\\"}),`\\n`,(0,n.jsx)(e.p,{children:\\"If you plan on following this blog, here are some of the components and details we used to set up the configuration:\\"}),`\\n`,(0,n.jsxs)(e.ul,{children:[`\\n`,(0,n.jsxs)(e.li,{children:[\\"An account on \\",(0,n.jsx)(e.a,{href:\\"https://cloud.elastic.co\\",rel:\\"nofollow\\",children:\\"Elastic Cloud\\"}),\\" and a deployed stack (\\",(0,n.jsx)(e.a,{href:\\"https://www.elastic.co/guide/en/elastic-stack/current/installing-elastic-stack.html\\",rel:\\"nofollow\\",children:\\"see instructions here\\"}),\\") \\\\u2014 ensure that you have both.\\"]}),`\\n`,(0,n.jsxs)(e.li,{children:[\\"A \\",(0,n.jsx)(e.a,{href:\\"https://hub.docker.com/_/nginx\\",rel:\\"nofollow\\",children:\\"nginx\\"}),\\" container and a \\",(0,n.jsx)(e.a,{href:\\"https://github.com/containerstack/alpine-stress\\",rel:\\"nofollow\\",children:\\"stress container\\"}),\\" \\\\u2014 we will use these two basic containers to help highlight the load on the ECS cluster.\\"]}),`\\n`,(0,n.jsx)(e.li,{children:\\"An ECS EC2 Cluster in an Auto Scaling Group \\\\u2014 ensure you have access in order to load up the Elastic agent on the EC2 instances, or you can create an AMI and use that as the baseline image for your ECS cluster.\\"}),`\\n`,(0,n.jsx)(e.li,{children:\\"An EC2 instance anywhere in your account that is not part of the ECS cluster and has public access (to send metrics and logs)\\"}),`\\n`]}),`\\n`,(0,n.jsx)(e.h2,{id:\\"what-will-you-see-in-elastic-observability-once-its-all-set-up\\",children:\\"What will you see in Elastic Observability once it\'s all set up?\\"}),`\\n`,(0,n.jsx)(e.p,{children:\\"If you utilize the baseline configuration, with the ECS EC2 cluster configured for AWS CloudWatch Container Insights and the Elastic Agent configured with the following integrations:\\"}),`\\n`,(0,n.jsxs)(e.ul,{children:[`\\n`,(0,n.jsx)(e.li,{children:\\"ECS integration\\"}),`\\n`,(0,n.jsx)(e.li,{children:\\"EC2 integration\\"}),`\\n`,(0,n.jsx)(e.li,{children:\\"AWS Cloudwatch Integration with metrics and logging\\"}),`\\n`]}),`\\n`,(0,n.jsx)(e.p,{children:\\"Then you will be able to get the following information in Elastic dashboards:\\"}),`\\n`,(0,n.jsxs)(e.ul,{children:[`\\n`,(0,n.jsx)(e.li,{children:\\"Containers in the cluster (AWS CloudWatch Container Insights via Elastic Agent and AWS Cloudwatch integration)\\"}),`\\n`,(0,n.jsx)(e.li,{children:\\"Services in the cluster (AWS CloudWatch Container Insights via Elastic Agent and AWS Cloudwatch integration)\\"}),`\\n`,(0,n.jsx)(e.li,{children:\\"CPU and memory utilization of the ECS Cluster (Elastic Agent with ECS integration)\\"}),`\\n`,(0,n.jsx)(e.li,{children:\\"EC2 CPU and memory utilization of the instance in the cluster (Elastic Agent with EC2 integration)\\"}),`\\n`,(0,n.jsx)(e.li,{children:\\"CPU and memory utilization per container (via AWS CloudWatch Container Insights via Elastic Agent and AWS Cloudwatch 
integration)\\"}),`\\n`]}),`\\n`,(0,n.jsx)(e.p,{children:(0,n.jsx)(e.img,{src:\\"/assets/images/manage-applications-amazon-ecs-ec2-clusters-observability/elastic-blog-2-containers-in-cluster.png\\",alt:\\"containers in cluster\\",width:\\"1999\\",height:\\"932\\"})}),`\\n`,(0,n.jsx)(e.p,{children:\\"If the additional configuration using Elastic agents with docker integration per ECS EC2 instance is used, you will be able to get a direct feed of metrics via docker. The following metrics can be viewed:\\"}),`\\n`,(0,n.jsx)(e.p,{children:(0,n.jsx)(e.img,{src:\\"/assets/images/manage-applications-amazon-ecs-ec2-clusters-observability/elastic-blog-3-metrics-graphs.png\\",alt:\\"metrics graphs\\",width:\\"1999\\",height:\\"1071\\"})}),`\\n`,(0,n.jsx)(e.p,{children:\\"Let\\\\u2019s see how to set this all up.\\"}),`\\n`,(0,n.jsx)(e.h2,{id:\\"setting-it-all-up\\",children:\\"Setting it all up\\"}),`\\n`,(0,n.jsx)(e.p,{children:\\"Over the next few steps, I\\\\u2019ll walk through:\\"}),`\\n`,(0,n.jsxs)(e.ul,{children:[`\\n`,(0,n.jsx)(e.li,{children:\\"Getting an account on Elastic Cloud\\"}),`\\n`,(0,n.jsx)(e.li,{children:\\"Bringing up an ECS EC2 cluster and potentially setting up your own AMI\\"}),`\\n`,(0,n.jsxs)(e.li,{children:[\\"Setting up the containers \\",(0,n.jsx)(e.a,{href:\\"https://hub.docker.com/_/nginx\\",rel:\\"nofollow\\",children:\\"nginx\\"}),\\" and a \\",(0,n.jsx)(e.a,{href:\\"https://github.com/containerstack/alpine-stress\\",rel:\\"nofollow\\",children:\\"stress container\\"})]}),`\\n`,(0,n.jsx)(e.li,{children:\\"Setting up the Elastic agent with docker container integration on the ECS EC2 instances\\"}),`\\n`,(0,n.jsx)(e.li,{children:\\"Setting up the Elastic agent with AWS, Cloudwatch, and ECS integrations on an independent EC2 instance\\"}),`\\n`]}),`\\n`,(0,n.jsx)(e.h3,{id:\\"step-1-create-an-account-on-elastic-cloud\\",children:\\"Step 1: Create an account on Elastic Cloud\\"}),`\\n`,(0,n.jsxs)(e.p,{children:[\\"Follow the instructions to \\",(0,n.jsx)(e.a,{href:\\"https://cloud.elastic.co/registration?fromURI=/home\\",rel:\\"nofollow\\",children:\\"get started on Elastic Cloud\\"}),\\".\\"]}),`\\n`,(0,n.jsx)(e.p,{children:(0,n.jsx)(e.img,{src:\\"/assets/images/manage-applications-amazon-ecs-ec2-clusters-observability/elastic-blog-4-free-trial.png\\",alt:\\"free trial\\",width:\\"884\\",height:\\"654\\"})}),`\\n`,(0,n.jsx)(e.h3,{id:\\"step-2-set-up-an-ecs-cluster-with-ec2-instances\\",children:\\"Step 2: Set up an ECS Cluster with EC2 instances\\"}),`\\n`,(0,n.jsx)(e.p,{children:\\"When creating a cluster, you have two options when setting it up using the console:\\"}),`\\n`,(0,n.jsxs)(e.ul,{children:[`\\n`,(0,n.jsx)(e.li,{children:\\"Create a new ASG group where you will only be allowed to use the preloaded set of Amazon Linux (2 or 2023) based AMIs\\"}),`\\n`,(0,n.jsx)(e.li,{children:\\"Set up your own ASG Cluster prior to setting up the ECS Cluster and select this from the options. 
This option will give you more control over which Linux version you use, plus the ability to add things like Elastic agents to the AMI used for the instances in the ASG.\\"}),`\\n`]}),`\\n`,(0,n.jsxs)(e.p,{children:[\\"Regardless of the option you choose, you will need to turn on \\",(0,n.jsx)(e.strong,{children:\\"Container Insights\\"}),\\" (see the bottom part of the image below).\\"]}),`\\n`,(0,n.jsx)(e.p,{children:(0,n.jsx)(e.img,{src:\\"/assets/images/manage-applications-amazon-ecs-ec2-clusters-observability/elastic-blog-5-infrastructure.png\\",alt:\\"infrastructure\\",width:\\"1614\\",height:\\"1592\\"})}),`\\n`,(0,n.jsx)(e.p,{children:\\"Once the cluster is set up, you can go to AWS CloudWatch, where you should see Container Insights for your cluster:\\"}),`\\n`,(0,n.jsx)(e.p,{children:(0,n.jsx)(e.img,{src:\\"/assets/images/manage-applications-amazon-ecs-ec2-clusters-observability/elastic-blog-6-container-insights.png\\",alt:\\"container insights\\",width:\\"1964\\",height:\\"1464\\"})}),`\\n`,(0,n.jsx)(e.h3,{id:\\"step-3-set-up-elastic-agent-with-docker-integration\\",children:\\"Step 3: Set up Elastic agent with docker integration\\"}),`\\n`,(0,n.jsx)(e.p,{children:\\"Next, you will need to add an Elastic agent to each of the instances. In Elastic Cloud, set up an agent policy with the Docker and System integrations as such:\\"}),`\\n`,(0,n.jsx)(e.p,{children:(0,n.jsx)(e.img,{src:\\"/assets/images/manage-applications-amazon-ecs-ec2-clusters-observability/elastic-blog-7-ecs-ec2-cluster-policy.png\\",alt:\\"cluster policy\\",width:\\"1999\\",height:\\"536\\"})}),`\\n`,(0,n.jsx)(e.p,{children:\\"Next, add an agent for the policy, then copy the appropriate install script (in our case it was Linux since we were running Amazon Linux 2), and run it on every EC2 instance in the cluster:\\"}),`\\n`,(0,n.jsx)(e.p,{children:(0,n.jsx)(e.img,{src:\\"/assets/images/manage-applications-amazon-ecs-ec2-clusters-observability/elastic-blog-8-add-agent.png\\",alt:\\"add agent\\",width:\\"1999\\",height:\\"1213\\"})}),`\\n`,(0,n.jsx)(e.p,{children:\\"Once this is added, you should see agents in Fleet. 
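For reference, the install commands that Fleet generates follow this general shape (a sketch only; the agent version, Fleet Server URL, and enrollment token below are placeholders, so copy the exact script from the add-agent screen in Kibana):\\n\\n```bash\\n# Download and unpack the Elastic Agent (version number is a placeholder)\\ncurl -L -O https://artifacts.elastic.co/downloads/beats/elastic-agent/elastic-agent-8.9.0-linux-x86_64.tar.gz\\ntar xzvf elastic-agent-8.9.0-linux-x86_64.tar.gz\\ncd elastic-agent-8.9.0-linux-x86_64\\n\\n# Enroll the agent into the policy created above (URL and token are placeholders)\\nsudo ./elastic-agent install --url=https://<fleet-server-host>:443 --enrollment-token=<enrollment-token>\\n```\\n\\n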
Each agent will be on each EC2 instance:\\"}),`\\n`,(0,n.jsx)(e.p,{children:(0,n.jsx)(e.img,{src:\\"/assets/images/manage-applications-amazon-ecs-ec2-clusters-observability/elastic-blog-9-fleet.png\\",alt:\\"fleet\\",width:\\"1986\\",height:\\"770\\"})}),`\\n`,(0,n.jsx)(e.p,{children:\\"If you decide to set up an ECS EC2 cluster with your own ASG and don\\\\u2019t use Amazon Linux AMIs (version 2 or 2023), you will have to:\\"}),`\\n`,(0,n.jsxs)(e.ul,{children:[`\\n`,(0,n.jsx)(e.li,{children:\\"Pick the base image to build your AMI on\\"}),`\\n`,(0,n.jsx)(e.li,{children:(0,n.jsx)(e.a,{href:\\"https://docs.aws.amazon.com/AmazonECS/latest/developerguide/ecs-agent-install.html\\",rel:\\"nofollow\\",children:\\"Add an ECS agent and register each instance to the AMI base image manually\\"})}),`\\n`,(0,n.jsxs)(e.li,{children:[(0,n.jsx)(e.a,{href:\\"https://www.elastic.co/guide/en/fleet/current/install-standalone-elastic-agent.html\\",rel:\\"nofollow\\",children:\\"Add the Elastic agent \\\\u2014 standalone version\\"}),\\" \\\\u2014 this step will require you to configure your Elastic endpoint and API key (or simply add the script in the \\\\u201Cadd agent\\\\u201D part of the configuration above when using the UI)\\"]}),`\\n`,(0,n.jsx)(e.li,{children:\\"Create the AMI once all the above components are added\\"}),`\\n`,(0,n.jsx)(e.li,{children:\\"Use the newly created AMI when creating the ASG for the ECS cluster\\"}),`\\n`]}),`\\n`,(0,n.jsx)(e.h3,{id:\\"step-4-set-up-an-elastic-agent-with-the-aws-integration\\",children:\\"Step 4: Set up an Elastic agent with the AWS integration\\"}),`\\n`,(0,n.jsx)(e.p,{children:\\"From the integrations tab in Elastic Cloud, select the AWS integration and select add agent. You will then have to walk through the configuration of the AWS integration.\\"}),`\\n`,(0,n.jsx)(e.p,{children:\\"At a minimum, ensure that you have the following configuration options turned on:\\"}),`\\n`,(0,n.jsx)(e.p,{children:(0,n.jsx)(e.img,{src:\\"/assets/images/manage-applications-amazon-ecs-ec2-clusters-observability/elastic-blog-10-toggles.png\\",alt:\\"toggles\\",width:\\"1244\\",height:\\"938\\"})}),`\\n`,(0,n.jsx)(e.p,{children:\\"This will ensure that not only EC2 metrics and logs are ingested but that all CloudWatch metrics and logs are also ingested. ECS metrics and logs are stored in CloudWatch.\\"}),`\\n`,(0,n.jsx)(e.p,{children:\\"If you want to ensure only logs from the specific ECS cluster are ingested, you can also restrict what to ingest by several parameters. 
In our setup, we are collecting only logs from Log Group with a prefix of /aws/ecs/containerinsights/EC2BasedCluster/.\\"}),`\\n`,(0,n.jsx)(e.p,{children:(0,n.jsx)(e.img,{src:\\"/assets/images/manage-applications-amazon-ecs-ec2-clusters-observability/elastic-blog-11-cloudwatch.png\\",alt:\\"cloudwatch\\",width:\\"1164\\",height:\\"1480\\"})}),`\\n`,(0,n.jsx)(e.p,{children:\\"Once this policy is set up, add an agent like in Step 1.\\"}),`\\n`,(0,n.jsx)(e.p,{children:(0,n.jsx)(e.img,{src:\\"/assets/images/manage-applications-amazon-ecs-ec2-clusters-observability/elastic-blog-12-add-agent-testing-aws.png\\",alt:\\"add agent testing aws\\",width:\\"1999\\",height:\\"1388\\"})}),`\\n`,(0,n.jsx)(e.p,{children:\\"However, this agent needs to be added to an EC2 instance which is independent of the ECS cluster.\\"}),`\\n`,(0,n.jsx)(e.p,{children:\\"Once installed, this agent will help pull in:\\"}),`\\n`,(0,n.jsxs)(e.ul,{children:[`\\n`,(0,n.jsx)(e.li,{children:\\"All EC2 instance metrics across your account (which can be adjusted in the integration policy)\\"}),`\\n`,(0,n.jsx)(e.li,{children:\\"Ingest AWS CloudWatch Container Insights from ECS\\"}),`\\n`,(0,n.jsxs)(e.li,{children:[\\"ECS metrics such as:\\",`\\n`,(0,n.jsxs)(e.ul,{children:[`\\n`,(0,n.jsx)(e.li,{children:\\"aws.ecs.metrics.CPUReservation.avg\\"}),`\\n`,(0,n.jsx)(e.li,{children:\\"aws.ecs.metrics.CPUUtilization.avg\\"}),`\\n`,(0,n.jsx)(e.li,{children:\\"aws.ecs.metrics.GPUReservation.avg\\"}),`\\n`,(0,n.jsx)(e.li,{children:\\"aws.ecs.metrics.MemoryReservation.avg\\"}),`\\n`,(0,n.jsx)(e.li,{children:\\"aws.ecs.metrics.MemoryUtilization.avg\\"}),`\\n`,(0,n.jsx)(e.li,{children:(0,n.jsx)(e.a,{href:\\"https://docs.elastic.co/integrations/aws/ecs\\",rel:\\"nofollow\\",children:\\"More - see the full list here\\"})}),`\\n`]}),`\\n`]}),`\\n`]}),`\\n`,(0,n.jsx)(e.h3,{id:\\"step-5-setting-up-services-and-containers\\",children:\\"Step 5: Setting up services and containers\\"}),`\\n`,(0,n.jsxs)(e.p,{children:[\\"In running this configuration, we used \\",(0,n.jsx)(e.a,{href:\\"https://hub.docker.com/_/nginx\\",rel:\\"nofollow\\",children:\\"nginx\\"}),\\" and a \\",(0,n.jsx)(e.a,{href:\\"https://github.com/containerstack/alpine-stress\\",rel:\\"nofollow\\",children:\\"stress container\\"}),\\" before we go into the task.\\"]}),`\\n`,(0,n.jsx)(e.p,{children:\\"In order to initiate service and containers on ECS, you will need to set up a task for each of these containers. 
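If you prefer the AWS CLI over the console for this step, registering the two task definitions and creating a service for each looks roughly like the sketch below (the JSON file names, subnet ID, and security group ID are placeholders for your environment):\\n\\n```bash\\n# Register each task definition from its JSON file (the two task JSONs are shown next)\\naws ecs register-task-definition --cli-input-json file://nginx-task.json\\naws ecs register-task-definition --cli-input-json file://stress-task.json\\n\\n# Create one EC2 launch-type service per task definition; because the tasks use\\n# awsvpc networking, a network configuration is required (IDs are placeholders)\\naws ecs create-service --cluster EC2BasedCluster --service-name nginx --task-definition NGINX --desired-count 1 --launch-type EC2 --network-configuration \\"awsvpcConfiguration={subnets=[subnet-0123456789abcdef0],securityGroups=[sg-0123456789abcdef0]}\\"\\n```\\n\\n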
But more importantly, you will need to ensure that both of the following roles:\\"}),`\\n`,(0,n.jsx)(e.p,{children:\'\\"taskRoleArn\\": \\"arn:aws:iam::xxxxx:role/ecsTaskExecutionRole\\",\'}),`\\n`,(0,n.jsx)(e.p,{children:\'\\"executionRoleArn\\": \\"arn:aws:iam::xxxxx:role/ecsTaskExecutionRole\\",\'}),`\\n`,(0,n.jsx)(e.p,{children:\\"have the following permissions:\\"}),`\\n`,(0,n.jsx)(e.p,{children:(0,n.jsx)(e.img,{src:\\"/assets/images/manage-applications-amazon-ecs-ec2-clusters-observability/elastic-blog-13-permissions.png\\",alt:\\"permissions\\",width:\\"1999\\",height:\\"390\\"})}),`\\n`,(0,n.jsx)(e.p,{children:\\"Most importantly, you should ensure that this managed policy is attached:\\"}),`\\n`,(0,n.jsx)(e.p,{children:\\"AmazonEC2ContainerServiceforEC2Role\\"}),`\\n`,(0,n.jsx)(e.p,{children:\\"It ensures containers can be brought up on the EC2 instances in the cluster.\\"}),`\\n`,(0,n.jsx)(e.p,{children:\\"Once you have the right permissions, set up the following tasks.\\"}),`\\n`,(0,n.jsx)(e.p,{children:\\"Here is the task JSON for NGINX:\\"}),`\\n`,(0,n.jsx)(e.pre,{children:(0,n.jsx)(e.code,{className:\\"language-json\\",children:`{\\n \\"family\\": \\"NGINX\\",\\n \\"containerDefinitions\\": [\\n {\\n \\"name\\": \\"nginx\\",\\n \\"image\\": \\"nginx:latest\\",\\n \\"cpu\\": 0,\\n \\"portMappings\\": [\\n {\\n \\"name\\": \\"nginx-80-tcp\\",\\n \\"containerPort\\": 80,\\n \\"hostPort\\": 80,\\n \\"protocol\\": \\"tcp\\",\\n \\"appProtocol\\": \\"http\\"\\n }\\n ],\\n \\"essential\\": true,\\n \\"environment\\": [],\\n \\"environmentFiles\\": [],\\n \\"mountPoints\\": [],\\n \\"volumesFrom\\": [],\\n \\"ulimits\\": [],\\n \\"logConfiguration\\": {\\n \\"logDriver\\": \\"awslogs\\",\\n \\"options\\": {\\n \\"awslogs-create-group\\": \\"true\\",\\n \\"awslogs-group\\": \\"/ecs/\\",\\n \\"awslogs-region\\": \\"us-west-2\\",\\n \\"awslogs-stream-prefix\\": \\"ecs\\"\\n },\\n \\"secretOptions\\": []\\n }\\n }\\n ],\\n \\"taskRoleArn\\": \\"arn:aws:iam::xxxxx:role/ecsTaskExecutionRole\\",\\n \\"executionRoleArn\\": \\"arn:aws:iam::xxxxx:role/ecsTaskExecutionRole\\",\\n \\"networkMode\\": \\"awsvpc\\",\\n \\"requiresCompatibilities\\": [\\"EC2\\"],\\n \\"cpu\\": \\"256\\",\\n \\"memory\\": \\"512\\",\\n \\"runtimePlatform\\": {\\n \\"cpuArchitecture\\": \\"X86_64\\",\\n \\"operatingSystemFamily\\": \\"LINUX\\"\\n }\\n}\\n`})}),`\\n`,(0,n.jsx)(e.p,{children:\\"Here is the task JSON for the stress container:\\"}),`\\n`,(0,n.jsx)(e.pre,{children:(0,n.jsx)(e.code,{className:\\"language-json\\",children:`{\\n \\"family\\": \\"stressLoad\\",\\n \\"containerDefinitions\\": [\\n {\\n \\"name\\": \\"stressLoad\\",\\n \\"image\\": \\"containerstack/alpine-stress\\",\\n \\"cpu\\": 0,\\n \\"memory\\": 512,\\n \\"memoryReservation\\": 512,\\n \\"portMappings\\": [],\\n \\"essential\\": true,\\n \\"entryPoint\\": [\\"sh\\", \\"-c\\"],\\n \\"command\\": [\\n \\"/usr/local/bin/stress --cpu 2 --io 2 --vm 1 --vm-bytes 128M --timeout 6000s\\"\\n ],\\n \\"environment\\": [],\\n \\"mountPoints\\": [],\\n \\"volumesFrom\\": [],\\n \\"logConfiguration\\": {\\n \\"logDriver\\": \\"awslogs\\",\\n \\"options\\": {\\n \\"awslogs-create-group\\": \\"true\\",\\n \\"awslogs-group\\": \\"/ecs/\\",\\n \\"awslogs-region\\": \\"us-west-2\\",\\n \\"awslogs-stream-prefix\\": \\"ecs\\"\\n }\\n }\\n }\\n ],\\n \\"taskRoleArn\\": \\"arn:aws:iam::xxxxx:role/ecsTaskExecutionRole\\",\\n \\"executionRoleArn\\": \\"arn:aws:iam::xxxxx:role/ecsTaskExecutionRole\\",\\n \\"networkMode\\": \\"awsvpc\\",\\n 
\\"requiresCompatibilities\\": [\\"EC2\\"],\\n \\"cpu\\": \\"256\\",\\n \\"memory\\": \\"512\\",\\n \\"runtimePlatform\\": {\\n \\"cpuArchitecture\\": \\"X86_64\\",\\n \\"operatingSystemFamily\\": \\"LINUX\\"\\n }\\n}\\n`})}),`\\n`,(0,n.jsx)(e.p,{children:\\"Once you have defined the tasks, ensure you bring up each service (one for each task) with the launch type of EC2:\\"}),`\\n`,(0,n.jsx)(e.p,{children:(0,n.jsx)(e.img,{src:\\"/assets/images/manage-applications-amazon-ecs-ec2-clusters-observability/elastic-blog-14-environment.png\\",alt:\\"environment\\",width:\\"1654\\",height:\\"1064\\"})}),`\\n`,(0,n.jsx)(e.p,{children:\\"You should have two services running now.\\"}),`\\n`,(0,n.jsx)(e.p,{children:(0,n.jsx)(e.img,{src:\\"/assets/images/manage-applications-amazon-ecs-ec2-clusters-observability/elastic-blog-15-ec2basedcluster.png\\",alt:\\"ec2basedcluster\\",width:\\"1999\\",height:\\"1034\\"})}),`\\n`,(0,n.jsx)(e.h3,{id:\\"step-6-check-on-metrics-and-logs-in-elastic-cloud\\",children:\\"Step 6: Check on metrics and logs in Elastic Cloud\\"}),`\\n`,(0,n.jsx)(e.p,{children:\\"Go to Elastic Cloud and ensure that you are getting metrics and logs from the ECS Cluster. First, check to see if you are receiving metrics by viewing the built-in dashboard called [Metrics Docker] Overview.\\"}),`\\n`,(0,n.jsx)(e.p,{children:(0,n.jsx)(e.img,{src:\\"/assets/images/manage-applications-amazon-ecs-ec2-clusters-observability/elastic-blog-16-docker.png\\",alt:\\"Docker image\\",width:\\"1932\\",height:\\"736\\"})}),`\\n`,(0,n.jsxs)(e.p,{children:[\\"_ \\",(0,n.jsx)(e.strong,{children:\\"With some work on this dashboard by adding in container insight metrics and docker metrics, you should be able to see:\\"}),\\" _\\"]}),`\\n`,(0,n.jsx)(e.p,{children:(0,n.jsx)(e.img,{src:\\"/assets/images/manage-applications-amazon-ecs-ec2-clusters-observability/elastic-blog-3-metrics-graphs.png\\",alt:\\"graphs\\",width:\\"1999\\",height:\\"1071\\"})}),`\\n`,(0,n.jsx)(e.p,{children:\\"If you only have the ECS integration and the Elastic agent in Step 2, then you will need to create a new dashboard:\\"}),`\\n`,(0,n.jsx)(e.p,{children:(0,n.jsx)(e.img,{src:\\"/assets/images/manage-applications-amazon-ecs-ec2-clusters-observability/elastic-blog-2-containers-in-cluster.png\\",alt:\\"cluster\\",width:\\"1999\\",height:\\"932\\"})}),`\\n`,(0,n.jsx)(e.p,{children:\\"This dashboard can be set up with the following metrics:\\"}),`\\n`,(0,n.jsxs)(e.ul,{children:[`\\n`,(0,n.jsx)(e.li,{children:\'Containers in the cluster (containerInsights via Elastic Agent and AWS Cloudwatch integration). Set up a TSVB panel using the following metric: aws.dimensions.ClusterName : \\"EC2BasedCluster\\" with aws.containerinsights.metrics.TaskCount.max\'}),`\\n`,(0,n.jsx)(e.li,{children:\\"Services in the cluster (containerInsights via Elastic Agent and AWS Cloudwatch integration). Use the following configuration to setup the chart:\\"}),`\\n`]}),`\\n`,(0,n.jsx)(e.p,{children:(0,n.jsx)(e.img,{src:\\"/assets/images/manage-applications-amazon-ecs-ec2-clusters-observability/elastic-blog-17-table.png\\",alt:\\"table\\",width:\\"692\\",height:\\"838\\"})}),`\\n`,(0,n.jsxs)(e.ul,{children:[`\\n`,(0,n.jsx)(e.li,{children:\\"CPU and memory utilization of the ECS Cluster (Elastic Agent with ECS integration). 
Use the following configuration to set up both CPU and memory utilization charts:\\"}),`\\n`]}),`\\n`,(0,n.jsx)(e.p,{children:(0,n.jsx)(e.img,{src:\\"/assets/images/manage-applications-amazon-ecs-ec2-clusters-observability/elastic-blog-18-line.png\\",alt:\\"line\\",width:\\"660\\",height:\\"668\\"})}),`\\n`,(0,n.jsxs)(e.ul,{children:[`\\n`,(0,n.jsx)(e.li,{children:\\"EC2 CPU and storage utilization of the instance in the cluster (Elastic Agent with EC2 integration). Use the following configuration to set up both CPU and memory utilization charts:\\"}),`\\n`]}),`\\n`,(0,n.jsx)(e.p,{children:(0,n.jsx)(e.img,{src:\\"/assets/images/manage-applications-amazon-ecs-ec2-clusters-observability/elastic-blog-19-bar-vertical-stacked.png\\",alt:\\"bar vertical stacked\\",width:\\"668\\",height:\\"706\\"})}),`\\n`,(0,n.jsxs)(e.ul,{children:[`\\n`,(0,n.jsx)(e.li,{children:\\"(Not shown): CPU and memory utilization per container (via containerInsights via Elastic Agent and AWS Cloudwatch integration)\\"}),`\\n`]}),`\\n`,(0,n.jsx)(e.h3,{id:\\"step-7-look-at-logs-from-your-ecs-cluster\\",children:\\"Step 7: Look at logs from your ECS cluster\\"}),`\\n`,(0,n.jsx)(e.p,{children:\\"Since we set up AWS CloudWatch logs collection in Step 2, we can view these logs in Discover by filtering on the logs group arn /aws/ecs/containerinsights/EC2BasedCluster/.\\"}),`\\n`,(0,n.jsx)(e.p,{children:(0,n.jsx)(e.img,{src:\\"/assets/images/manage-applications-amazon-ecs-ec2-clusters-observability/elastic-blog-20-logs.png\\",alt:\\"logs\\",width:\\"1999\\",height:\\"1080\\"})}),`\\n`,(0,n.jsx)(e.h2,{id:\\"summary\\",children:\\"Summary\\"}),`\\n`,(0,n.jsxs)(e.p,{children:[\\"I hope you\\\\u2019ve gotten an appreciation for how Elastic Observability can help your \\",(0,n.jsx)(e.a,{href:\\"https://www.elastic.co/observability/aws-monitoring\\",rel:\\"nofollow\\",children:\\"AWS monitoring\\"}),\\" ECS service metrics. Here\\\\u2019s a quick recap of lessons and what you learned:\\"]}),`\\n`,(0,n.jsxs)(e.ul,{children:[`\\n`,(0,n.jsx)(e.li,{children:\\"Elastic Observability supports ingesting and analysis of AWS ECS service metrics and the corresponding EC2 metrics through the AWS integration on the Elastic Agent. It\\\\u2019s easy to set up ingest from AWS Services via the Elastic Agent.\\"}),`\\n`,(0,n.jsx)(e.li,{children:\\"Elastic Observability can also get container metrics via the Docker integration running on Elastic agents on each of the EC2 instances in the ECS EC2 auto scaling group.\\"}),`\\n`,(0,n.jsx)(e.li,{children:\\"Elastic has multiple out-of-the-box (OOTB) AWS service dashboards that can be used as baselines to get your own customized view.\\"}),`\\n`]}),`\\n`,(0,n.jsxs)(e.p,{children:[\\"Ready to get started? Start your own \\",(0,n.jsx)(e.a,{href:\\"https://aws.amazon.com/marketplace/pp/prodview-voru33wi6xs7k?trk=5fbc596b-6d2a-433a-8333-0bd1f28e84da%E2%89%BBchannel=el\\",rel:\\"nofollow\\",children:\\"7-day free trial\\"}),\\" by signing up via \\",(0,n.jsx)(e.a,{href:\\"https://aws.amazon.com/marketplace/pp/prodview-voru33wi6xs7k?trk=d54b31eb-671c-49ba-88bb-7a1106421dfa%E2%89%BBchannel=el\\",rel:\\"nofollow\\",children:\\"AWS Marketplace\\"}),\\" and quickly spin up a deployment in minutes on any of the \\",(0,n.jsx)(e.a,{href:\\"https://www.elastic.co/guide/en/cloud/current/ec-reference-regions.html#ec_amazon_web_services_aws_regions\\",rel:\\"nofollow\\",children:\\"Elastic Cloud regions on AWS\\"}),\\" around the world. 
Your AWS Marketplace purchase of Elastic will be included in your monthly consolidated billing statement and will draw against your committed spend with AWS.\\"]}),`\\n`,(0,n.jsx)(e.p,{children:(0,n.jsx)(e.em,{children:\\"The release and timing of any features or functionality described in this post remain at Elastic\'s sole discretion. Any features or functionality not currently available may not be delivered on time or at all.\\"})})]})}function d(t={}){let{wrapper:e}=t.components||{};return e?(0,n.jsx)(e,{...t,children:(0,n.jsx)(h,{...t})}):h(t)}return C(v);})();\\n;return Component;"},"_id":"articles/manage-applications-amazon-ecs-ec2-based-clusters-elastic-observability.mdx","_raw":{"sourceFilePath":"articles/manage-applications-amazon-ecs-ec2-based-clusters-elastic-observability.mdx","sourceFileName":"manage-applications-amazon-ecs-ec2-based-clusters-elastic-observability.mdx","sourceFileDir":"articles","contentType":"mdx","flattenedPath":"articles/manage-applications-amazon-ecs-ec2-based-clusters-elastic-observability"},"type":"Article","imageUrl":"/assets/images/manage-applications-amazon-ecs-ec2-clusters-observability/library-branding-elastic-observability-midnight-1680x980.png","readingTime":"15 min read","url":"/manage-applications-amazon-ecs-ec2-clusters-observability","headings":[{"level":2,"title":"Prerequisites and configuration","href":"#prerequisites-and-configuration"},{"level":2,"title":"What will you see in Elastic Observability once it\'s all set up?","href":"#what-will-you-see-in-elastic-observability-once-its-all-set-up"},{"level":2,"title":"Setting it all up","href":"#setting-it-all-up"},{"level":3,"title":"Step 1: Create an account on Elastic Cloud","href":"#step-1-create-an-account-on-elastic-cloud"},{"level":3,"title":"Step 2: Set up an ECS Cluster with EC2 instances","href":"#step-2-set-up-an-ecs-cluster-with-ec2-instances"},{"level":3,"title":"Step 3: Set up Elastic agent with docker integration","href":"#step-3-set-up-elastic-agent-with-docker-integration"},{"level":3,"title":"Step 4: Set up an Elastic agent with the AWS integration","href":"#step-4-set-up-an-elastic-agent-with-the-aws-integration"},{"level":3,"title":"Step 5: Setting up services and containers","href":"#step-5-setting-up-services-and-containers"},{"level":3,"title":"Step 6: Check on metrics and logs in Elastic Cloud","href":"#step-6-check-on-metrics-and-logs-in-elastic-cloud"},{"level":3,"title":"Step 7: Look at logs from your ECS cluster","href":"#step-7-look-at-logs-from-your-ecs-cluster"},{"level":2,"title":"Summary","href":"#summary"}]},{"title":"Manual instrumentation of Go applications with OpenTelemetry","slug":"manual-instrumentation-apps-opentelemetry","date":"2023-09-12","description":"In this blog post, we will show you how to manually instrument Go applications using OpenTelemetry. We will explore how to use the proper OpenTelemetry Go packages and, in particular, work on instrumenting tracing in a Go application.","image":"observability-launch-series-5-go-manual.jpg","author":[{"slug":"luca-wintergerst","type":"Author","_raw":{}}],"tags":[{"slug":"opentelemetry","type":"Tag","_raw":{}},{"slug":"apm","type":"Tag","_raw":{}},{"slug":"go","type":"Tag","_raw":{}},{"slug":"instrumentation","type":"Tag","_raw":{}}],"body":{"raw":"\\nDevOps and SRE teams are transforming the process of software development. While DevOps engineers focus on efficient software applications and service delivery, SRE teams are key to ensuring reliability, scalability, and performance. 
These teams must rely on a full-stack observability solution that allows them to manage and monitor systems and ensure issues are resolved before they impact the business.\\n\\nObservability across the entire stack of modern distributed applications requires data collection, processing, and correlation often in the form of dashboards. Ingesting all system data requires installing agents across stacks, frameworks, and providers — a process that can be challenging and time-consuming for teams who have to deal with version changes, compatibility issues, and proprietary code that doesn't scale as systems change.\\n\\nThanks to [OpenTelemetry](https://opentelemetry.io) (OTel), DevOps and SRE teams now have a standard way to collect and send data that doesn't rely on proprietary code and has a large support community, reducing vendor lock-in.\\n\\nIn this blog post, we will show you how to manually instrument Go applications using OpenTelemetry. This approach is slightly more complex than using auto-instrumentation.\\n\\nIn a [previous blog](https://www.elastic.co/blog/opentelemetry-observability), we also reviewed how to use the OpenTelemetry demo and connect it to Elastic\xae, as well as some of Elastic’s capabilities with OpenTelemetry. In this blog, we will use [an alternative demo application](https://github.com/elastic/observability-examples), which helps highlight manual instrumentation in a simple way.\\n\\nFinally, we will discuss how Elastic supports mixed-mode applications, which run with Elastic and OpenTelemetry agents. The beauty of this is that there is **no need for the otel-collector**! This setup enables you to slowly and easily migrate an application to OTel with Elastic according to a timeline that best fits your business.\\n\\n## Application, prerequisites, and config\\n\\nThe application that we use for this blog is called [Elastiflix](https://github.com/elastic/observability-examples), a movie streaming application. It consists of several microservices written in .NET, NodeJS, Go, and Python.\\n\\nBefore we instrument our sample application, we will first need to understand how Elastic can receive the telemetry data.\\n\\n![Elastic configuration options for OpenTelemetry](/assets/images/manual-instrumentation-apps-opentelemetry/GO-flowhcart.png)\\n\\nAll of Elastic Observability’s APM capabilities are available with OTel data. Some of these include:\\n\\n- Service maps\\n- Service details (latency, throughput, failed transactions)\\n- Dependencies between services, distributed tracing\\n- Transactions (traces)\\n- Machine learning (ML) correlations\\n- Log correlation\\n\\nIn addition to Elastic’s APM and a unified view of the telemetry data, you will also be able to use Elastic’s powerful machine learning capabilities to reduce the analysis effort, and alerting to help reduce MTTR.\\n\\n## Prerequisites\\n\\n- An Elastic Cloud account — [sign up now](https://cloud.elastic.co/)\\n- A clone of the [Elastiflix demo application](https://github.com/elastic/observability-examples), or your own Go application\\n- Basic understanding of Docker — potentially install [Docker Desktop](https://www.docker.com/products/docker-desktop/)\\n- Basic understanding of Go\\n\\n## View the example source code\\n\\nThe full source code including the Dockerfile used in this blog can be found on [GitHub](https://github.com/elastic/observability-examples/tree/main/Elastiflix/go-favorite-otel-manual). 
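If you want to follow along locally, cloning the examples repository and changing into the Go service directory is all you need; this is standard git usage, and the URL and path come from the links above:\\n\\n```bash\\ngit clone https://github.com/elastic/observability-examples.git\\ncd observability-examples/Elastiflix/go-favorite-otel-manual\\n```\\n\\n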
The repository also contains the [same application without instrumentation](https://github.com/elastic/observability-examples/tree/main/Elastiflix/go-favorite). This allows you to compare each file and see the differences.\\n\\nBefore we begin, let’s look at the non-instrumented code first.\\n\\nThis is our simple go application that can receive a GET request. Note that the code shown here is a slightly abbreviated version.\\n\\n```go\\npackage main\\n\\nimport (\\n\\t\\"log\\"\\n\\t\\"net/http\\"\\n\\t\\"os\\"\\n\\t\\"time\\"\\n\\n\\t\\"github.com/go-redis/redis/v8\\"\\n\\n\\t\\"github.com/sirupsen/logrus\\"\\n\\n\\t\\"github.com/gin-gonic/gin\\"\\n\\t\\"strconv\\"\\n\\t\\"math/rand\\"\\n)\\n\\nvar logger = &logrus.Logger{\\n\\tOut: os.Stderr,\\n\\tHooks: make(logrus.LevelHooks),\\n\\tLevel: logrus.InfoLevel,\\n\\tFormatter: &logrus.JSONFormatter{\\n\\t\\tFieldMap: logrus.FieldMap{\\n\\t\\t\\tlogrus.FieldKeyTime: \\"@timestamp\\",\\n\\t\\t\\tlogrus.FieldKeyLevel: \\"log.level\\",\\n\\t\\t\\tlogrus.FieldKeyMsg: \\"message\\",\\n\\t\\t\\tlogrus.FieldKeyFunc: \\"function.name\\", // non-ECS\\n\\t\\t},\\n\\t\\tTimestampFormat: time.RFC3339Nano,\\n\\t},\\n}\\n\\nfunc main() {\\n\\tdelayTime, _ := strconv.Atoi(os.Getenv(\\"TOGGLE_SERVICE_DELAY\\"))\\n\\n\\tredisHost := os.Getenv(\\"REDIS_HOST\\")\\n\\tif redisHost == \\"\\" {\\n\\t\\tredisHost = \\"localhost\\"\\n\\t}\\n\\n\\tredisPort := os.Getenv(\\"REDIS_PORT\\")\\n\\tif redisPort == \\"\\" {\\n\\t\\tredisPort = \\"6379\\"\\n\\t}\\n\\n\\tapplicationPort := os.Getenv(\\"APPLICATION_PORT\\")\\n\\tif applicationPort == \\"\\" {\\n\\t\\tapplicationPort = \\"5000\\"\\n\\t}\\n\\n\\t// Initialize Redis client\\n\\trdb := redis.NewClient(&redis.Options{\\n\\t\\tAddr: redisHost + \\":\\" + redisPort,\\n\\t\\tPassword: \\"\\",\\n\\t\\tDB: 0,\\n\\t})\\n\\n\\t// Initialize router\\n\\tr := gin.New()\\n\\tr.Use(logrusMiddleware)\\n\\n\\tr.GET(\\"/favorites\\", func(c *gin.Context) {\\n\\t\\t// artificial sleep for delayTime\\n\\t\\ttime.Sleep(time.Duration(delayTime) * time.Millisecond)\\n\\n\\t\\tuserID := c.Query(\\"user_id\\")\\n\\n\\t\\tcontextLogger(c).Infof(\\"Getting favorites for user %q\\", userID)\\n\\n\\t\\tfavorites, err := rdb.SMembers(c.Request.Context(), userID).Result()\\n\\t\\tif err != nil {\\n\\t\\t\\tcontextLogger(c).Error(\\"Failed to get favorites for user %q\\", userID)\\n\\t\\t\\tc.String(http.StatusInternalServerError, \\"Failed to get favorites\\")\\n\\t\\t\\treturn\\n\\t\\t}\\n\\n\\t\\tcontextLogger(c).Infof(\\"User %q has favorites %q\\", userID, favorites)\\n\\n\\t\\tc.JSON(http.StatusOK, gin.H{\\n\\t\\t\\t\\"favorites\\": favorites,\\n\\t\\t})\\n\\t})\\n\\n\\t// Start server\\n\\tlogger.Infof(\\"App startup\\")\\n\\tlog.Fatal(http.ListenAndServe(\\":\\"+applicationPort, r))\\n\\tlogger.Infof(\\"App stopped\\")\\n}\\n```\\n\\n## Step-by-step guide\\n\\n### Step 0. Log in to your Elastic Cloud account\\n\\nThis blog assumes you have an Elastic Cloud account — if not, follow the [instructions to get started on Elastic Cloud](https://cloud.elastic.co/registration?elektra=en-cloud-page).\\n\\n![free trial](/assets/images/manual-instrumentation-apps-opentelemetry/elastic-blog-4-free-trial.png)\\n\\n### Step 1. 
Install and initialize OpenTelemetry\\n\\nAs a first step, we’ll need to add some additional packages to our application.\\n\\n```go\\nimport (\\n \\"github.com/go-redis/redis/extra/redisotel/v8\\"\\n \\"go.opentelemetry.io/otel\\"\\n \\"go.opentelemetry.io/otel/attribute\\"\\n \\"go.opentelemetry.io/otel/exporters/otlp/otlptrace\\"\\n \\"go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracegrpc\\"\\n\\n\\t\\"go.opentelemetry.io/otel/propagation\\"\\n\\n\\t\\"google.golang.org/grpc/credentials\\"\\n\\t\\"crypto/tls\\"\\n\\n sdktrace \\"go.opentelemetry.io/otel/sdk/trace\\"\\n\\n\\t\\"go.opentelemetry.io/contrib/instrumentation/github.com/gin-gonic/gin/otelgin\\"\\n\\n\\t\\"go.opentelemetry.io/otel/trace\\"\\n\\t\\"go.opentelemetry.io/otel/codes\\"\\n)\\n```\\n\\nThis code imports necessary OpenTelemetry packages, including those for tracing, exporting, and instrumenting specific libraries like Redis.\\n\\nNext we read the \\"OTEL_EXPORTER_OTLP_ENDPOINT\\" variable and initialize the exporter.\\n\\n```go\\nvar (\\n collectorURL = os.Getenv(\\"OTEL_EXPORTER_OTLP_ENDPOINT\\")\\n)\\nvar tracer trace.Tracer\\n\\n\\nfunc initTracer() func(context.Context) error {\\n\\ttracer = otel.Tracer(\\"go-favorite-otel-manual\\")\\n\\n\\t// remove https:// from the collector URL if it exists\\n\\tcollectorURL = strings.Replace(collectorURL, \\"https://\\", \\"\\", 1)\\n\\tsecretToken := os.Getenv(\\"ELASTIC_APM_SECRET_TOKEN\\")\\n\\tif secretToken == \\"\\" {\\n\\t\\tlog.Fatal(\\"ELASTIC_APM_SECRET_TOKEN is required\\")\\n\\t}\\n\\n\\tsecureOption := otlptracegrpc.WithInsecure()\\n exporter, err := otlptrace.New(\\n context.Background(),\\n otlptracegrpc.NewClient(\\n secureOption,\\n otlptracegrpc.WithEndpoint(collectorURL),\\n\\t\\t\\totlptracegrpc.WithHeaders(map[string]string{\\n\\t\\t\\t\\t\\"Authorization\\": \\"Bearer \\" + secretToken,\\n\\t\\t\\t}),\\n\\t\\t\\totlptracegrpc.WithTLSCredentials(credentials.NewTLS(&tls.Config{})),\\n ),\\n )\\n\\n if err != nil {\\n log.Fatal(err)\\n }\\n\\n otel.SetTracerProvider(\\n sdktrace.NewTracerProvider(\\n sdktrace.WithSampler(sdktrace.AlwaysSample()),\\n sdktrace.WithBatcher(exporter),\\n ),\\n )\\n\\totel.SetTextMapPropagator(\\n\\t\\tpropagation.NewCompositeTextMapPropagator(\\n\\t\\t\\tpropagation.Baggage{},\\n\\t\\t\\tpropagation.TraceContext{},\\n\\t\\t),\\n\\t)\\n return exporter.Shutdown\\n}\\n```\\n\\nFor instrumenting connections to Redis, we will add a tracing hook to it, and in order to instrument Gin, we will add the OTel middleware. This will automatically capture all interactions with our application, since Gin will be fully instrumented. In addition, all outgoing connections to Redis will also be instrumented.\\n\\n```go\\n// Initialize Redis client\\n\\trdb := redis.NewClient(&redis.Options{\\n\\t\\tAddr: redisHost + \\":\\" + redisPort,\\n\\t\\tPassword: \\"\\",\\n\\t\\tDB: 0,\\n\\t})\\n\\trdb.AddHook(redisotel.NewTracingHook())\\n\\t// Initialize router\\n\\tr := gin.New()\\n\\tr.Use(logrusMiddleware)\\n\\tr.Use(otelgin.Middleware(\\"go-favorite-otel-manual\\"))\\n```\\n\\n**Adding custom spans** \\nNow that we have everything added and initialized, we can add custom spans.\\n\\nIf we want to have additional instrumentation for a part of our app, we simply start a custom span and then defer ending the span.\\n\\n```go\\n// start otel span\\nctx := c.Request.Context()\\nctx, span := tracer.Start(ctx, \\"add_favorite_movies\\")\\ndefer span.End()\\n```\\n\\nFor comparison, this is the instrumented code of our sample application. 
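One more optional touch before the full listing: beyond starting and ending a span, you can also attach attributes and record errors on it. The snippet below is a sketch that reuses the attribute and codes packages from the import list above; the attribute key and the userID and err variables are illustrative, not part of the original example.\\n\\n```go\\n// Sketch: enriching the custom span with attributes and error status\\nctx := c.Request.Context()\\nctx, span := tracer.Start(ctx, \\"add_favorite_movies\\")\\ndefer span.End()\\n\\n// attach searchable metadata to the span (key name is our own choice)\\nspan.SetAttributes(attribute.String(\\"user.id\\", userID))\\n\\nif err != nil {\\n\\t// mark the span as failed so it stands out in Elastic APM\\n\\tspan.RecordError(err)\\n\\tspan.SetStatus(codes.Error, \\"failed to add favorite movies\\")\\n}\\n```\\n\\n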
You can find the full source code in [GitHub](https://github.com/elastic/observability-examples/tree/main/Elastiflix/go-favorite-otel-manual).\\n\\n```go\\npackage main\\n\\nimport (\\n\\t\\"log\\"\\n\\t\\"net/http\\"\\n\\t\\"os\\"\\n\\t\\"time\\"\\n\\t\\"context\\"\\n\\n\\t\\"github.com/go-redis/redis/v8\\"\\n\\t\\"github.com/go-redis/redis/extra/redisotel/v8\\"\\n\\n\\n\\t\\"github.com/sirupsen/logrus\\"\\n\\n\\t\\"github.com/gin-gonic/gin\\"\\n\\n \\"go.opentelemetry.io/otel\\"\\n \\"go.opentelemetry.io/otel/attribute\\"\\n \\"go.opentelemetry.io/otel/exporters/otlp/otlptrace\\"\\n \\"go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracegrpc\\"\\n\\n\\t\\"go.opentelemetry.io/otel/propagation\\"\\n\\n\\t\\"google.golang.org/grpc/credentials\\"\\n\\t\\"crypto/tls\\"\\n\\n sdktrace \\"go.opentelemetry.io/otel/sdk/trace\\"\\n\\n\\t\\"go.opentelemetry.io/contrib/instrumentation/github.com/gin-gonic/gin/otelgin\\"\\n\\n\\t\\"go.opentelemetry.io/otel/trace\\"\\n\\n\\t\\"strings\\"\\n\\t\\"strconv\\"\\n\\t\\"math/rand\\"\\n\\t\\"go.opentelemetry.io/otel/codes\\"\\n\\n)\\n\\nvar tracer trace.Tracer\\n\\nfunc initTracer() func(context.Context) error {\\n\\ttracer = otel.Tracer(\\"go-favorite-otel-manual\\")\\n\\n\\tcollectorURL = strings.Replace(collectorURL, \\"https://\\", \\"\\", 1)\\n\\n\\tsecureOption := otlptracegrpc.WithInsecure()\\n\\n\\t// split otlpHeaders by comma and convert to map\\n\\theaders := make(map[string]string)\\n\\tfor _, header := range strings.Split(otlpHeaders, \\",\\") {\\n\\t\\theaderParts := strings.Split(header, \\"=\\")\\n\\n\\t\\tif len(headerParts) == 2 {\\n\\t\\t\\theaders[headerParts[0]] = headerParts[1]\\n\\t\\t}\\n\\t}\\n\\n exporter, err := otlptrace.New(\\n context.Background(),\\n otlptracegrpc.NewClient(\\n secureOption,\\n otlptracegrpc.WithEndpoint(collectorURL),\\n\\t\\t\\totlptracegrpc.WithHeaders(headers),\\n\\t\\t\\totlptracegrpc.WithTLSCredentials(credentials.NewTLS(&tls.Config{})),\\n ),\\n )\\n\\n if err != nil {\\n log.Fatal(err)\\n }\\n\\n otel.SetTracerProvider(\\n sdktrace.NewTracerProvider(\\n sdktrace.WithSampler(sdktrace.AlwaysSample()),\\n sdktrace.WithBatcher(exporter),\\n //sdktrace.WithResource(resources),\\n ),\\n )\\n\\totel.SetTextMapPropagator(\\n\\t\\tpropagation.NewCompositeTextMapPropagator(\\n\\t\\t\\tpropagation.Baggage{},\\n\\t\\t\\tpropagation.TraceContext{},\\n\\t\\t),\\n\\t)\\n return exporter.Shutdown\\n}\\n\\nvar (\\n collectorURL = os.Getenv(\\"OTEL_EXPORTER_OTLP_ENDPOINT\\")\\n\\totlpHeaders = os.Getenv(\\"OTEL_EXPORTER_OTLP_HEADERS\\")\\n)\\n\\n\\nvar logger = &logrus.Logger{\\n\\tOut: os.Stderr,\\n\\tHooks: make(logrus.LevelHooks),\\n\\tLevel: logrus.InfoLevel,\\n\\tFormatter: &logrus.JSONFormatter{\\n\\t\\tFieldMap: logrus.FieldMap{\\n\\t\\t\\tlogrus.FieldKeyTime: \\"@timestamp\\",\\n\\t\\t\\tlogrus.FieldKeyLevel: \\"log.level\\",\\n\\t\\t\\tlogrus.FieldKeyMsg: \\"message\\",\\n\\t\\t\\tlogrus.FieldKeyFunc: \\"function.name\\", // non-ECS\\n\\t\\t},\\n\\t\\tTimestampFormat: time.RFC3339Nano,\\n\\t},\\n}\\n\\nfunc main() {\\n\\tcleanup := initTracer()\\n defer cleanup(context.Background())\\n\\n\\t// artificial delay configuration, read from the environment\\n\\tdelayTime, _ := strconv.Atoi(os.Getenv(\\"TOGGLE_SERVICE_DELAY\\"))\\n\\n\\tredisHost := os.Getenv(\\"REDIS_HOST\\")\\n\\tif redisHost == \\"\\" {\\n\\t\\tredisHost = \\"localhost\\"\\n\\t}\\n\\n\\tredisPort := os.Getenv(\\"REDIS_PORT\\")\\n\\tif redisPort == \\"\\" {\\n\\t\\tredisPort = \\"6379\\"\\n\\t}\\n\\n\\tapplicationPort := os.Getenv(\\"APPLICATION_PORT\\")\\n\\tif applicationPort == \\"\\" {\\n\\t\\tapplicationPort = \\"5000\\"\\n\\t}\\n\\n\\t// Initialize Redis client\\n\\trdb := 
redis.NewClient(&redis.Options{\\n\\t\\tAddr: redisHost + \\":\\" + redisPort,\\n\\t\\tPassword: \\"\\",\\n\\t\\tDB: 0,\\n\\t})\\n\\trdb.AddHook(redisotel.NewTracingHook())\\n\\n\\n\\t// Initialize router\\n\\tr := gin.New()\\n\\tr.Use(logrusMiddleware)\\n\\tr.Use(otelgin.Middleware(\\"go-favorite-otel-manual\\"))\\n\\n\\n\\t// Define routes\\n\\tr.GET(\\"/\\", func(c *gin.Context) {\\n\\t\\tcontextLogger(c).Infof(\\"Main request successful\\")\\n\\t\\tc.String(http.StatusOK, \\"Hello World!\\")\\n\\t})\\n\\n\\tr.GET(\\"/favorites\\", func(c *gin.Context) {\\n\\t\\t// artificial sleep for delayTime\\n\\t\\ttime.Sleep(time.Duration(delayTime) * time.Millisecond)\\n\\n\\t\\tuserID := c.Query(\\"user_id\\")\\n\\n\\t\\tcontextLogger(c).Infof(\\"Getting favorites for user %q\\", userID)\\n\\n\\t\\tfavorites, err := rdb.SMembers(c.Request.Context(), userID).Result()\\n\\t\\tif err != nil {\\n\\t\\t\\tcontextLogger(c).Error(\\"Failed to get favorites for user %q\\", userID)\\n\\t\\t\\tc.String(http.StatusInternalServerError, \\"Failed to get favorites\\")\\n\\t\\t\\treturn\\n\\t\\t}\\n\\n\\t\\tcontextLogger(c).Infof(\\"User %q has favorites %q\\", userID, favorites)\\n\\n\\t\\tc.JSON(http.StatusOK, gin.H{\\n\\t\\t\\t\\"favorites\\": favorites,\\n\\t\\t})\\n\\t})\\n\\n\\t// Start server\\n\\tlogger.Infof(\\"App startup\\")\\n\\tlog.Fatal(http.ListenAndServe(\\":\\"+applicationPort, r))\\n\\tlogger.Infof(\\"App stopped\\")\\n}\\n```\\n\\n### Step 2. Running the Docker image with environment variables\\n\\nAs specified in the [OTEL documentation](https://opentelemetry.io/docs/specs/otel/configuration/sdk-environment-variables/), we will use environment variables and pass in the configuration values that are found in your APM Agent’s configuration section.\\n\\nBecause Elastic accepts OTLP natively, we just need to provide the Endpoint and authentication where the OTEL Exporter needs to send the data, as well as some other environment variables.\\n\\n**Where to get these variables in Elastic Cloud and Kibana** \xae \\nYou can copy the endpoints and token from Kibana under the path /app/home#/tutorial/apm.\\n\\n![GO apm agents](/assets/images/manual-instrumentation-apps-opentelemetry/elastic-blog-GO-apm-agents.png)\\n\\nYou will need to copy the OTEL_EXPORTER_OTLP_ENDPOINT as well as the OTEL_EXPORTER_OTLP_HEADERS.\\n\\n**Build the image**\\n\\n```bash\\ndocker build -t go-otel-manual-image .\\n```\\n\\n## Run the image\\n\\n```bash\\ndocker run \\\\\\n -e OTEL_EXPORTER_OTLP_ENDPOINT=\\"\\" \\\\\\n -e OTEL_EXPORTER_OTLP_HEADERS=\\"Authorization=Bearer \\" \\\\\\n -e OTEL_RESOURCE_ATTRIBUTES=\\"service.version=1.0,deployment.environment=production,service.name=go-favorite-otel-manual\\" \\\\\\n -p 5000:5000 \\\\\\n go-otel-manual-image\\n```\\n\\nYou can now issue a few requests in order to generate trace data. Note that these requests are expected to return an error, as this service relies on a connection to Redis that you don’t currently have running. 
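If you would rather see successful responses, you can optionally start a local Redis container first; this is standard Docker usage and not part of the original walkthrough, and host.docker.internal (available on Docker Desktop) is one way for the app container to reach it:\\n\\n```bash\\n# Optional: run Redis locally so /favorites can return data\\ndocker run -d --name redis -p 6379:6379 redis\\n\\n# then include the Redis host when starting the app container:\\n# -e REDIS_HOST=\\"host.docker.internal\\"\\n```\\n\\n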
As mentioned before, you can find a more complete example using Docker Compose [here](https://github.com/elastic/observability-examples/tree/main/Elastiflix).\\n\\n```bash\\ncurl localhost:5000/favorites\\n# or alternatively issue a request every second\\n\\nwhile true; do curl \\"localhost:5000/favorites\\"; sleep 1; done;\\n```\\n\\n## How do the traces show up in Elastic?\\n\\nNow that the service is instrumented, you should see the following output in Elastic APM when looking at the transactions section of your Go service:\\n\\n![trace samples](/assets/images/manual-instrumentation-apps-opentelemetry/GO-trace-samples.png)\\n\\n## Conclusion\\n\\nIn this blog, we discussed the following:\\n\\n- How to manually instrument Go with OpenTelemetry\\n- How to properly initialize OpenTelemetry and add a custom span\\n- How to easily set the OTLP ENDPOINT and OTLP HEADERS with Elastic without the need for a collector\\n\\nHopefully, this provides an easy-to-understand walk-through of instrumenting Go with OpenTelemetry and how easy it is to send traces into Elastic.\\n\\n> Developer resources:\\n>\\n> - [Elastiflix application](https://www.elastic.co/blog/getting-started-opentelemetry-instrumentation-sample-app), a guide to instrument different languages with OpenTelemetry\\n> - Python: [Auto-instrumentation](https://www.elastic.co/blog/auto-instrumentation-of-python-applications-opentelemetry), [Manual-instrumentation](https://www.elastic.co/blog/manual-instrumentation-of-python-applications-opentelemetry)\\n> - Java: [Auto-instrumentation](https://www.elastic.co/blog/auto-instrumentation-of-java-applications-opentelemetry), [Manual-instrumentation](https://www.elastic.co/blog/manual-instrumentation-of-java-applications-opentelemetry)\\n> - Node.js: [Auto-instrumentation](https://www.elastic.co/blog/auto-instrument-nodejs-applications-opentelemetry), [Manual-instrumentation](https://www.elastic.co/blog/manual-instrumentation-of-nodejs-applications-opentelemetry)\\n> - .NET: [Auto-instrumentation](https://www.elastic.co/blog/auto-instrumentation-of-net-applications-opentelemetry), [Manual-instrumentation](https://www.elastic.co/blog/manual-instrumentation-of-net-applications-opentelemetry)\\n> - Go: [Manual-instrumentation](https://elastic.co/blog/manual-instrumentation-apps-opentelemetry)\\n> - [Best practices for instrumenting OpenTelemetry](https://www.elastic.co/blog/best-practices-instrumenting-opentelemetry)\\n>\\n> General configuration and use case resources:\\n>\\n> - [Independence with OpenTelemetry on Elastic](https://www.elastic.co/blog/opentelemetry-observability)\\n> - [Modern observability and security on Kubernetes with Elastic and OpenTelemetry](https://www.elastic.co/blog/implementing-kubernetes-observability-security-opentelemetry)\\n> - [3 models for logging with OpenTelemetry and Elastic](https://www.elastic.co/blog/3-models-logging-opentelemetry-elastic)\\n> - [Adding free and open Elastic APM as part of your Elastic Observability deployment](https://www.elastic.co/blog/adding-free-and-open-elastic-apm-as-part-of-your-elastic-observability-deployment)\\n> - [Capturing custom metrics through OpenTelemetry API in code with Elastic](https://www.elastic.co/blog/custom-metrics-app-code-java-agent-plugin)\\n> - [Future-proof your observability platform with OpenTelemetry and Elastic](https://www.elastic.co/virtual-events/future-proof-your-observability-platform-with-opentelemetry-and-elastic)\\n> - [Elastic Observability: Built for open technologies like Kubernetes, 
OpenTelemetry, Prometheus, Istio, and more](https://www.elastic.co/blog/kubernetes-k8s-observability-elasticsearch-cncf)\\n\\nDon’t have an Elastic Cloud account yet? [Sign up for Elastic Cloud](https://cloud.elastic.co/registration) and try out the auto-instrumentation capabilities that I discussed above. I would be interested in getting your feedback about your experience in gaining visibility into your application stack with Elastic.\\n\\n_The release and timing of any features or functionality described in this post remain at Elastic\'s sole discretion. Any features or functionality not currently available may not be delivered on time or at all._\\n","code":"var Component=(()=>{var h=Object.create;var r=Object.defineProperty;var u=Object.getOwnPropertyDescriptor;var g=Object.getOwnPropertyNames;var m=Object.getPrototypeOf,f=Object.prototype.hasOwnProperty;var w=(n,e)=>()=>(e||n((e={exports:{}}).exports,e),e.exports),y=(n,e)=>{for(var o in e)r(n,o,{get:e[o],enumerable:!0})},l=(n,e,o,a)=>{if(e&&typeof e==\\"object\\"||typeof e==\\"function\\")for(let i of g(e))!f.call(n,i)&&i!==o&&r(n,i,{get:()=>e[i],enumerable:!(a=u(e,i))||a.enumerable});return n};var v=(n,e,o)=>(o=n!=null?h(m(n)):{},l(e||!n||!n.__esModule?r(o,\\"default\\",{value:n,enumerable:!0}):o,n)),b=n=>l(r({},\\"__esModule\\",{value:!0}),n);var c=w((P,s)=>{s.exports=_jsx_runtime});var E={};y(E,{default:()=>p,frontmatter:()=>T});var t=v(c()),T={title:\\"Manual instrumentation of Go applications with OpenTelemetry\\",slug:\\"manual-instrumentation-apps-opentelemetry\\",date:\\"2023-09-12\\",description:\\"In this blog post, we will show you how to manually instrument Go applications using OpenTelemetry. We will explore how to use the proper OpenTelemetry Go packages and, in particular, work on instrumenting tracing in a Go application.\\",author:[{slug:\\"luca-wintergerst\\"}],image:\\"observability-launch-series-5-go-manual.jpg\\",tags:[{slug:\\"opentelemetry\\"},{slug:\\"apm\\"},{slug:\\"go\\"},{slug:\\"instrumentation\\"}]};function d(n){let e={a:\\"a\\",blockquote:\\"blockquote\\",br:\\"br\\",code:\\"code\\",em:\\"em\\",h2:\\"h2\\",h3:\\"h3\\",img:\\"img\\",li:\\"li\\",p:\\"p\\",pre:\\"pre\\",strong:\\"strong\\",ul:\\"ul\\",...n.components};return(0,t.jsxs)(t.Fragment,{children:[(0,t.jsx)(e.p,{children:\\"DevOps and SRE teams are transforming the process of software development. While DevOps engineers focus on efficient software applications and service delivery, SRE teams are key to ensuring reliability, scalability, and performance. These teams must rely on a full-stack observability solution that allows them to manage and monitor systems and ensure issues are resolved before they impact the business.\\"}),`\\n`,(0,t.jsx)(e.p,{children:\\"Observability across the entire stack of modern distributed applications requires data collection, processing, and correlation often in the form of dashboards. 
Ingesting all system data requires installing agents across stacks, frameworks, and providers \\\\u2014 a process that can be challenging and time-consuming for teams who have to deal with version changes, compatibility issues, and proprietary code that doesn\'t scale as systems change.\\"}),`\\n`,(0,t.jsxs)(e.p,{children:[\\"Thanks to \\",(0,t.jsx)(e.a,{href:\\"https://opentelemetry.io\\",rel:\\"nofollow\\",children:\\"OpenTelemetry\\"}),\\" (OTel), DevOps and SRE teams now have a standard way to collect and send data that doesn\'t rely on proprietary code and has a large support community, reducing vendor lock-in.\\"]}),`\\n`,(0,t.jsx)(e.p,{children:\\"In this blog post, we will show you how to manually instrument Go applications using OpenTelemetry. This approach is slightly more complex than using auto-instrumentation.\\"}),`\\n`,(0,t.jsxs)(e.p,{children:[\\"In a \\",(0,t.jsx)(e.a,{href:\\"https://www.elastic.co/blog/opentelemetry-observability\\",rel:\\"nofollow\\",children:\\"previous blog\\"}),\\", we also reviewed how to use the OpenTelemetry demo and connect it to Elastic\\",(0,t.jsx)(\\"sup\\",{children:\\"\\\\xAE\\"}),\\", as well as some of Elastic\\\\u2019s capabilities with OpenTelemetry. In this blog, we will use \\",(0,t.jsx)(e.a,{href:\\"https://github.com/elastic/observability-examples\\",rel:\\"nofollow\\",children:\\"an alternative demo application\\"}),\\", which helps highlight manual instrumentation in a simple way.\\"]}),`\\n`,(0,t.jsxs)(e.p,{children:[\\"Finally, we will discuss how Elastic supports mixed-mode applications, which run with Elastic and OpenTelemetry agents. The beauty of this is that there is \\",(0,t.jsx)(e.strong,{children:\\"no need for the otel-collector\\"}),\\"! This setup enables you to slowly and easily migrate an application to OTel with Elastic according to a timeline that best fits your business.\\"]}),`\\n`,(0,t.jsx)(e.h2,{id:\\"application-prerequisites-and-config\\",children:\\"Application, prerequisites, and config\\"}),`\\n`,(0,t.jsxs)(e.p,{children:[\\"The application that we use for this blog is called \\",(0,t.jsx)(e.a,{href:\\"https://github.com/elastic/observability-examples\\",rel:\\"nofollow\\",children:\\"Elastiflix\\"}),\\", a movie streaming application. It consists of several micro-services written in .NET, NodeJS, Go, and Python.\\"]}),`\\n`,(0,t.jsx)(e.p,{children:\\"Before we instrument our sample application, we will first need to understand how Elastic can receive the telemetry data.\\"}),`\\n`,(0,t.jsx)(e.p,{children:(0,t.jsx)(e.img,{src:\\"/assets/images/manual-instrumentation-apps-opentelemetry/GO-flowhcart.png\\",alt:\\"Elastic configuration options for OpenTelemetry\\",width:\\"1365\\",height:\\"687\\"})}),`\\n`,(0,t.jsx)(e.p,{children:\\"All of Elastic Observability\\\\u2019s APM capabilities are available with OTel data. 
Some of these include:\\"}),`\\n`,(0,t.jsxs)(e.ul,{children:[`\\n`,(0,t.jsx)(e.li,{children:\\"Service maps\\"}),`\\n`,(0,t.jsx)(e.li,{children:\\"Service details (latency, throughput, failed transactions)\\"}),`\\n`,(0,t.jsx)(e.li,{children:\\"Dependencies between services, distributed tracing\\"}),`\\n`,(0,t.jsx)(e.li,{children:\\"Transactions (traces)\\"}),`\\n`,(0,t.jsx)(e.li,{children:\\"Machine learning (ML) correlations\\"}),`\\n`,(0,t.jsx)(e.li,{children:\\"Log correlation\\"}),`\\n`]}),`\\n`,(0,t.jsx)(e.p,{children:\\"In addition to Elastic\\\\u2019s APM and a unified view of the telemetry data, you will also be able to use Elastic\\\\u2019s powerful machine learning capabilities to reduce the analysis, and alerting to help reduce MTTR.\\"}),`\\n`,(0,t.jsx)(e.h2,{id:\\"prerequisites\\",children:\\"Prerequisites\\"}),`\\n`,(0,t.jsxs)(e.ul,{children:[`\\n`,(0,t.jsxs)(e.li,{children:[\\"An Elastic Cloud account \\\\u2014 \\",(0,t.jsx)(e.a,{href:\\"https://cloud.elastic.co/\\",rel:\\"nofollow\\",children:\\"sign up now\\"})]}),`\\n`,(0,t.jsxs)(e.li,{children:[\\"A clone of the \\",(0,t.jsx)(e.a,{href:\\"https://github.com/elastic/observability-examples\\",rel:\\"nofollow\\",children:\\"Elastiflix demo application\\"}),\\", or your own Go application\\"]}),`\\n`,(0,t.jsxs)(e.li,{children:[\\"Basic understanding of Docker \\\\u2014 potentially install \\",(0,t.jsx)(e.a,{href:\\"https://www.docker.com/products/docker-desktop/\\",rel:\\"nofollow\\",children:\\"Docker Desktop\\"})]}),`\\n`,(0,t.jsx)(e.li,{children:\\"Basic understanding of Go\\"}),`\\n`]}),`\\n`,(0,t.jsx)(e.h2,{id:\\"view-the-example-source-code\\",children:\\"View the example source code\\"}),`\\n`,(0,t.jsxs)(e.p,{children:[\\"The full source code including the Dockerfile used in this blog can be found on \\",(0,t.jsx)(e.a,{href:\\"https://github.com/elastic/observability-examples/tree/main/Elastiflix/go-favorite-otel-manual\\",rel:\\"nofollow\\",children:\\"GitHub\\"}),\\". The repository also contains the \\",(0,t.jsx)(e.a,{href:\\"https://github.com/elastic/observability-examples/tree/main/Elastiflix/go-favorite\\",rel:\\"nofollow\\",children:\\"same application without instrumentation\\"}),\\". This allows you to compare each file and see the differences.\\"]}),`\\n`,(0,t.jsx)(e.p,{children:\\"Before we begin, let\\\\u2019s look at the non-instrumented code first.\\"}),`\\n`,(0,t.jsx)(e.p,{children:\\"This is our simple go application that can receive a GET request. 
Note that the code shown here is a slightly abbreviated version.\\"}),`\\n`,(0,t.jsx)(e.pre,{children:(0,t.jsx)(e.code,{className:\\"language-go\\",children:`package main\\n\\nimport (\\n\\t\\"log\\"\\n\\t\\"net/http\\"\\n\\t\\"os\\"\\n\\t\\"time\\"\\n\\n\\t\\"github.com/go-redis/redis/v8\\"\\n\\n\\t\\"github.com/sirupsen/logrus\\"\\n\\n\\t\\"github.com/gin-gonic/gin\\"\\n\\t\\"strconv\\"\\n\\t\\"math/rand\\"\\n)\\n\\nvar logger = &logrus.Logger{\\n\\tOut: os.Stderr,\\n\\tHooks: make(logrus.LevelHooks),\\n\\tLevel: logrus.InfoLevel,\\n\\tFormatter: &logrus.JSONFormatter{\\n\\t\\tFieldMap: logrus.FieldMap{\\n\\t\\t\\tlogrus.FieldKeyTime: \\"@timestamp\\",\\n\\t\\t\\tlogrus.FieldKeyLevel: \\"log.level\\",\\n\\t\\t\\tlogrus.FieldKeyMsg: \\"message\\",\\n\\t\\t\\tlogrus.FieldKeyFunc: \\"function.name\\", // non-ECS\\n\\t\\t},\\n\\t\\tTimestampFormat: time.RFC3339Nano,\\n\\t},\\n}\\n\\nfunc main() {\\n\\tdelayTime, _ := strconv.Atoi(os.Getenv(\\"TOGGLE_SERVICE_DELAY\\"))\\n\\n\\tredisHost := os.Getenv(\\"REDIS_HOST\\")\\n\\tif redisHost == \\"\\" {\\n\\t\\tredisHost = \\"localhost\\"\\n\\t}\\n\\n\\tredisPort := os.Getenv(\\"REDIS_PORT\\")\\n\\tif redisPort == \\"\\" {\\n\\t\\tredisPort = \\"6379\\"\\n\\t}\\n\\n\\tapplicationPort := os.Getenv(\\"APPLICATION_PORT\\")\\n\\tif applicationPort == \\"\\" {\\n\\t\\tapplicationPort = \\"5000\\"\\n\\t}\\n\\n\\t// Initialize Redis client\\n\\trdb := redis.NewClient(&redis.Options{\\n\\t\\tAddr: redisHost + \\":\\" + redisPort,\\n\\t\\tPassword: \\"\\",\\n\\t\\tDB: 0,\\n\\t})\\n\\n\\t// Initialize router\\n\\tr := gin.New()\\n\\tr.Use(logrusMiddleware)\\n\\n\\tr.GET(\\"/favorites\\", func(c *gin.Context) {\\n\\t\\t// artificial sleep for delayTime\\n\\t\\ttime.Sleep(time.Duration(delayTime) * time.Millisecond)\\n\\n\\t\\tuserID := c.Query(\\"user_id\\")\\n\\n\\t\\tcontextLogger(c).Infof(\\"Getting favorites for user %q\\", userID)\\n\\n\\t\\tfavorites, err := rdb.SMembers(c.Request.Context(), userID).Result()\\n\\t\\tif err != nil {\\n\\t\\t\\tcontextLogger(c).Error(\\"Failed to get favorites for user %q\\", userID)\\n\\t\\t\\tc.String(http.StatusInternalServerError, \\"Failed to get favorites\\")\\n\\t\\t\\treturn\\n\\t\\t}\\n\\n\\t\\tcontextLogger(c).Infof(\\"User %q has favorites %q\\", userID, favorites)\\n\\n\\t\\tc.JSON(http.StatusOK, gin.H{\\n\\t\\t\\t\\"favorites\\": favorites,\\n\\t\\t})\\n\\t})\\n\\n\\t// Start server\\n\\tlogger.Infof(\\"App startup\\")\\n\\tlog.Fatal(http.ListenAndServe(\\":\\"+applicationPort, r))\\n\\tlogger.Infof(\\"App stopped\\")\\n}\\n`})}),`\\n`,(0,t.jsx)(e.h2,{id:\\"step-by-step-guide\\",children:\\"Step-by-step guide\\"}),`\\n`,(0,t.jsx)(e.h3,{id:\\"step-0-log-in-to-your-elastic-cloud-account\\",children:\\"Step 0. Log in to your Elastic Cloud account\\"}),`\\n`,(0,t.jsxs)(e.p,{children:[\\"This blog assumes you have an Elastic Cloud account \\\\u2014 if not, follow the \\",(0,t.jsx)(e.a,{href:\\"https://cloud.elastic.co/registration?elektra=en-cloud-page\\",rel:\\"nofollow\\",children:\\"instructions to get started on Elastic Cloud\\"}),\\".\\"]}),`\\n`,(0,t.jsx)(e.p,{children:(0,t.jsx)(e.img,{src:\\"/assets/images/manual-instrumentation-apps-opentelemetry/elastic-blog-4-free-trial.png\\",alt:\\"free trial\\",width:\\"884\\",height:\\"654\\"})}),`\\n`,(0,t.jsx)(e.h3,{id:\\"step-1-install-and-initialize-opentelemetry\\",children:\\"Step 1. 
Install and initialize OpenTelemetry\\"}),`\\n`,(0,t.jsx)(e.p,{children:\\"As a first step, we\\\\u2019ll need to add some additional packages to our application.\\"}),`\\n`,(0,t.jsx)(e.pre,{children:(0,t.jsx)(e.code,{className:\\"language-go\\",children:`import (\\n \\"github.com/go-redis/redis/extra/redisotel/v8\\"\\n \\"go.opentelemetry.io/otel\\"\\n \\"go.opentelemetry.io/otel/attribute\\"\\n \\"go.opentelemetry.io/otel/exporters/otlp/otlptrace\\"\\n \\"go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracegrpc\\"\\n\\n\\t\\"go.opentelemetry.io/otel/propagation\\"\\n\\n\\t\\"google.golang.org/grpc/credentials\\"\\n\\t\\"crypto/tls\\"\\n\\n sdktrace \\"go.opentelemetry.io/otel/sdk/trace\\"\\n\\n\\t\\"go.opentelemetry.io/contrib/instrumentation/github.com/gin-gonic/gin/otelgin\\"\\n\\n\\t\\"go.opentelemetry.io/otel/trace\\"\\n\\t\\"go.opentelemetry.io/otel/codes\\"\\n)\\n`})}),`\\n`,(0,t.jsx)(e.p,{children:\\"This code imports necessary OpenTelemetry packages, including those for tracing, exporting, and instrumenting specific libraries like Redis.\\"}),`\\n`,(0,t.jsx)(e.p,{children:\'Next we read the \\"OTEL_EXPORTER_OTLP_ENDPOINT\\" variable and initialize the exporter.\'}),`\\n`,(0,t.jsx)(e.pre,{children:(0,t.jsx)(e.code,{className:\\"language-go\\",children:`var (\\n collectorURL = os.Getenv(\\"OTEL_EXPORTER_OTLP_ENDPOINT\\")\\n)\\nvar tracer trace.Tracer\\n\\n\\nfunc initTracer() func(context.Context) error {\\n\\ttracer = otel.Tracer(\\"go-favorite-otel-manual\\")\\n\\n\\t// remove https:// from the collector URL if it exists\\n\\tcollectorURL = strings.Replace(collectorURL, \\"https://\\", \\"\\", 1)\\n\\tsecretToken := os.Getenv(\\"ELASTIC_APM_SECRET_TOKEN\\")\\n\\tif secretToken == \\"\\" {\\n\\t\\tlog.Fatal(\\"ELASTIC_APM_SECRET_TOKEN is required\\")\\n\\t}\\n\\n\\tsecureOption := otlptracegrpc.WithInsecure()\\n exporter, err := otlptrace.New(\\n context.Background(),\\n otlptracegrpc.NewClient(\\n secureOption,\\n otlptracegrpc.WithEndpoint(collectorURL),\\n\\t\\t\\totlptracegrpc.WithHeaders(map[string]string{\\n\\t\\t\\t\\t\\"Authorization\\": \\"Bearer \\" + secretToken,\\n\\t\\t\\t}),\\n\\t\\t\\totlptracegrpc.WithTLSCredentials(credentials.NewTLS(&tls.Config{})),\\n ),\\n )\\n\\n if err != nil {\\n log.Fatal(err)\\n }\\n\\n otel.SetTracerProvider(\\n sdktrace.NewTracerProvider(\\n sdktrace.WithSampler(sdktrace.AlwaysSample()),\\n sdktrace.WithBatcher(exporter),\\n ),\\n )\\n\\totel.SetTextMapPropagator(\\n\\t\\tpropagation.NewCompositeTextMapPropagator(\\n\\t\\t\\tpropagation.Baggage{},\\n\\t\\t\\tpropagation.TraceContext{},\\n\\t\\t),\\n\\t)\\n return exporter.Shutdown\\n}\\n`})}),`\\n`,(0,t.jsx)(e.p,{children:\\"For instrumenting connections to Redis, we will add a tracing hook to it, and in order to instrument Gin, we will add the OTel middleware. This will automatically capture all interactions with our application, since Gin will be fully instrumented. 
In addition, all outgoing connections to Redis will also be instrumented.\\"}),`\\n`,(0,t.jsx)(e.pre,{children:(0,t.jsx)(e.code,{className:\\"language-go\\",children:`// Initialize Redis client\\n\\trdb := redis.NewClient(&redis.Options{\\n\\t\\tAddr: redisHost + \\":\\" + redisPort,\\n\\t\\tPassword: \\"\\",\\n\\t\\tDB: 0,\\n\\t})\\n\\trdb.AddHook(redisotel.NewTracingHook())\\n\\t// Initialize router\\n\\tr := gin.New()\\n\\tr.Use(logrusMiddleware)\\n\\tr.Use(otelgin.Middleware(\\"go-favorite-otel-manual\\"))\\n`})}),`\\n`,(0,t.jsxs)(e.p,{children:[(0,t.jsx)(e.strong,{children:\\"Adding custom spans\\"}),(0,t.jsx)(e.br,{}),`\\n`,\\"Now that we have everything added and initialized, we can add custom spans.\\"]}),`\\n`,(0,t.jsx)(e.p,{children:\\"If we want to have additional instrumentation for a part of our app, we simply start a custom span and then defer ending the span.\\"}),`\\n`,(0,t.jsx)(e.pre,{children:(0,t.jsx)(e.code,{className:\\"language-go\\",children:`// start otel span\\nctx := c.Request.Context()\\nctx, span := tracer.Start(ctx, \\"add_favorite_movies\\")\\ndefer span.End()\\n`})}),`\\n`,(0,t.jsxs)(e.p,{children:[\\"For comparison, this is the instrumented code of our sample application. You can find the full source code in \\",(0,t.jsx)(e.a,{href:\\"https://github.com/elastic/observability-examples/tree/main/Elastiflix/go-favorite-otel-manual\\",rel:\\"nofollow\\",children:\\"GitHub\\"}),\\".\\"]}),`\\n`,(0,t.jsx)(e.pre,{children:(0,t.jsx)(e.code,{className:\\"language-go\\",children:`package main\\n\\nimport (\\n\\t\\"log\\"\\n\\t\\"net/http\\"\\n\\t\\"os\\"\\n\\t\\"time\\"\\n\\t\\"context\\"\\n\\n\\t\\"github.com/go-redis/redis/v8\\"\\n\\t\\"github.com/go-redis/redis/extra/redisotel/v8\\"\\n\\n\\n\\t\\"github.com/sirupsen/logrus\\"\\n\\n\\t\\"github.com/gin-gonic/gin\\"\\n\\n \\"go.opentelemetry.io/otel\\"\\n \\"go.opentelemetry.io/otel/attribute\\"\\n \\"go.opentelemetry.io/otel/exporters/otlp/otlptrace\\"\\n \\"go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracegrpc\\"\\n\\n\\t\\"go.opentelemetry.io/otel/propagation\\"\\n\\n\\t\\"google.golang.org/grpc/credentials\\"\\n\\t\\"crypto/tls\\"\\n\\n sdktrace \\"go.opentelemetry.io/otel/sdk/trace\\"\\n\\n\\t\\"go.opentelemetry.io/contrib/instrumentation/github.com/gin-gonic/gin/otelgin\\"\\n\\n\\t\\"go.opentelemetry.io/otel/trace\\"\\n\\n\\t\\"strings\\"\\n\\t\\"strconv\\"\\n\\t\\"math/rand\\"\\n\\t\\"go.opentelemetry.io/otel/codes\\"\\n\\n)\\n\\nvar tracer trace.Tracer\\n\\nfunc initTracer() func(context.Context) error {\\n\\ttracer = otel.Tracer(\\"go-favorite-otel-manual\\")\\n\\n\\tcollectorURL = strings.Replace(collectorURL, \\"https://\\", \\"\\", 1)\\n\\n\\tsecureOption := otlptracegrpc.WithInsecure()\\n\\n\\t// split otlpHeaders by comma and convert to map\\n\\theaders := make(map[string]string)\\n\\tfor _, header := range strings.Split(otlpHeaders, \\",\\") {\\n\\t\\theaderParts := strings.Split(header, \\"=\\")\\n\\n\\t\\tif len(headerParts) == 2 {\\n\\t\\t\\theaders[headerParts[0]] = headerParts[1]\\n\\t\\t}\\n\\t}\\n\\n exporter, err := otlptrace.New(\\n context.Background(),\\n otlptracegrpc.NewClient(\\n secureOption,\\n otlptracegrpc.WithEndpoint(collectorURL),\\n\\t\\t\\totlptracegrpc.WithHeaders(headers),\\n\\t\\t\\totlptracegrpc.WithTLSCredentials(credentials.NewTLS(&tls.Config{})),\\n ),\\n )\\n\\n if err != nil {\\n log.Fatal(err)\\n }\\n\\n otel.SetTracerProvider(\\n sdktrace.NewTracerProvider(\\n sdktrace.WithSampler(sdktrace.AlwaysSample()),\\n sdktrace.WithBatcher(exporter),\\n 
//sdktrace.WithResource(resources),\\n ),\\n )\\n\\totel.SetTextMapPropagator(\\n\\t\\tpropagation.NewCompositeTextMapPropagator(\\n\\t\\t\\tpropagation.Baggage{},\\n\\t\\t\\tpropagation.TraceContext{},\\n\\t\\t),\\n\\t)\\n return exporter.Shutdown\\n}\\n\\nvar (\\n collectorURL = os.Getenv(\\"OTEL_EXPORTER_OTLP_ENDPOINT\\")\\n\\totlpHeaders = os.Getenv(\\"OTEL_EXPORTER_OTLP_HEADERS\\")\\n)\\n\\n\\nvar logger = &logrus.Logger{\\n\\tOut: os.Stderr,\\n\\tHooks: make(logrus.LevelHooks),\\n\\tLevel: logrus.InfoLevel,\\n\\tFormatter: &logrus.JSONFormatter{\\n\\t\\tFieldMap: logrus.FieldMap{\\n\\t\\t\\tlogrus.FieldKeyTime: \\"@timestamp\\",\\n\\t\\t\\tlogrus.FieldKeyLevel: \\"log.level\\",\\n\\t\\t\\tlogrus.FieldKeyMsg: \\"message\\",\\n\\t\\t\\tlogrus.FieldKeyFunc: \\"function.name\\", // non-ECS\\n\\t\\t},\\n\\t\\tTimestampFormat: time.RFC3339Nano,\\n\\t},\\n}\\n\\nfunc main() {\\n\\tcleanup := initTracer()\\n defer cleanup(context.Background())\\n\\n\\tredisHost := os.Getenv(\\"REDIS_HOST\\")\\n\\tif redisHost == \\"\\" {\\n\\t\\tredisHost = \\"localhost\\"\\n\\t}\\n\\n\\tredisPort := os.Getenv(\\"REDIS_PORT\\")\\n\\tif redisPort == \\"\\" {\\n\\t\\tredisPort = \\"6379\\"\\n\\t}\\n\\n\\tapplicationPort := os.Getenv(\\"APPLICATION_PORT\\")\\n\\tif applicationPort == \\"\\" {\\n\\t\\tapplicationPort = \\"5000\\"\\n\\t}\\n\\n\\t// Initialize Redis client\\n\\trdb := redis.NewClient(&redis.Options{\\n\\t\\tAddr: redisHost + \\":\\" + redisPort,\\n\\t\\tPassword: \\"\\",\\n\\t\\tDB: 0,\\n\\t})\\n\\trdb.AddHook(redisotel.NewTracingHook())\\n\\n\\n\\t// Initialize router\\n\\tr := gin.New()\\n\\tr.Use(logrusMiddleware)\\n\\tr.Use(otelgin.Middleware(\\"go-favorite-otel-manual\\"))\\n\\n\\n\\t// Define routes\\n\\tr.GET(\\"/\\", func(c *gin.Context) {\\n\\t\\tcontextLogger(c).Infof(\\"Main request successful\\")\\n\\t\\tc.String(http.StatusOK, \\"Hello World!\\")\\n\\t})\\n\\n\\tr.GET(\\"/favorites\\", func(c *gin.Context) {\\n\\t\\t// artificial sleep for delayTime\\n\\t\\ttime.Sleep(time.Duration(delayTime) * time.Millisecond)\\n\\n\\t\\tuserID := c.Query(\\"user_id\\")\\n\\n\\t\\tcontextLogger(c).Infof(\\"Getting favorites for user %q\\", userID)\\n\\n\\t\\tfavorites, err := rdb.SMembers(c.Request.Context(), userID).Result()\\n\\t\\tif err != nil {\\n\\t\\t\\tcontextLogger(c).Error(\\"Failed to get favorites for user %q\\", userID)\\n\\t\\t\\tc.String(http.StatusInternalServerError, \\"Failed to get favorites\\")\\n\\t\\t\\treturn\\n\\t\\t}\\n\\n\\t\\tcontextLogger(c).Infof(\\"User %q has favorites %q\\", userID, favorites)\\n\\n\\t\\tc.JSON(http.StatusOK, gin.H{\\n\\t\\t\\t\\"favorites\\": favorites,\\n\\t\\t})\\n\\t})\\n\\n\\t// Start server\\n\\tlogger.Infof(\\"App startup\\")\\n\\tlog.Fatal(http.ListenAndServe(\\":\\"+applicationPort, r))\\n\\tlogger.Infof(\\"App stopped\\")\\n}\\n`})}),`\\n`,(0,t.jsx)(e.h3,{id:\\"step-2-running-the-docker-image-with-environment-variables\\",children:\\"Step 2. 
Running the Docker image with environment variables\\"}),`\\n`,(0,t.jsxs)(e.p,{children:[\\"As specified in the \\",(0,t.jsx)(e.a,{href:\\"https://opentelemetry.io/docs/specs/otel/configuration/sdk-environment-variables/\\",rel:\\"nofollow\\",children:\\"OTEL documentation\\"}),\\", we will use environment variables and pass in the configuration values that are found in your APM Agent\\\\u2019s configuration section.\\"]}),`\\n`,(0,t.jsx)(e.p,{children:\\"Because Elastic accepts OTLP natively, we just need to provide the Endpoint and authentication where the OTEL Exporter needs to send the data, as well as some other environment variables.\\"}),`\\n`,(0,t.jsxs)(e.p,{children:[(0,t.jsx)(e.strong,{children:\\"Where to get these variables in Elastic Cloud and Kibana\\"}),\\" \\",(0,t.jsx)(\\"sup\\",{children:\\"\\\\xAE\\"}),(0,t.jsx)(e.br,{}),`\\n`,\\"You can copy the endpoints and token from Kibana under the path /app/home#/tutorial/apm.\\"]}),`\\n`,(0,t.jsx)(e.p,{children:(0,t.jsx)(e.img,{src:\\"/assets/images/manual-instrumentation-apps-opentelemetry/elastic-blog-GO-apm-agents.png\\",alt:\\"GO apm agents\\",width:\\"1600\\",height:\\"1084\\"})}),`\\n`,(0,t.jsx)(e.p,{children:\\"You will need to copy the OTEL_EXPORTER_OTLP_ENDPOINT as well as the OTEL_EXPORTER_OTLP_HEADERS.\\"}),`\\n`,(0,t.jsx)(e.p,{children:(0,t.jsx)(e.strong,{children:\\"Build the image\\"})}),`\\n`,(0,t.jsx)(e.pre,{children:(0,t.jsx)(e.code,{className:\\"language-bash\\",children:`docker build -t go-otel-manual-image .\\n`})}),`\\n`,(0,t.jsx)(e.h2,{id:\\"run-the-image\\",children:\\"Run the image\\"}),`\\n`,(0,t.jsx)(e.pre,{children:(0,t.jsx)(e.code,{className:\\"language-bash\\",children:`docker run \\\\\\\\\\n -e OTEL_EXPORTER_OTLP_ENDPOINT=\\"\\" \\\\\\\\\\n -e OTEL_EXPORTER_OTLP_HEADERS=\\"Authorization=Bearer \\" \\\\\\\\\\n -e OTEL_RESOURCE_ATTRIBUTES=\\"service.version=1.0,deployment.environment=production,service.name=go-favorite-otel-manual\\" \\\\\\\\\\n -p 5000:5000 \\\\\\\\\\n go-otel-manual-image\\n`})}),`\\n`,(0,t.jsxs)(e.p,{children:[\\"You can now issue a few requests in order to generate trace data. Note that these requests are expected to return an error, as this service relies on a connection to Redis that you don\\\\u2019t currently have running. 
As mentioned before, you can find a more complete example using Docker Compose \\",(0,t.jsx)(e.a,{href:\\"https://github.com/elastic/observability-examples/tree/main/Elastiflix\\",rel:\\"nofollow\\",children:\\"here\\"}),\\".\\"]}),`\\n`,(0,t.jsx)(e.pre,{children:(0,t.jsx)(e.code,{className:\\"language-bash\\",children:`curl localhost:5000/favorites\\n\\n# or alternatively issue a request every second\\n\\nwhile true; do curl \\"localhost:5000/favorites\\"; sleep 1; done;\\n`})}),`\\n`,(0,t.jsx)(e.h2,{id:\\"how-do-the-traces-show-up-in-elastic\\",children:\\"How do the traces show up in Elastic?\\"}),`\\n`,(0,t.jsx)(e.p,{children:\\"Now that the service is instrumented, you should see the following output in Elastic APM when looking at the transactions section of your Go service:\\"}),`\\n`,(0,t.jsx)(e.p,{children:(0,t.jsx)(e.img,{src:\\"/assets/images/manual-instrumentation-apps-opentelemetry/GO-trace-samples.png\\",alt:\\"trace samples\\",width:\\"1600\\",height:\\"954\\"})}),`\\n`,(0,t.jsx)(e.h2,{id:\\"conclusion\\",children:\\"Conclusion\\"}),`\\n`,(0,t.jsx)(e.p,{children:\\"In this blog, we discussed the following:\\"}),`\\n`,(0,t.jsxs)(e.ul,{children:[`\\n`,(0,t.jsx)(e.li,{children:\\"How to manually instrument Go with OpenTelemetry\\"}),`\\n`,(0,t.jsx)(e.li,{children:\\"How to properly initialize OpenTelemetry and add a custom span\\"}),`\\n`,(0,t.jsx)(e.li,{children:\\"How to easily set the OTLP ENDPOINT and OTLP HEADERS with Elastic without the need for a collector\\"}),`\\n`]}),`\\n`,(0,t.jsx)(e.p,{children:\\"Hopefully, this provides an easy-to-understand walk-through of instrumenting Go with OpenTelemetry and how easy it is to send traces into Elastic.\\"}),`\\n`,(0,t.jsxs)(e.blockquote,{children:[`\\n`,(0,t.jsx)(e.p,{children:\\"Developer resources:\\"}),`\\n`,(0,t.jsxs)(e.ul,{children:[`\\n`,(0,t.jsxs)(e.li,{children:[(0,t.jsx)(e.a,{href:\\"https://www.elastic.co/blog/getting-started-opentelemetry-instrumentation-sample-app\\",rel:\\"nofollow\\",children:\\"Elastiflix application\\"}),\\", a guide to instrument different languages with OpenTelemetry\\"]}),`\\n`,(0,t.jsxs)(e.li,{children:[\\"Python: \\",(0,t.jsx)(e.a,{href:\\"https://www.elastic.co/blog/auto-instrumentation-of-python-applications-opentelemetry\\",rel:\\"nofollow\\",children:\\"Auto-instrumentation\\"}),\\", \\",(0,t.jsx)(e.a,{href:\\"https://www.elastic.co/blog/manual-instrumentation-of-python-applications-opentelemetry\\",rel:\\"nofollow\\",children:\\"Manual-instrumentation\\"})]}),`\\n`,(0,t.jsxs)(e.li,{children:[\\"Java: \\",(0,t.jsx)(e.a,{href:\\"https://www.elastic.co/blog/auto-instrumentation-of-java-applications-opentelemetry\\",rel:\\"nofollow\\",children:\\"Auto-instrumentation\\"}),\\", \\",(0,t.jsx)(e.a,{href:\\"https://www.elastic.co/blog/manual-instrumentation-of-java-applications-opentelemetry\\",rel:\\"nofollow\\",children:\\"Manual-instrumentation\\"})]}),`\\n`,(0,t.jsxs)(e.li,{children:[\\"Node.js: \\",(0,t.jsx)(e.a,{href:\\"https://www.elastic.co/blog/auto-instrument-nodejs-applications-opentelemetry\\",rel:\\"nofollow\\",children:\\"Auto-instrumentation\\"}),\\", \\",(0,t.jsx)(e.a,{href:\\"https://www.elastic.co/blog/manual-instrumentation-of-nodejs-applications-opentelemetry\\",rel:\\"nofollow\\",children:\\"Manual-instrumentation\\"})]}),`\\n`,(0,t.jsxs)(e.li,{children:[\\".NET: \\",(0,t.jsx)(e.a,{href:\\"https://www.elastic.co/blog/auto-instrumentation-of-net-applications-opentelemetry\\",rel:\\"nofollow\\",children:\\"Auto-instrumentation\\"}),\\", 
\\",(0,t.jsx)(e.a,{href:\\"https://www.elastic.co/blog/manual-instrumentation-of-net-applications-opentelemetry\\",rel:\\"nofollow\\",children:\\"Manual-instrumentation\\"})]}),`\\n`,(0,t.jsxs)(e.li,{children:[\\"Go: \\",(0,t.jsx)(e.a,{href:\\"https://elastic.co/blog/manual-instrumentation-apps-opentelemetry\\",rel:\\"nofollow\\",children:\\"Manual-instrumentation\\"})]}),`\\n`,(0,t.jsx)(e.li,{children:(0,t.jsx)(e.a,{href:\\"https://www.elastic.co/blog/best-practices-instrumenting-opentelemetry\\",rel:\\"nofollow\\",children:\\"Best practices for instrumenting OpenTelemetry\\"})}),`\\n`]}),`\\n`,(0,t.jsx)(e.p,{children:\\"General configuration and use case resources:\\"}),`\\n`,(0,t.jsxs)(e.ul,{children:[`\\n`,(0,t.jsx)(e.li,{children:(0,t.jsx)(e.a,{href:\\"https://www.elastic.co/blog/opentelemetry-observability\\",rel:\\"nofollow\\",children:\\"Independence with OpenTelemetry on Elastic\\"})}),`\\n`,(0,t.jsx)(e.li,{children:(0,t.jsx)(e.a,{href:\\"https://www.elastic.co/blog/implementing-kubernetes-observability-security-opentelemetry\\",rel:\\"nofollow\\",children:\\"Modern observability and security on Kubernetes with Elastic and OpenTelemetry\\"})}),`\\n`,(0,t.jsx)(e.li,{children:(0,t.jsx)(e.a,{href:\\"https://www.elastic.co/blog/3-models-logging-opentelemetry-elastic\\",rel:\\"nofollow\\",children:\\"3 models for logging with OpenTelemetry and Elastic\\"})}),`\\n`,(0,t.jsx)(e.li,{children:(0,t.jsx)(e.a,{href:\\"https://www.elastic.co/blog/adding-free-and-open-elastic-apm-as-part-of-your-elastic-observability-deployment\\",rel:\\"nofollow\\",children:\\"Adding free and open Elastic APM as part of your Elastic Observability deployment\\"})}),`\\n`,(0,t.jsx)(e.li,{children:(0,t.jsx)(e.a,{href:\\"https://www.elastic.co/blog/custom-metrics-app-code-java-agent-plugin\\",rel:\\"nofollow\\",children:\\"Capturing custom metrics through OpenTelemetry API in code with Elastic\\"})}),`\\n`,(0,t.jsx)(e.li,{children:(0,t.jsx)(e.a,{href:\\"https://www.elastic.co/virtual-events/future-proof-your-observability-platform-with-opentelemetry-and-elastic\\",rel:\\"nofollow\\",children:\\"Future-proof your observability platform with OpenTelemetry and Elastic\\"})}),`\\n`,(0,t.jsx)(e.li,{children:(0,t.jsx)(e.a,{href:\\"https://www.elastic.co/blog/kubernetes-k8s-observability-elasticsearch-cncf\\",rel:\\"nofollow\\",children:\\"Elastic Observability: Built for open technologies like Kubernetes, OpenTelemetry, Prometheus, Istio, and more\\"})}),`\\n`]}),`\\n`]}),`\\n`,(0,t.jsxs)(e.p,{children:[\\"Don\\\\u2019t have an Elastic Cloud account yet? \\",(0,t.jsx)(e.a,{href:\\"https://cloud.elastic.co/registration\\",rel:\\"nofollow\\",children:\\"Sign up for Elastic Cloud\\"}),\\" and try out the auto-instrumentation capabilities that I discussed above. I would be interested in getting your feedback about your experience in gaining visibility into your application stack with Elastic.\\"]}),`\\n`,(0,t.jsx)(e.p,{children:(0,t.jsx)(e.em,{children:\\"The release and timing of any features or functionality described in this post remain at Elastic\'s sole discretion. 
Any features or functionality not currently available may not be delivered on time or at all.\\"})})]})}function p(n={}){let{wrapper:e}=n.components||{};return e?(0,t.jsx)(e,{...n,children:(0,t.jsx)(d,{...n})}):d(n)}return b(E);})();\\n;return Component;"},"_id":"articles/manual-instrumentation-of-go-applications-opentelemetry.mdx","_raw":{"sourceFilePath":"articles/manual-instrumentation-of-go-applications-opentelemetry.mdx","sourceFileName":"manual-instrumentation-of-go-applications-opentelemetry.mdx","sourceFileDir":"articles","contentType":"mdx","flattenedPath":"articles/manual-instrumentation-of-go-applications-opentelemetry"},"type":"Article","imageUrl":"/assets/images/manual-instrumentation-apps-opentelemetry/observability-launch-series-5-go-manual.jpg","readingTime":"14 min read","url":"/manual-instrumentation-apps-opentelemetry","headings":[{"level":2,"title":"Application, prerequisites, and config","href":"#application-prerequisites-and-config"},{"level":2,"title":"Prerequisites","href":"#prerequisites"},{"level":2,"title":"View the example source code","href":"#view-the-example-source-code"},{"level":2,"title":"Step-by-step guide","href":"#step-by-step-guide"},{"level":3,"title":"Step 0. Log in to your Elastic Cloud account","href":"#step-0-log-in-to-your-elastic-cloud-account"},{"level":3,"title":"Step 1. Install and initialize OpenTelemetry","href":"#step-1-install-and-initialize-opentelemetry"},{"level":3,"title":"Step 2. Running the Docker image with environment variables","href":"#step-2-running-the-docker-image-with-environment-variables"},{"level":2,"title":"Run the image","href":"#run-the-image"},{"level":2,"title":"How do the traces show up in Elastic?","href":"#how-do-the-traces-show-up-in-elastic"},{"level":2,"title":"Conclusion","href":"#conclusion"}]},{"title":"Manual instrumentation of Java applications with OpenTelemetry","slug":"manual-instrumentation-java-apps-opentelemetry","date":"2023-08-31","description":"OpenTelemetry provides an observability framework for cloud-native software, allowing us to trace, monitor, and debug applications seamlessly. In this post, we\'ll explore how to manually instrument a Java application using OpenTelemetry.","image":"observability-launch-series-3-java-manual.jpg","author":[{"slug":"david-hope","type":"Author","_raw":{}}],"tags":[{"slug":"opentelemetry","type":"Tag","_raw":{}},{"slug":"java","type":"Tag","_raw":{}},{"slug":"apm","type":"Tag","_raw":{}},{"slug":"instrumentation","type":"Tag","_raw":{}}],"body":{"raw":"\\nIn the fast-paced universe of software development, especially in the cloud-native realm, DevOps and SRE teams are increasingly emerging as essential partners in application stability and growth.\\n\\nDevOps engineers continuously optimize software delivery, while SRE teams act as the stewards of application reliability, scalability, and top-tier performance. The challenge? These teams require a cutting-edge observability solution, one that encompasses full-stack insights, empowering them to rapidly manage, monitor, and rectify potential disruptions before they culminate into operational challenges.\\n\\nObservability in our modern distributed software ecosystem goes beyond mere monitoring—it demands limitless data collection, precision in processing, and the correlation of this data into actionable insights. 
However, the road to achieving this holistic view is paved with obstacles: from navigating version incompatibilities to wrestling with restrictive proprietary code.\\n\\nEnter [OpenTelemetry (OTel)](https://opentelemetry.io/), with the following benefits for those who adopt it:\\n\\n- Escape vendor constraints with OTel, freeing yourself from vendor lock-in and ensuring top-notch observability.\\n- See the harmony of unified logs, metrics, and traces come together to provide a complete system view.\\n- Improve your application oversight through richer and enhanced instrumentations.\\n- Embrace the benefits of backward compatibility to protect your prior instrumentation investments.\\n- Embark on the OpenTelemetry journey with an easy learning curve, simplifying onboarding and scalability.\\n- Rely on a proven, future-ready standard to boost your confidence in every investment.\\n\\nIn this blog, we will explore how you can use [manual instrumentation in your Java](https://opentelemetry.io/docs/instrumentation/java/manual/) application using Docker, without the need to refactor any part of your application code. We will use an [application called Elastiflix](https://github.com/elastic/observability-examples). This approach is slightly more complex than using [automatic instrumentation](https://www.elastic.co/blog/auto-instrumentation-of-java-applications-opentelemetry).\\n\\nThe beauty of this is that there is **no need for the otel-collector**! This setup enables you to slowly and easily migrate an application to OTel with Elastic according to a timeline that best fits your business.\\n\\n## Application, prerequisites, and config\\n\\nThe application that we use for this blog is called [Elastiflix](https://github.com/elastic/observability-examples), a movie streaming application. It consists of several micro-services written in .NET, NodeJS, Go, and Python.\\n\\nBefore we instrument our sample application, we will first need to understand how Elastic can receive the telemetry data.\\n\\n![Elastic configuration options for OpenTelemetry](/assets/images/manual-instrumentation-java-apps-opentelemetry/elastic-blog-1-config.png)\\n\\nAll of Elastic Observability’s APM capabilities are available with OTel data. Some of these include:\\n\\n- Service maps\\n- Service details (latency, throughput, failed transactions)\\n- Dependencies between services, distributed tracing\\n- Transactions (traces)\\n- Machine learning (ML) correlations\\n- Log correlation\\n\\nIn addition to Elastic’s APM and a unified view of the telemetry data, you will also be able to use Elastic’s powerful machine learning capabilities to reduce the analysis, and alerting to help reduce MTTR.\\n\\n## Prerequisites\\n\\n- An Elastic Cloud account — [sign up now](https://cloud.elastic.co/)\\n- A clone of the [Elastiflix demo application](https://github.com/elastic/observability-examples), or your own Java application\\n- Basic understanding of Docker — potentially install [Docker Desktop](https://www.docker.com/products/docker-desktop/)\\n- Basic understanding of Java\\n\\n## View the example source code\\n\\nThe full source code, including the Dockerfile used in this blog, can be found on [GitHub](https://github.com/elastic/observability-examples/tree/main/Elastiflix/python-favorite-otel-auto). The repository also contains the [same application without instrumentation](https://github.com/elastic/observability-examples/tree/main/Elastiflix/python-favorite). 
This allows you to compare each file and see the differences.\\n\\nIn particular, we will be working through the following file:\\n\\n```bash\\nElastiflix/java-favorite/src/main/java/com/movieapi/ApiServlet.java\\n```\\n\\nThe following steps will show you how to instrument this application and run it on the command line or in Docker. If you are interested in a more complete OTel example, take a look at the docker-compose file [here](https://github.com/elastic/observability-examples/tree/main#start-the-app), which will bring up the full project.\\n\\nBefore we begin, let’s look at the non-instrumented code first.\\n\\n## Step-by-step guide\\n\\n### Step 0. Log in to your Elastic Cloud account\\n\\nThis blog assumes you have an Elastic Cloud account — if not, follow the [instructions to get started on Elastic Cloud](https://cloud.elastic.co/registration?elektra=en-cloud-page).\\n\\n![trial](/assets/images/manual-instrumentation-java-apps-opentelemetry/elastic-blog-2-trial.png)\\n\\n### Step 1. Set up OpenTelemetry\\n\\nThe first step is to set up the OpenTelemetry SDK in your Java application. You can start by adding the OpenTelemetry Java SDK and its dependencies to your project\'s build file, such as Maven or Gradle. In our example application, we are using Maven. Add the dependencies below to your pom.xml:\\n\\n```xml\\n<dependency>\\n  <groupId>io.opentelemetry.instrumentation</groupId>\\n  <artifactId>opentelemetry-logback-mdc-1.0</artifactId>\\n  <version>1.25.1-alpha</version>\\n</dependency>\\n\\n<dependency>\\n  <groupId>io.opentelemetry</groupId>\\n  <artifactId>opentelemetry-api</artifactId>\\n</dependency>\\n<dependency>\\n  <groupId>io.opentelemetry</groupId>\\n  <artifactId>opentelemetry-sdk</artifactId>\\n</dependency>\\n<dependency>\\n  <groupId>io.opentelemetry</groupId>\\n  <artifactId>opentelemetry-exporter-otlp</artifactId>\\n</dependency>\\n<dependency>\\n  <groupId>io.opentelemetry</groupId>\\n  <artifactId>opentelemetry-semconv</artifactId>\\n</dependency>\\n<dependency>\\n  <groupId>io.opentelemetry</groupId>\\n  <artifactId>opentelemetry-exporter-otlp-logs</artifactId>\\n</dependency>\\n<dependency>\\n  <groupId>io.opentelemetry.instrumentation</groupId>\\n  <artifactId>opentelemetry-logback-appender-1.0</artifactId>\\n  <version>1.25.1-alpha</version>\\n</dependency>\\n```\\n\\nAnd add the following bill of materials from OpenTelemetry too:\\n\\n```xml\\n<dependencyManagement>\\n  <dependencies>\\n    <dependency>\\n      <groupId>io.opentelemetry</groupId>\\n      <artifactId>opentelemetry-bom</artifactId>\\n      <version>1.25.0</version>\\n      <type>pom</type>\\n      <scope>import</scope>\\n    </dependency>\\n    <dependency>\\n      <groupId>io.opentelemetry</groupId>\\n      <artifactId>opentelemetry-bom-alpha</artifactId>\\n      <version>1.25.0-alpha</version>\\n      <type>pom</type>\\n      <scope>import</scope>\\n    </dependency>\\n  </dependencies>\\n</dependencyManagement>\\n```\\n\\n### Step 2. Add the application configuration\\n\\nWe recommend that you add the following configuration to the application’s main method, to start before any application code. Doing it like this gives you a bit more control and flexibility and ensures that OpenTelemetry will be available at any stage of the application lifecycle. In the examples, we put this code before the Spring Boot Application startup. Elastic supports OTLP over HTTP and OTLP over gRPC. 
In this example, we are using gRPC.\\n\\n```java\\nString SERVICE_NAME = System.getenv(\\"OTEL_SERVICE_NAME\\");\\n\\n// set service name on all OTel signals\\nResource resource = Resource.getDefault().merge(\\n    Resource.create(Attributes.of(\\n        ResourceAttributes.SERVICE_NAME, SERVICE_NAME,\\n        ResourceAttributes.SERVICE_VERSION, \\"1.0\\",\\n        ResourceAttributes.DEPLOYMENT_ENVIRONMENT, \\"production\\")));\\n\\n// init OTel logger provider with export to OTLP\\nSdkLoggerProvider sdkLoggerProvider = SdkLoggerProvider.builder()\\n    .setResource(resource)\\n    .addLogRecordProcessor(BatchLogRecordProcessor.builder(\\n        OtlpGrpcLogRecordExporter.builder()\\n            .setEndpoint(System.getenv(\\"OTEL_EXPORTER_OTLP_ENDPOINT\\"))\\n            .addHeader(\\"Authorization\\", \\"Bearer \\" + System.getenv(\\"ELASTIC_APM_SECRET_TOKEN\\"))\\n            .build()).build())\\n    .build();\\n\\n// init OTel trace provider with export to OTLP\\nSdkTracerProvider sdkTracerProvider = SdkTracerProvider.builder()\\n    .setResource(resource)\\n    .setSampler(Sampler.alwaysOn())\\n    .addSpanProcessor(BatchSpanProcessor.builder(\\n        OtlpGrpcSpanExporter.builder()\\n            .setEndpoint(System.getenv(\\"OTEL_EXPORTER_OTLP_ENDPOINT\\"))\\n            .addHeader(\\"Authorization\\", \\"Bearer \\" + System.getenv(\\"ELASTIC_APM_SECRET_TOKEN\\"))\\n            .build()).build())\\n    .build();\\n\\n// init OTel meter provider with export to OTLP\\nSdkMeterProvider sdkMeterProvider = SdkMeterProvider.builder()\\n    .setResource(resource)\\n    .registerMetricReader(PeriodicMetricReader.builder(\\n        OtlpGrpcMetricExporter.builder()\\n            .setEndpoint(System.getenv(\\"OTEL_EXPORTER_OTLP_ENDPOINT\\"))\\n            .addHeader(\\"Authorization\\", \\"Bearer \\" + System.getenv(\\"ELASTIC_APM_SECRET_TOKEN\\"))\\n            .build()).build())\\n    .build();\\n\\n// create sdk object and set it as global\\nOpenTelemetrySdk sdk = OpenTelemetrySdk.builder()\\n    .setTracerProvider(sdkTracerProvider)\\n    .setLoggerProvider(sdkLoggerProvider)\\n    .setMeterProvider(sdkMeterProvider)\\n    .setPropagators(ContextPropagators.create(W3CTraceContextPropagator.getInstance()))\\n    .build();\\n\\nGlobalOpenTelemetry.set(sdk);\\n// connect logger\\nGlobalLoggerProvider.set(sdk.getSdkLoggerProvider());\\n// Add hook to close SDK, which flushes logs\\nRuntime.getRuntime().addShutdownHook(new Thread(sdk::close));\\n```
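\\n\\nTo make the wiring concrete, here is a minimal sketch of where this configuration runs in a Spring Boot entry point. The class name FavoriteApplication and the initOpenTelemetry() helper are illustrative names, not code from the sample app; the helper simply wraps the snippet above.\\n\\n```java\\nimport org.springframework.boot.SpringApplication;\\nimport org.springframework.boot.autoconfigure.SpringBootApplication;\\n\\n@SpringBootApplication\\npublic class FavoriteApplication {\\n    public static void main(String[] args) {\\n        // hypothetical helper wrapping the OpenTelemetry setup shown above;\\n        // it must run before any application code starts\\n        initOpenTelemetry();\\n        SpringApplication.run(FavoriteApplication.class, args);\\n    }\\n\\n    private static void initOpenTelemetry() {\\n        // ... the configuration snippet from above goes here ...\\n    }\\n}\\n```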
\\n\\n### Step 3. Create the Tracer and start the OpenTelemetry Span inside the TracingFilter\\n\\nIn the Spring Boot example, you will notice that we have a TracingFilter class which extends the OncePerRequestFilter class. This Filter is a component placed at the front of the request processing chain. Its primary roles are to intercept incoming requests and outgoing responses, performing tasks such as logging, authentication, transformation of request/response entities, and more. So what we do here is intercept the request as it comes into the Favorite service, so that we can pull out the headers which may contain tracing information from upstream systems.\\n\\nWe start by using the OpenTelemetry Tracer, which is a core component of OpenTelemetry that allows you to create spans, start and stop them, and add attributes and events. In your Java code, import the necessary OpenTelemetry classes and create an instance of the Tracer within your application.\\n\\nWe use this to create a new downstream span, which will continue as a child from the span created in the upstream system using the information we got from the upstream request. In our Elastiflix example, this will be the Node.js application.\\n\\n```java\\n@Override\\nprotected void doFilterInternal(jakarta.servlet.http.HttpServletRequest request, jakarta.servlet.http.HttpServletResponse response, jakarta.servlet.FilterChain filterChain) throws jakarta.servlet.ServletException, IOException {\\n    Tracer tracer = GlobalOpenTelemetry.getTracer(SERVICE_NAME);\\n\\n    Context extractedContext = GlobalOpenTelemetry.getPropagators()\\n        .getTextMapPropagator()\\n        .extract(Context.current(), request, getter);\\n\\n    Span span = tracer.spanBuilder(request.getRequestURI())\\n        .setSpanKind(SpanKind.SERVER)\\n        .setParent(extractedContext)\\n        .startSpan();\\n\\n    try (Scope scope = span.makeCurrent()) {\\n        filterChain.doFilter(request, response);\\n    } catch (Exception e) {\\n        span.setStatus(StatusCode.ERROR);\\n        throw e;\\n    } finally {\\n        span.end();\\n    }\\n}\\n```\\n\\n### Step 4. Instrument other interesting code with spans\\n\\nTo instrument with spans and track specific regions of your code, you can use the Tracer\'s SpanBuilder to create spans. To accurately measure the duration of a specific operation, make sure to start and stop the spans at the appropriate locations in your code. Use the SpanBuilder\'s startSpan method to mark the beginning of the span, and call end on the resulting Span to mark its end. For example, you can create a span around a specific method or operation in your code, as shown here in the handleCanary method:\\n\\n```java\\nprivate void handleCanary() throws Exception {\\n    Span span = GlobalOpenTelemetry.getTracer(SERVICE_NAME).spanBuilder(\\"handleCanary\\").startSpan();\\n    Scope scope = span.makeCurrent();\\n\\n    ///.....\\n\\n    span.setStatus(StatusCode.OK);\\n\\n    span.end();\\n\\n    scope.close();\\n}\\n```\\n\\n### Step 5. Add attributes and events to spans\\n\\nYou can enhance the spans with additional attributes and events to provide more context and details about the operation being tracked. Attributes can be key-value pairs that describe the span, while events can be used to mark significant points in the span\'s lifecycle. This is also shown in the handleCanary method:\\n\\n```java\\nprivate void handleCanary() throws Exception {\\n\\n    Span.current().setAttribute(\\"canary\\", \\"test-new-feature\\");\\n    Span.current().setAttribute(\\"quiz_solution\\", \\"correlations\\");\\n\\n    Span.current().addEvent(\\"a span event\\", Attributes\\n        .of(AttributeKey.longKey(\\"someKey\\"), Long.valueOf(93)));\\n}\\n```
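\\n\\nOne caveat worth adding here: if the code between makeCurrent() and scope.close() in step 4 throws, the Scope is never closed and the span context can leak onto the thread. A safer variant of the same handleCanary span, as a minimal sketch using the same imports as the snippets above, relies on try-with-resources for the Scope and a finally block for the Span:\\n\\n```java\\nprivate void handleCanary() throws Exception {\\n    Span span = GlobalOpenTelemetry.getTracer(SERVICE_NAME)\\n        .spanBuilder(\\"handleCanary\\").startSpan();\\n    // try-with-resources closes the Scope even if the body throws\\n    try (Scope scope = span.makeCurrent()) {\\n        Span.current().setAttribute(\\"canary\\", \\"test-new-feature\\");\\n        span.setStatus(StatusCode.OK);\\n    } finally {\\n        // always end the span so it gets exported\\n        span.end();\\n    }\\n}\\n```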
\\n\\n### Step 6. Instrument backends\\n\\nLet\'s consider an example where we are instrumenting a Redis database call. We\'re using the Java OpenTelemetry SDK, and our goal is to create a trace that captures each \\"Post User Favorites\\" operation to the database.\\n\\nBelow is the Java method that performs the operation and collects telemetry data:\\n\\n```java\\npublic void postUserFavorites(String user_id, String movieID) {\\n    ...\\n}\\n```\\n\\nLet\'s go through it line by line:\\n\\n**Initializing a span** \\nThe first important line of our method is where we initialize a span. A span represents a single operation within a trace, which could be a database call, a remote procedure call (RPC), or any segment of code that you want to measure.\\n\\n```java\\nSpan span = GlobalOpenTelemetry.getTracer(SERVICE_NAME).spanBuilder(\\"Redis.Post\\").setSpanKind(SpanKind.CLIENT).startSpan();\\n```\\n\\n**Setting span attributes** \\nNext, we add attributes to our span. Attributes are key-value pairs that provide additional information about the span. In order to get the backend call to appear correctly in the service map, it is critical that the attributes are set correctly for the backend call type. In this example, we set the db.system attribute to redis.\\n\\n```java\\nspan.setAttribute(\\"db.system\\", \\"redis\\");\\nspan.setAttribute(\\"db.connection_string\\", redisHost);\\nspan.setAttribute(\\n    \\"db.statement\\",\\n    \\"POST user_id \\" + user_id + \\" AND movie_id \\" + movieID\\n);\\n```\\n\\nThis will ensure calls to the Redis backend are tracked as shown below:\\n\\n![flowchart](/assets/images/manual-instrumentation-java-apps-opentelemetry/elastic-blog-3-flowchart.png)\\n\\n**Capturing the result of the operation** \\nWe then execute the operation we\'re interested in, within a try-catch block. If an exception occurs during the execution of the operation, we record it in the span.\\n\\n```java\\ntry (Scope scope = span.makeCurrent()) {\\n    ...\\n} catch (Exception e) {\\n    span.setStatus(StatusCode.ERROR, \\"Error while getting data from Redis\\");\\n    span.recordException(e);\\n}\\n```\\n\\n**Closing resources** \\nFinally, we close the Redis connection and end the span.\\n\\n```java\\nfinally {\\n    jedis.close();\\n    span.end();\\n}\\n```
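\\n\\nPutting those fragments together, a complete version of the method could look like the following sketch. The Redis write itself was elided in the original listing, so the jedis.sadd call is illustrative only, assuming jedis is a connected redis.clients.jedis.Jedis client and redisHost is defined nearby:\\n\\n```java\\npublic void postUserFavorites(String user_id, String movieID) {\\n    // create a CLIENT span for the outgoing Redis call\\n    Span span = GlobalOpenTelemetry.getTracer(SERVICE_NAME)\\n        .spanBuilder(\\"Redis.Post\\").setSpanKind(SpanKind.CLIENT).startSpan();\\n    span.setAttribute(\\"db.system\\", \\"redis\\");\\n    span.setAttribute(\\"db.connection_string\\", redisHost);\\n    span.setAttribute(\\"db.statement\\", \\"POST user_id \\" + user_id + \\" AND movie_id \\" + movieID);\\n\\n    try (Scope scope = span.makeCurrent()) {\\n        // illustrative operation: add the movie to the favorites set for this user\\n        jedis.sadd(user_id, movieID);\\n    } catch (Exception e) {\\n        span.setStatus(StatusCode.ERROR, \\"Error while getting data from Redis\\");\\n        span.recordException(e);\\n    } finally {\\n        jedis.close();\\n        span.end();\\n    }\\n}\\n```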
\\n\\n### Step 7. Configure logging\\n\\nLogging is an essential part of application monitoring and troubleshooting. OpenTelemetry allows you to integrate with existing logging frameworks, such as Logback or Log4j, to capture logs along with the telemetry data. Configure the logging framework of your choice to capture logs related to the instrumented spans. In our example application, check out the logback configuration, which shows how to export logs directly to Elastic.\\n\\n```xml\\n<?xml version=\\"1.0\\" encoding=\\"UTF-8\\"?>\\n<configuration>\\n\\n  <appender name=\\"OpenTelemetry\\" class=\\"io.opentelemetry.instrumentation.logback.appender.v1_0.OpenTelemetryAppender\\">\\n    <captureExperimentalAttributes>false</captureExperimentalAttributes>\\n    <captureCodeAttributes>true</captureCodeAttributes>\\n    <captureMarkerAttribute>true</captureMarkerAttribute>\\n  </appender>\\n\\n  <appender name=\\"console\\" class=\\"ch.qos.logback.core.ConsoleAppender\\">\\n    <encoder>\\n      <pattern>%d{HH:mm:ss.SSS} [%thread] %-5level %logger{36} - %msg%n</pattern>\\n    </encoder>\\n  </appender>\\n\\n  <root level=\\"INFO\\">\\n    <appender-ref ref=\\"console\\"/>\\n    <appender-ref ref=\\"OpenTelemetry\\"/>\\n  </root>\\n\\n</configuration>\\n```\\n\\n### Step 8. Running the Docker image with environment variables\\n\\nAs specified in the [OTEL Java documentation](https://opentelemetry.io/docs/instrumentation/java/automatic/), we will use environment variables and pass in the configuration values to enable it to connect with [Elastic Observability’s APM server](https://www.elastic.co/guide/en/apm/guide/current/open-telemetry.html).\\n\\nBecause Elastic accepts OTLP natively, we just need to provide the Endpoint and authentication where the OTEL Exporter needs to send the data, as well as some other environment variables.\\n\\n**Getting Elastic Cloud variables** \\nYou can copy the endpoints and token from Kibana under the path `/app/home#/tutorial/apm`.\\n\\n![apm agents](/assets/images/manual-instrumentation-java-apps-opentelemetry/elastic-blog-3-apm.png)\\n\\nYou will need to copy the following environment variable:\\n\\n```bash\\nOTEL_EXPORTER_OTLP_ENDPOINT\\n```\\n\\nAs well as the token from:\\n\\n```bash\\nOTEL_EXPORTER_OTLP_HEADERS\\n```\\n\\n**Build the Docker image**\\n\\n```bash\\ndocker build -t java-otel-manual-image .\\n```\\n\\n**Run the Docker image**\\n\\n```bash\\ndocker run \\\\\\n -e OTEL_EXPORTER_OTLP_ENDPOINT=\\"REPLACE WITH OTEL_EXPORTER_OTLP_ENDPOINT\\" \\\\\\n -e ELASTIC_APM_SECRET_TOKEN=\\"REPLACE WITH TOKEN\\" \\\\\\n -e OTEL_RESOURCE_ATTRIBUTES=\\"service.version=1.0,deployment.environment=production\\" \\\\\\n -e OTEL_SERVICE_NAME=\\"java-favorite-otel-manual\\" \\\\\\n -p 5000:5000 \\\\\\n java-otel-manual-image\\n```\\n\\nYou can now issue a few requests in order to generate trace data. Note that these requests are expected to return an error, as this service relies on a connection to Redis that you don’t currently have running. As mentioned before, you can find a more complete example using docker-compose [here](https://github.com/elastic/observability-examples/tree/main/Elastiflix).\\n\\n```bash\\ncurl localhost:5000/favorites\\n\\n# or alternatively issue a request every second\\n\\nwhile true; do curl \\"localhost:5000/favorites\\"; sleep 1; done;\\n```\\n\\n### Step 9. Explore traces and logs in Elastic APM\\n\\nOnce you have this up and running, you can ping the endpoint for your instrumented service (in our case, this is /favorites), and you should see the app appear in Elastic APM, as shown below:\\n\\n![services](/assets/images/manual-instrumentation-java-apps-opentelemetry/elastic-blog-5-services.png)\\n\\nIt will begin by tracking throughput and latency, critical metrics for SREs to pay attention to.\\n\\nDigging in, we can see an overview of all our Transactions.\\n\\n![java favorite otel graph](/assets/images/manual-instrumentation-java-apps-opentelemetry/elastic-blog-6-java-fave-otel.png)\\n\\nAnd look at specific transactions:\\n\\n![graph2](/assets/images/manual-instrumentation-java-apps-opentelemetry/elastic-blog-7-graph1.png)\\n\\nClick on **Logs**, and we see that logs are also brought over. The OTel Agent will automatically bring in logs and correlate them with traces for you:\\n\\n![graph3](/assets/images/manual-instrumentation-java-apps-opentelemetry/elastic-blog-8-graph2.png)\\n\\nThis gives you complete visibility across logs, metrics, and traces!\\n\\n## Wrapping up\\n\\nManually instrumenting your Java applications with OpenTelemetry gives you greater control over what to track and monitor. By following the steps outlined in this blog post, you can effectively monitor the performance of your Java applications, identify issues, and gain insights into the overall health of your application.\\n\\nRemember, OpenTelemetry is a powerful tool, and proper instrumentation requires careful consideration of what metrics, traces, and logs are essential for your specific use case. 
Experiment with different configurations, leverage the OpenTelemetry SDK for Java documentation, and continuously iterate to achieve the observability goals of your application.\\n\\nIn this blog, we discussed the following:\\n\\n- How to manually instrument Java with OpenTelemetry\\n- How to properly initialize and instrument span\\n- How to easily set the OTLP ENDPOINT and OTLP HEADERS from Elastic without the need for a collector\\n\\nHopefully, this provided an easy-to-understand walk-through of instrumenting Java with OpenTelemetry and how easy it is to send traces into Elastic.\\n\\n> Developer resources:\\n>\\n> - [Elastiflix application](https://www.elastic.co/blog/getting-started-opentelemetry-instrumentation-sample-app), a guide to instrument different languages with OpenTelemetry\\n> - Python: [Auto-instrumentation](https://www.elastic.co/blog/auto-instrumentation-of-python-applications-opentelemetry), [Manual-instrumentation](https://www.elastic.co/blog/manual-instrumentation-of-python-applications-opentelemetry)\\n> - Java: [Auto-instrumentation](https://www.elastic.co/blog/auto-instrumentation-of-java-applications-opentelemetry), [Manual-instrumentation](https://www.elastic.co/blog/manual-instrumentation-java-apps-opentelemetry)\\n> - Node.js: [Auto-instrumentation](https://www.elastic.co/blog/auto-instrument-nodejs-applications-opentelemetry), [Manual-instrumentation](https://www.elastic.co/blog/manual-instrumentation-of-nodejs-applications-opentelemetry)\\n> - .NET: [Auto-instrumentation](https://www.elastic.co/blog/auto-instrumentation-of-net-applications-opentelemetry), [Manual-instrumentation](https://www.elastic.co/blog/manual-instrumentation-of-net-applications-opentelemetry)\\n> - Go: [Manual-instrumentation](https://elastic.co/blog/manual-instrumentation-of-go-applications-opentelemetry)\\n> - [Best practices for instrumenting OpenTelemetry](https://www.elastic.co/blog/best-practices-instrumenting-opentelemetry)\\n>\\n> General configuration and use case resources:\\n>\\n> - [Independence with OpenTelemetry on Elastic](https://www.elastic.co/blog/opentelemetry-observability)\\n> - [Modern observability and security on Kubernetes with Elastic and OpenTelemetry](https://www.elastic.co/blog/implementing-kubernetes-observability-security-opentelemetry)\\n> - [3 models for logging with OpenTelemetry and Elastic](https://www.elastic.co/blog/3-models-logging-opentelemetry-elastic)\\n> - [Adding free and open Elastic APM as part of your Elastic Observability deployment](https://www.elastic.co/blog/adding-free-and-open-elastic-apm-as-part-of-your-elastic-observability-deployment)\\n> - [Capturing custom metrics through OpenTelemetry API in code with Elastic](https://www.elastic.co/blog/custom-metrics-app-code-java-agent-plugin)\\n> - [Future-proof your observability platform with OpenTelemetry and Elastic](https://www.elastic.co/virtual-events/future-proof-your-observability-platform-with-opentelemetry-and-elastic)\\n> - [Elastic Observability: Built for open technologies like Kubernetes, OpenTelemetry, Prometheus, Istio, and more](https://www.elastic.co/blog/kubernetes-k8s-observability-elasticsearch-cncf)\\n\\nDon’t have an Elastic Cloud account yet? [Sign up for Elastic Cloud](https://cloud.elastic.co/registration) and try out the auto-instrumentation capabilities that I discussed above. 
I would be interested in getting your feedback about your experience in gaining visibility into your application stack with Elastic.\\n\\n_The release and timing of any features or functionality described in this post remain at Elastic\'s sole discretion. Any features or functionality not currently available may not be delivered on time or at all._\\n","code":"var Component=(()=>{var h=Object.create;var r=Object.defineProperty;var u=Object.getOwnPropertyDescriptor;var m=Object.getOwnPropertyNames;var g=Object.getPrototypeOf,y=Object.prototype.hasOwnProperty;var f=(n,e)=>()=>(e||n((e={exports:{}}).exports,e),e.exports),w=(n,e)=>{for(var i in e)r(n,i,{get:e[i],enumerable:!0})},l=(n,e,i,o)=>{if(e&&typeof e==\\"object\\"||typeof e==\\"function\\")for(let a of m(e))!y.call(n,a)&&a!==i&&r(n,a,{get:()=>e[a],enumerable:!(o=u(e,a))||o.enumerable});return n};var b=(n,e,i)=>(i=n!=null?h(g(n)):{},l(e||!n||!n.__esModule?r(i,\\"default\\",{value:n,enumerable:!0}):i,n)),v=n=>l(r({},\\"__esModule\\",{value:!0}),n);var c=f((S,s)=>{s.exports=_jsx_runtime});var T={};w(T,{default:()=>p,frontmatter:()=>E});var t=b(c()),E={title:\\"Manual instrumentation of Java applications with OpenTelemetry\\",slug:\\"manual-instrumentation-java-apps-opentelemetry\\",date:\\"2023-08-31\\",description:\\"OpenTelemetry provides an observability framework for cloud-native software, allowing us to trace, monitor, and debug applications seamlessly. In this post, we\'ll explore how to manually instrument a Java application using OpenTelemetry.\\",author:[{slug:\\"david-hope\\"}],image:\\"observability-launch-series-3-java-manual.jpg\\",tags:[{slug:\\"opentelemetry\\"},{slug:\\"java\\"},{slug:\\"apm\\"},{slug:\\"instrumentation\\"}]};function d(n){let e={a:\\"a\\",blockquote:\\"blockquote\\",br:\\"br\\",code:\\"code\\",em:\\"em\\",h2:\\"h2\\",h3:\\"h3\\",img:\\"img\\",li:\\"li\\",p:\\"p\\",pre:\\"pre\\",strong:\\"strong\\",ul:\\"ul\\",...n.components};return(0,t.jsxs)(t.Fragment,{children:[(0,t.jsx)(e.p,{children:\\"In the fast-paced universe of software development, especially in the cloud-native realm, DevOps and SRE teams are increasingly emerging as essential partners in application stability and growth.\\"}),`\\n`,(0,t.jsx)(e.p,{children:\\"DevOps engineers continuously optimize software delivery, while SRE teams act as the stewards of application reliability, scalability, and top-tier performance. The challenge? These teams require a cutting-edge observability solution, one that encompasses full-stack insights, empowering them to rapidly manage, monitor, and rectify potential disruptions before they culminate into operational challenges.\\"}),`\\n`,(0,t.jsx)(e.p,{children:\\"Observability in our modern distributed software ecosystem goes beyond mere monitoring\\\\u2014it demands limitless data collection, precision in processing, and the correlation of this data into actionable insights. 
However, the road to achieving this holistic view is paved with obstacles: from navigating version incompatibilities to wrestling with restrictive proprietary code.\\"}),`\\n`,(0,t.jsxs)(e.p,{children:[\\"Enter \\",(0,t.jsx)(e.a,{href:\\"https://opentelemetry.io/\\",rel:\\"nofollow\\",children:\\"OpenTelemetry (OTel)\\"}),\\", with the following benefits for those who adopt it:\\"]}),`\\n`,(0,t.jsxs)(e.ul,{children:[`\\n`,(0,t.jsx)(e.li,{children:\\"Escape vendor constraints with OTel, freeing yourself from vendor lock-in and ensuring top-notch observability.\\"}),`\\n`,(0,t.jsx)(e.li,{children:\\"See the harmony of unified logs, metrics, and traces come together to provide a complete system view.\\"}),`\\n`,(0,t.jsx)(e.li,{children:\\"Improve your application oversight through richer and enhanced instrumentations.\\"}),`\\n`,(0,t.jsx)(e.li,{children:\\"Embrace the benefits of backward compatibility to protect your prior instrumentation investments.\\"}),`\\n`,(0,t.jsx)(e.li,{children:\\"Embark on the OpenTelemetry journey with an easy learning curve, simplifying onboarding and scalability.\\"}),`\\n`,(0,t.jsx)(e.li,{children:\\"Rely on a proven, future-ready standard to boost your confidence in every investment.\\"}),`\\n`]}),`\\n`,(0,t.jsxs)(e.p,{children:[\\"In this blog, we will explore how you can use \\",(0,t.jsx)(e.a,{href:\\"https://opentelemetry.io/docs/instrumentation/java/manual/\\",rel:\\"nofollow\\",children:\\"manual instrumentation in your Java\\"}),\\" application using Docker, without the need to refactor any part of your application code. We will use an \\",(0,t.jsx)(e.a,{href:\\"https://github.com/elastic/observability-examples\\",rel:\\"nofollow\\",children:\\"application called Elastiflix\\"}),\\". This approach is slightly more complex than using \\",(0,t.jsx)(e.a,{href:\\"https://www.elastic.co/blog/auto-instrumentation-of-java-applications-opentelemetry\\",rel:\\"nofollow\\",children:\\"automatic instrumentation\\"}),\\".\\"]}),`\\n`,(0,t.jsxs)(e.p,{children:[\\"The beauty of this is that there is \\",(0,t.jsx)(e.strong,{children:\\"no need for the otel-collector\\"}),\\"! This setup enables you to slowly and easily migrate an application to OTel with Elastic according to a timeline that best fits your business.\\"]}),`\\n`,(0,t.jsx)(e.h2,{id:\\"application-prerequisites-and-config\\",children:\\"Application, prerequisites, and config\\"}),`\\n`,(0,t.jsxs)(e.p,{children:[\\"The application that we use for this blog is called \\",(0,t.jsx)(e.a,{href:\\"https://github.com/elastic/observability-examples\\",rel:\\"nofollow\\",children:\\"Elastiflix\\"}),\\", a movie streaming application. It consists of several micro-services written in .NET, NodeJS, Go, and Python.\\"]}),`\\n`,(0,t.jsx)(e.p,{children:\\"Before we instrument our sample application, we will first need to understand how Elastic can receive the telemetry data.\\"}),`\\n`,(0,t.jsx)(e.p,{children:(0,t.jsx)(e.img,{src:\\"/assets/images/manual-instrumentation-java-apps-opentelemetry/elastic-blog-1-config.png\\",alt:\\"Elastic configuration options for OpenTelemetry\\",width:\\"1365\\",height:\\"687\\"})}),`\\n`,(0,t.jsx)(e.p,{children:\\"All of Elastic Observability\\\\u2019s APM capabilities are available with OTel data. 
Some of these include:\\"}),`\\n`,(0,t.jsxs)(e.ul,{children:[`\\n`,(0,t.jsx)(e.li,{children:\\"Service maps\\"}),`\\n`,(0,t.jsx)(e.li,{children:\\"Service details (latency, throughput, failed transactions)\\"}),`\\n`,(0,t.jsx)(e.li,{children:\\"Dependencies between services, distributed tracing\\"}),`\\n`,(0,t.jsx)(e.li,{children:\\"Transactions (traces)\\"}),`\\n`,(0,t.jsx)(e.li,{children:\\"Machine learning (ML) correlations\\"}),`\\n`,(0,t.jsx)(e.li,{children:\\"Log correlation\\"}),`\\n`]}),`\\n`,(0,t.jsx)(e.p,{children:\\"In addition to Elastic\\\\u2019s APM and a unified view of the telemetry data, you will also be able to use Elastic\\\\u2019s powerful machine learning capabilities to speed up analysis, and alerting to help reduce MTTR.\\"}),`\\n`,(0,t.jsx)(e.h2,{id:\\"prerequisites\\",children:\\"Prerequisites\\"}),`\\n`,(0,t.jsxs)(e.ul,{children:[`\\n`,(0,t.jsxs)(e.li,{children:[\\"An Elastic Cloud account \\\\u2014 \\",(0,t.jsx)(e.a,{href:\\"https://cloud.elastic.co/\\",rel:\\"nofollow\\",children:\\"sign up now\\"})]}),`\\n`,(0,t.jsxs)(e.li,{children:[\\"A clone of the \\",(0,t.jsx)(e.a,{href:\\"https://github.com/elastic/observability-examples\\",rel:\\"nofollow\\",children:\\"Elastiflix demo application\\"}),\\", or your own Java application\\"]}),`\\n`,(0,t.jsxs)(e.li,{children:[\\"Basic understanding of Docker \\\\u2014 potentially install \\",(0,t.jsx)(e.a,{href:\\"https://www.docker.com/products/docker-desktop/\\",rel:\\"nofollow\\",children:\\"Docker Desktop\\"})]}),`\\n`,(0,t.jsx)(e.li,{children:\\"Basic understanding of Java\\"}),`\\n`]}),`\\n`,(0,t.jsx)(e.h2,{id:\\"view-the-example-source-code\\",children:\\"View the example source code\\"}),`\\n`,(0,t.jsxs)(e.p,{children:[\\"The full source code, including the Dockerfile used in this blog, can be found on \\",(0,t.jsx)(e.a,{href:\\"https://github.com/elastic/observability-examples/tree/main/Elastiflix/java-favorite-otel-manual\\",rel:\\"nofollow\\",children:\\"GitHub\\"}),\\". The repository also contains the \\",(0,t.jsx)(e.a,{href:\\"https://github.com/elastic/observability-examples/tree/main/Elastiflix/java-favorite\\",rel:\\"nofollow\\",children:\\"same application without instrumentation\\"}),\\". This allows you to compare each file and see the differences.\\"]}),`\\n`,(0,t.jsx)(e.p,{children:\\"In particular, we will be working through the following file:\\"}),`\\n`,(0,t.jsx)(e.pre,{children:(0,t.jsx)(e.code,{className:\\"language-bash\\",children:`Elastiflix/java-favorite/src/main/java/com/movieapi/ApiServlet.java\\n`})}),`\\n`,(0,t.jsxs)(e.p,{children:[\\"The following steps will show you how to instrument this application and run it on the command line or in Docker. If you are interested in a more complete OTel example, take a look at the docker-compose file \\",(0,t.jsx)(e.a,{href:\\"https://github.com/elastic/observability-examples/tree/main#start-the-app\\",rel:\\"nofollow\\",children:\\"here\\"}),\\", which will bring up the full project.\\"]}),`\\n`,(0,t.jsx)(e.p,{children:\\"Before we begin, let\\\\u2019s look at the non-instrumented code first.\\"}),`\\n`,(0,t.jsx)(e.h2,{id:\\"step-by-step-guide\\",children:\\"Step-by-step guide\\"}),`\\n`,(0,t.jsx)(e.h3,{id:\\"step-0-log-in-to-your-elastic-cloud-account\\",children:\\"Step 0. 
Log in to your Elastic Cloud account\\"}),`\\n`,(0,t.jsxs)(e.p,{children:[\\"This blog assumes you have an Elastic Cloud account \\\\u2014 if not, follow the \\",(0,t.jsx)(e.a,{href:\\"https://cloud.elastic.co/registration?elektra=en-cloud-page\\",rel:\\"nofollow\\",children:\\"instructions to get started on Elastic Cloud\\"}),\\".\\"]}),`\\n`,(0,t.jsx)(e.p,{children:(0,t.jsx)(e.img,{src:\\"/assets/images/manual-instrumentation-java-apps-opentelemetry/elastic-blog-2-trial.png\\",alt:\\"trial\\",width:\\"884\\",height:\\"654\\"})}),`\\n`,(0,t.jsx)(e.h3,{id:\\"step-1-set-up-opentelemetry\\",children:\\"Step 1. Set up OpenTelemetry\\"}),`\\n`,(0,t.jsx)(e.p,{children:\\"The first step is to set up the OpenTelemetry SDK in your Java application. You can start by adding the OpenTelemetry Java SDK and its dependencies to your project\'s build file, such as Maven or Gradle. In our example application, we are using Maven. Add the dependencies below to your pom.xml:\\"}),`\\n`,(0,t.jsx)(e.pre,{children:(0,t.jsx)(e.code,{className:\\"language-xml\\",children:`<dependency>\\n  <groupId>io.opentelemetry.instrumentation</groupId>\\n  <artifactId>opentelemetry-logback-mdc-1.0</artifactId>\\n  <version>1.25.1-alpha</version>\\n</dependency>\\n\\n<dependency>\\n  <groupId>io.opentelemetry</groupId>\\n  <artifactId>opentelemetry-api</artifactId>\\n</dependency>\\n<dependency>\\n  <groupId>io.opentelemetry</groupId>\\n  <artifactId>opentelemetry-sdk</artifactId>\\n</dependency>\\n<dependency>\\n  <groupId>io.opentelemetry</groupId>\\n  <artifactId>opentelemetry-exporter-otlp</artifactId>\\n</dependency>\\n<dependency>\\n  <groupId>io.opentelemetry</groupId>\\n  <artifactId>opentelemetry-semconv</artifactId>\\n</dependency>\\n<dependency>\\n  <groupId>io.opentelemetry</groupId>\\n  <artifactId>opentelemetry-exporter-otlp-logs</artifactId>\\n</dependency>\\n<dependency>\\n  <groupId>io.opentelemetry.instrumentation</groupId>\\n  <artifactId>opentelemetry-logback-appender-1.0</artifactId>\\n  <version>1.25.1-alpha</version>\\n</dependency>\\n`})}),`\\n`,(0,t.jsx)(e.p,{children:\\"And add the following bill of materials from OpenTelemetry too:\\"}),`\\n`,(0,t.jsx)(e.pre,{children:(0,t.jsx)(e.code,{className:\\"language-xml\\",children:`<dependencyManagement>\\n  <dependencies>\\n    <dependency>\\n      <groupId>io.opentelemetry</groupId>\\n      <artifactId>opentelemetry-bom</artifactId>\\n      <version>1.25.0</version>\\n      <type>pom</type>\\n      <scope>import</scope>\\n    </dependency>\\n    <dependency>\\n      <groupId>io.opentelemetry</groupId>\\n      <artifactId>opentelemetry-bom-alpha</artifactId>\\n      <version>1.25.0-alpha</version>\\n      <type>pom</type>\\n      <scope>import</scope>\\n    </dependency>\\n  </dependencies>\\n</dependencyManagement>\\n`})}),`\\n`,(0,t.jsx)(e.h3,{id:\\"step-2-add-the-application-configuration\\",children:\\"Step 2. Add the application configuration\\"}),`\\n`,(0,t.jsx)(e.p,{children:\\"We recommend that you add the following configuration to the application\\\\u2019s main method, to start before any application code. Doing it like this gives you a bit more control and flexibility and ensures that OpenTelemetry will be available at any stage of the application lifecycle. In the examples, we put this code before the Spring Boot Application startup. Elastic supports OTLP over HTTP and OTLP over gRPC. 
In this example, we are using gRPC.\\"}),`\\n`,(0,t.jsx)(e.pre,{children:(0,t.jsx)(e.code,{className:\\"language-java\\",children:`String SERVICE_NAME = System.getenv(\\"OTEL_SERVICE_NAME\\");\\n\\n// set service name on all OTel signals\\nResource resource = Resource.getDefault().merge(Resource.create(Attributes.of(ResourceAttributes.SERVICE_NAME,SERVICE_NAME,ResourceAttributes.SERVICE_VERSION,\\"1.0\\",ResourceAttributes.DEPLOYMENT_ENVIRONMENT,\\"production\\")));\\n\\n// init OTel logger provider with export to OTLP\\nSdkLoggerProvider sdkLoggerProvider = SdkLoggerProvider.builder().setResource(resource).addLogRecordProcessor(BatchLogRecordProcessor.builder(OtlpGrpcLogRecordExporter.builder().setEndpoint(System.getenv(\\"OTEL_EXPORTER_OTLP_ENDPOINT\\")).addHeader(\\"Authorization\\", \\"Bearer \\" + System.getenv(\\"ELASTIC_APM_SECRET_TOKEN\\")).build()).build()).build();\\n\\n// init OTel trace provider with export to OTLP\\nSdkTracerProvider sdkTracerProvider = SdkTracerProvider.builder().setResource(resource).setSampler(Sampler.alwaysOn()).addSpanProcessor(BatchSpanProcessor.builder(OtlpGrpcSpanExporter.builder().setEndpoint(System.getenv(\\"OTEL_EXPORTER_OTLP_ENDPOINT\\")).addHeader(\\"Authorization\\", \\"Bearer \\" + System.getenv(\\"ELASTIC_APM_SECRET_TOKEN\\")).build()).build()).build();\\n\\n// init OTel meter provider with export to OTLP\\nSdkMeterProvider sdkMeterProvider = SdkMeterProvider.builder().setResource(resource).registerMetricReader(PeriodicMetricReader.builder(OtlpGrpcMetricExporter.builder().setEndpoint(System.getenv(\\"OTEL_EXPORTER_OTLP_ENDPOINT\\")).addHeader(\\"Authorization\\", \\"Bearer \\" + System.getenv(\\"ELASTIC_APM_SECRET_TOKEN\\")).build()).build()).build();\\n\\n// create sdk object and set it as global\\nOpenTelemetrySdk sdk = OpenTelemetrySdk.builder().setTracerProvider(sdkTracerProvider).setLoggerProvider(sdkLoggerProvider).setMeterProvider(sdkMeterProvider).setPropagators(ContextPropagators.create(W3CTraceContextPropagator.getInstance())).build();\\n\\nGlobalOpenTelemetry.set(sdk);\\n// connect logger\\nGlobalLoggerProvider.set(sdk.getSdkLoggerProvider());\\n// Add hook to close SDK, which flushes logs\\nRuntime.getRuntime().addShutdownHook(new Thread(sdk::close));\\n`})}),`\\n`,(0,t.jsx)(e.h3,{id:\\"step-3-create-the-tracer-and-start-the-opentelemetry-span-inside-the-tracingfilter\\",children:\\"Step 3. Create the Tracer and start the OpenTelemetry Span inside the TracingFilter\\"}),`\\n`,(0,t.jsx)(e.p,{children:\\"In the Spring Boot example, you will notice that we have a TracingFilter class, which extends the OncePerRequestFilter class. This Filter is a component placed at the front of the request processing chain. Its primary roles are to intercept incoming requests and outgoing responses, performing tasks such as logging, authentication, transformation of request/response entities, and more. So what we do here is intercept the request as it comes into the Favorite service, so that we can pull out the headers which may contain tracing information from upstream systems.\\"}),`\\n`,(0,t.jsx)(e.p,{children:\\"We start by using the OpenTelemetry Tracer, which is a core component of OpenTelemetry that allows you to create spans, start and stop them, and add attributes and events. 
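The `extract` call in the filter below takes a `getter` that tells the propagator how to read headers off the incoming request; the article does not show its definition, so here is a minimal sketch (assuming the Jakarta Servlet API used elsewhere in the example; the actual helper and class name in the Elastiflix repository may differ):

```java
import io.opentelemetry.context.propagation.TextMapGetter;
import jakarta.servlet.http.HttpServletRequest;
import java.util.Collections;

// Hypothetical holder class: adapts HttpServletRequest headers so the W3C
// trace context propagator can read them. This is the role of the "getter"
// referenced by the TracingFilter below; not taken verbatim from the repo.
final class RequestHeaderGetter {
  static final TextMapGetter<HttpServletRequest> getter =
      new TextMapGetter<HttpServletRequest>() {
        @Override
        public Iterable<String> keys(HttpServletRequest carrier) {
          // All header names present on the incoming request
          return Collections.list(carrier.getHeaderNames());
        }

        @Override
        public String get(HttpServletRequest carrier, String key) {
          // A single header value, e.g. "traceparent"
          return carrier.getHeader(key);
        }
      };
}
```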
In your Java code, import the necessary OpenTelemetry classes and create an instance of the Tracer within your application.\\"}),`\\n`,(0,t.jsx)(e.p,{children:\\"We use this to create a new downstream span, which will continue as a child from the span created in the upstream system using the information we got from the upstream request. In our Elastiflix example, this will be the Node.js application.\\"}),`\\n`,(0,t.jsx)(e.pre,{children:(0,t.jsx)(e.code,{className:\\"language-java\\",children:`@Override\\nprotected void doFilterInternal(jakarta.servlet.http.HttpServletRequest request, jakarta.servlet.http.HttpServletResponse response, jakarta.servlet.FilterChain filterChain) throws jakarta.servlet.ServletException, IOException {\\n Tracer tracer = GlobalOpenTelemetry.getTracer(SERVICE_NAME);\\n\\n Context extractedContext = GlobalOpenTelemetry.getPropagators()\\n .getTextMapPropagator()\\n .extract(Context.current(), request, getter);\\n\\n Span span = tracer.spanBuilder(request.getRequestURI())\\n .setSpanKind(SpanKind.SERVER)\\n .setParent(extractedContext)\\n .startSpan();\\n\\n try (Scope scope = span.makeCurrent()) {\\n filterChain.doFilter(request, response);\\n } catch (Exception e) {\\n span.setStatus(StatusCode.ERROR);\\n throw e;\\n } finally {\\n span.end();\\n }\\n }\\n`})}),`\\n`,(0,t.jsx)(e.h3,{id:\\"step-4-instrument-other-interesting-code-with-spans\\",children:\\"Step 4. Instrument other interesting code with spans\\"}),`\\n`,(0,t.jsx)(e.p,{children:\\"To instrument with spans and track specific regions of your code, you can use the Tracer\'s SpanBuilder to create spans. To accurately measure the duration of a specific operation, make sure to start and stop the spans at the appropriate locations in your code. Use the startSpan and end methods to mark the beginning and end of the span. For example, you can create a span around a specific method or operation in your code, as shown here in the handleCanary method:\\"}),`\\n`,(0,t.jsx)(e.pre,{children:(0,t.jsx)(e.code,{className:\\"language-java\\",children:`private void handleCanary() throws Exception {\\n Span span = GlobalOpenTelemetry.getTracer(SERVICE_NAME).spanBuilder(\\"handleCanary\\").startSpan();\\n Scope scope = span.makeCurrent();\\n\\n///.....\\n\\n\\n span.setStatus(StatusCode.OK);\\n\\n // close the scope before ending the span, restoring the previous context\\n scope.close();\\n\\n span.end();\\n }\\n`})}),`\\n`,(0,t.jsx)(e.h3,{id:\\"step-5-add-attributes-and-events-to-spans\\",children:\\"Step 5. Add attributes and events to spans\\"}),`\\n`,(0,t.jsx)(e.p,{children:\\"You can enhance the spans with additional attributes and events to provide more context and details about the operation being tracked. Attributes can be key-value pairs that describe the span, while events can be used to mark significant points in the span\'s lifecycle. This is also shown in the handleCanary method:\\"}),`\\n`,(0,t.jsx)(e.pre,{children:(0,t.jsx)(e.code,{className:\\"language-java\\",children:`private void handleCanary() throws Exception {\\n\\n Span.current().setAttribute(\\"canary\\", \\"test-new-feature\\");\\n Span.current().setAttribute(\\"quiz_solution\\", \\"correlations\\");\\n\\n Span.current().addEvent(\\"a span event\\", Attributes\\n .of(AttributeKey.longKey(\\"someKey\\"), Long.valueOf(93)));\\n }\\n`})}),`\\n`,(0,t.jsx)(e.h3,{id:\\"step-6-instrument-backends\\",children:\\"Step 6. Instrument backends\\"}),`\\n`,(0,t.jsx)(e.p,{children:`Let\'s consider an example where we are instrumenting a Redis database call. 
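To set the scene before the line-by-line walkthrough that follows, here is a sketch of how the fragments discussed below fit together in one method. This is an illustration only: it assumes a Jedis client field named `jedis` and a `redisHost` string, as in the example repository, plus the imports and `SERVICE_NAME` constant from Step 2; the actual Redis command shown is hypothetical:

```java
// Sketch: the span-handling fragments discussed below, assembled into one
// method. The jedis/redisHost fields and the sadd call are assumptions.
public void postUserFavorites(String user_id, String movieID) {
  // Start a CLIENT span for the outbound Redis call
  Span span = GlobalOpenTelemetry.getTracer(SERVICE_NAME)
      .spanBuilder("Redis.Post")
      .setSpanKind(SpanKind.CLIENT)
      .startSpan();
  // Mark the span as a Redis call so the service map renders it correctly
  span.setAttribute("db.system", "redis");
  span.setAttribute("db.connection_string", redisHost);
  span.setAttribute(
      "db.statement",
      "POST user_id " + user_id + " AND movie_id " + movieID);
  try (Scope scope = span.makeCurrent()) {
    jedis.sadd("user_favorites:" + user_id, movieID); // illustrative Redis call
  } catch (Exception e) {
    span.setStatus(StatusCode.ERROR, "Error while getting data from Redis");
    span.recordException(e);
  } finally {
    jedis.close();
    span.end();
  }
}
```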
We\'re using the Java OpenTelemetry SDK, and our goal is to create a trace that captures each \\"Post User Favorites\\" operation to the database.`}),`\\n`,(0,t.jsx)(e.p,{children:\\"Below is the Java method that performs the operation and collects telemetry data:\\"}),`\\n`,(0,t.jsx)(e.pre,{children:(0,t.jsx)(e.code,{className:\\"language-java\\",children:`public void postUserFavorites(String user_id, String movieID) {\\n ...\\n}\\n`})}),`\\n`,(0,t.jsx)(e.p,{children:\\"Let\'s go through it line by line:\\"}),`\\n`,(0,t.jsxs)(e.p,{children:[(0,t.jsx)(e.strong,{children:\\"Initializing a span\\"}),(0,t.jsx)(e.br,{}),`\\n`,\\"The first important line of our method is where we initialize a span. A span represents a single operation within a trace, which could be a database call, a remote procedure call (RPC), or any segment of code that you want to measure.\\"]}),`\\n`,(0,t.jsx)(e.pre,{children:(0,t.jsx)(e.code,{className:\\"language-java\\",children:`Span span = GlobalOpenTelemetry.getTracer(SERVICE_NAME).spanBuilder(\\"Redis.Post\\").setSpanKind(SpanKind.CLIENT).startSpan();\\n`})}),`\\n`,(0,t.jsxs)(e.p,{children:[(0,t.jsx)(e.strong,{children:\\"Setting span attributes\\"}),(0,t.jsx)(e.br,{}),`\\n`,\\"Next, we add attributes to our span. Attributes are key-value pairs that provide additional information about the span. In order to get the backend call to appear correctly in the service map, it is critical that the attributes are set correctly for the backend call type. In this example, we set the db.system attribute to redis.\\"]}),`\\n`,(0,t.jsx)(e.pre,{children:(0,t.jsx)(e.code,{className:\\"language-java\\",children:`span.setAttribute(\\"db.system\\", \\"redis\\");\\nspan.setAttribute(\\"db.connection_string\\", redisHost);\\nspan.setAttribute(\\n \\"db.statement\\",\\n \\"POST user_id \\" + user_id + \\" AND movie_id \\" + movieID\\n);\\n`})}),`\\n`,(0,t.jsx)(e.p,{children:\\"This will ensure calls to the Redis backend are tracked as shown below:\\"}),`\\n`,(0,t.jsx)(e.p,{children:(0,t.jsx)(e.img,{src:\\"/assets/images/manual-instrumentation-java-apps-opentelemetry/elastic-blog-3-flowchart.png\\",alt:\\"flowchart\\",width:\\"1942\\",height:\\"319\\"})}),`\\n`,(0,t.jsxs)(e.p,{children:[(0,t.jsx)(e.strong,{children:\\"Capturing the result of the operation\\"}),(0,t.jsx)(e.br,{}),`\\n`,\\"We then execute the operation we\'re interested in, within a try-catch block. If an exception occurs during the execution of the operation, we record it in the span.\\"]}),`\\n`,(0,t.jsx)(e.pre,{children:(0,t.jsx)(e.code,{className:\\"language-java\\",children:`try (Scope scope = span.makeCurrent()) {\\n ...\\n} catch (Exception e) {\\n span.setStatus(StatusCode.ERROR, \\"Error while getting data from Redis\\");\\n span.recordException(e);\\n}\\n`})}),`\\n`,(0,t.jsxs)(e.p,{children:[(0,t.jsx)(e.strong,{children:\\"Closing resources\\"}),(0,t.jsx)(e.br,{}),`\\n`,\\"Finally, we close the Redis connection and end the span.\\"]}),`\\n`,(0,t.jsx)(e.pre,{children:(0,t.jsx)(e.code,{className:\\"language-java\\",children:`finally {\\n jedis.close();\\n span.end();\\n}\\n`})}),`\\n`,(0,t.jsx)(e.h3,{id:\\"step-7-configure-logging\\",children:\\"Step 7. Configure logging\\"}),`\\n`,(0,t.jsx)(e.p,{children:\\"Logging is an essential part of application monitoring and troubleshooting. OpenTelemetry allows you to integrate with existing logging frameworks, such as Logback or Log4j, to capture logs along with the telemetry data. 
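For instance, once the Logback appender from the pom.xml dependencies in Step 1 is wired up, an ordinary SLF4J log call made while a span is current is exported over OTLP with the trace context attached, which is what lets Elastic correlate logs with traces. A minimal sketch (the method and message are illustrative, not taken from the example repository):

```java
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

public class ApiServlet {
  private static final Logger logger = LoggerFactory.getLogger(ApiServlet.class);

  // Hypothetical helper: when called inside an active span (for example,
  // from the TracingFilter above), this log record carries the span's
  // trace ID and shows up correlated with the trace in Elastic.
  void recordFavorite(String userId, String movieId) {
    logger.info("favorited movie {} for user {}", movieId, userId);
  }
}
```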
Configure the logging framework of your choice to capture logs related to the instrumented spans. In our example application, check out the logback configuration, which shows how to export logs directly to Elastic.\\"}),`\\n`,(0,t.jsx)(e.pre,{children:(0,t.jsx)(e.code,{className:\\"language-xml\\",children:`<?xml version=\\"1.0\\" encoding=\\"UTF-8\\"?>\\n<configuration>\\n\\n  <appender name=\\"OpenTelemetry\\" class=\\"io.opentelemetry.instrumentation.logback.appender.v1_0.OpenTelemetryAppender\\">\\n    <captureExperimentalAttributes>false</captureExperimentalAttributes>\\n    <captureCodeAttributes>true</captureCodeAttributes>\\n    <captureMarkerAttribute>true</captureMarkerAttribute>\\n  </appender>\\n\\n  <appender name=\\"console\\" class=\\"ch.qos.logback.core.ConsoleAppender\\">\\n    <encoder>\\n      <pattern>%d{HH:mm:ss.SSS} [%thread] %-5level %logger{36} - %msg%n</pattern>\\n    </encoder>\\n  </appender>\\n\\n  <root level=\\"INFO\\">\\n    <appender-ref ref=\\"console\\"/>\\n    <appender-ref ref=\\"OpenTelemetry\\"/>\\n  </root>\\n\\n</configuration>\\n`})}),`\\n`,(0,t.jsx)(e.h3,{id:\\"step-8-running-the-docker-image-with-environment-variables\\",children:\\"Step 8. Running the Docker image with environment variables\\"}),`\\n`,(0,t.jsxs)(e.p,{children:[\\"As specified in the \\",(0,t.jsx)(e.a,{href:\\"https://opentelemetry.io/docs/instrumentation/java/automatic/\\",rel:\\"nofollow\\",children:\\"OTEL Java documentation\\"}),\\", we will use environment variables and pass in the configuration values to enable the application to connect with \\",(0,t.jsx)(e.a,{href:\\"https://www.elastic.co/guide/en/apm/guide/current/open-telemetry.html\\",rel:\\"nofollow\\",children:\\"Elastic Observability\\\\u2019s APM server\\"}),\\".\\"]}),`\\n`,(0,t.jsx)(e.p,{children:\\"Because Elastic accepts OTLP natively, we just need to provide the endpoint and authentication where the OTEL Exporter needs to send the data, as well as some other environment variables.\\"}),`\\n`,(0,t.jsxs)(e.p,{children:[(0,t.jsx)(e.strong,{children:\\"Getting Elastic Cloud variables\\"}),(0,t.jsx)(e.br,{}),`\\n`,\\"You can copy the endpoints and token from Kibana under the path \\",(0,t.jsx)(e.code,{children:\\"/app/home#/tutorial/apm\\"}),\\".\\"]}),`\\n`,(0,t.jsx)(e.p,{children:(0,t.jsx)(e.img,{src:\\"/assets/images/manual-instrumentation-java-apps-opentelemetry/elastic-blog-3-apm.png\\",alt:\\"apm agents\\",width:\\"1924\\",height:\\"1304\\"})}),`\\n`,(0,t.jsx)(e.p,{children:\\"You will need to copy the following environment variable:\\"}),`\\n`,(0,t.jsx)(e.pre,{children:(0,t.jsx)(e.code,{className:\\"language-bash\\",children:`OTEL_EXPORTER_OTLP_ENDPOINT\\n`})}),`\\n`,(0,t.jsx)(e.p,{children:\\"As well as the token from:\\"}),`\\n`,(0,t.jsx)(e.pre,{children:(0,t.jsx)(e.code,{className:\\"language-bash\\",children:`OTEL_EXPORTER_OTLP_HEADERS\\n`})}),`\\n`,(0,t.jsx)(e.p,{children:(0,t.jsx)(e.strong,{children:\\"Build the Docker image\\"})}),`\\n`,(0,t.jsx)(e.pre,{children:(0,t.jsx)(e.code,{className:\\"language-bash\\",children:`docker build -t java-otel-manual-image .\\n`})}),`\\n`,(0,t.jsx)(e.p,{children:(0,t.jsx)(e.strong,{children:\\"Run the Docker image\\"})}),`\\n`,(0,t.jsx)(e.pre,{children:(0,t.jsx)(e.code,{className:\\"language-bash\\",children:`docker run \\\\\\\\\\n -e OTEL_EXPORTER_OTLP_ENDPOINT=\\"REPLACE WITH OTEL_EXPORTER_OTLP_ENDPOINT\\" \\\\\\\\\\n -e ELASTIC_APM_SECRET_TOKEN=\\"REPLACE WITH TOKEN\\" \\\\\\\\\\n -e OTEL_RESOURCE_ATTRIBUTES=\\"service.version=1.0,deployment.environment=production\\" \\\\\\\\\\n -e OTEL_SERVICE_NAME=\\"java-favorite-otel-manual\\" \\\\\\\\\\n -p 5000:5000 \\\\\\\\\\n java-otel-manual-image\\n`})}),`\\n`,(0,t.jsxs)(e.p,{children:[\\"You can now issue a few requests in order to generate trace data. Note that these requests are expected to return an error, as this service relies on a connection to Redis that you don\\\\u2019t currently have running. 
As mentioned before, you can find a more complete example using docker-compose \\",(0,t.jsx)(e.a,{href:\\"https://github.com/elastic/observability-examples/tree/main/Elastiflix\\",rel:\\"nofollow\\",children:\\"here\\"}),\\".\\"]}),`\\n`,(0,t.jsx)(e.pre,{children:(0,t.jsx)(e.code,{className:\\"language-bash\\",children:`curl localhost:5000/favorites\\n\\n# or alternatively issue a request every second\\n\\nwhile true; do curl \\"localhost:5000/favorites\\"; sleep 1; done;\\n`})}),`\\n`,(0,t.jsx)(e.h3,{id:\\"step-9-explore-traces-and-logs-in-elastic-apm\\",children:\\"Step 9. Explore traces and logs in Elastic APM\\"}),`\\n`,(0,t.jsx)(e.p,{children:\\"Once you have this up and running, you can ping the endpoint for your instrumented service (in our case, this is /favorites), and you should see the app appear in Elastic APM, as shown below:\\"}),`\\n`,(0,t.jsx)(e.p,{children:(0,t.jsx)(e.img,{src:\\"/assets/images/manual-instrumentation-java-apps-opentelemetry/elastic-blog-5-services.png\\",alt:\\"services\\",width:\\"1999\\",height:\\"1127\\"})}),`\\n`,(0,t.jsx)(e.p,{children:\\"It will begin by tracking throughput and latency, critical metrics for SREs to pay attention to.\\"}),`\\n`,(0,t.jsx)(e.p,{children:\\"Digging in, we can see an overview of all our Transactions.\\"}),`\\n`,(0,t.jsx)(e.p,{children:(0,t.jsx)(e.img,{src:\\"/assets/images/manual-instrumentation-java-apps-opentelemetry/elastic-blog-6-java-fave-otel.png\\",alt:\\"java favorite otel graph\\",width:\\"1999\\",height:\\"1127\\"})}),`\\n`,(0,t.jsx)(e.p,{children:\\"And look at specific transactions:\\"}),`\\n`,(0,t.jsx)(e.p,{children:(0,t.jsx)(e.img,{src:\\"/assets/images/manual-instrumentation-java-apps-opentelemetry/elastic-blog-7-graph1.png\\",alt:\\"graph2\\",width:\\"1999\\",height:\\"1127\\"})}),`\\n`,(0,t.jsxs)(e.p,{children:[\\"Click on \\",(0,t.jsx)(e.strong,{children:\\"Logs\\"}),\\", and we see that logs are also brought over. The OpenTelemetry Logback appender will automatically bring in logs and correlate them with traces for you:\\"]}),`\\n`,(0,t.jsx)(e.p,{children:(0,t.jsx)(e.img,{src:\\"/assets/images/manual-instrumentation-java-apps-opentelemetry/elastic-blog-8-graph2.png\\",alt:\\"graph3\\",width:\\"1999\\",height:\\"1127\\"})}),`\\n`,(0,t.jsx)(e.p,{children:\\"This gives you complete visibility across logs, metrics, and traces!\\"}),`\\n`,(0,t.jsx)(e.h2,{id:\\"wrapping-up\\",children:\\"Wrapping up\\"}),`\\n`,(0,t.jsx)(e.p,{children:\\"Manually instrumenting your Java applications with OpenTelemetry gives you greater control over what to track and monitor. By following the steps outlined in this blog post, you can effectively monitor the performance of your Java applications, identify issues, and gain insights into the overall health of your application.\\"}),`\\n`,(0,t.jsx)(e.p,{children:\\"Remember, OpenTelemetry is a powerful tool, and proper instrumentation requires careful consideration of what metrics, traces, and logs are essential for your specific use case. 
Experiment with different configurations, leverage the OpenTelemetry SDK for Java documentation, and continuously iterate to achieve the observability goals of your application.\\"}),`\\n`,(0,t.jsx)(e.p,{children:\\"In this blog, we discussed the following:\\"}),`\\n`,(0,t.jsxs)(e.ul,{children:[`\\n`,(0,t.jsx)(e.li,{children:\\"How to manually instrument Java with OpenTelemetry\\"}),`\\n`,(0,t.jsx)(e.li,{children:\\"How to properly initialize and instrument spans\\"}),`\\n`,(0,t.jsx)(e.li,{children:\\"How to easily set the OTLP ENDPOINT and OTLP HEADERS from Elastic without the need for a collector\\"}),`\\n`]}),`\\n`,(0,t.jsx)(e.p,{children:\\"Hopefully, this provided an easy-to-understand walk-through of instrumenting Java with OpenTelemetry and how easy it is to send traces into Elastic.\\"}),`\\n`,(0,t.jsxs)(e.blockquote,{children:[`\\n`,(0,t.jsx)(e.p,{children:\\"Developer resources:\\"}),`\\n`,(0,t.jsxs)(e.ul,{children:[`\\n`,(0,t.jsxs)(e.li,{children:[(0,t.jsx)(e.a,{href:\\"https://www.elastic.co/blog/getting-started-opentelemetry-instrumentation-sample-app\\",rel:\\"nofollow\\",children:\\"Elastiflix application\\"}),\\", a guide to instrument different languages with OpenTelemetry\\"]}),`\\n`,(0,t.jsxs)(e.li,{children:[\\"Python: \\",(0,t.jsx)(e.a,{href:\\"https://www.elastic.co/blog/auto-instrumentation-of-python-applications-opentelemetry\\",rel:\\"nofollow\\",children:\\"Auto-instrumentation\\"}),\\", \\",(0,t.jsx)(e.a,{href:\\"https://www.elastic.co/blog/manual-instrumentation-of-python-applications-opentelemetry\\",rel:\\"nofollow\\",children:\\"Manual-instrumentation\\"})]}),`\\n`,(0,t.jsxs)(e.li,{children:[\\"Java: \\",(0,t.jsx)(e.a,{href:\\"https://www.elastic.co/blog/auto-instrumentation-of-java-applications-opentelemetry\\",rel:\\"nofollow\\",children:\\"Auto-instrumentation\\"}),\\", \\",(0,t.jsx)(e.a,{href:\\"https://www.elastic.co/blog/manual-instrumentation-java-apps-opentelemetry\\",rel:\\"nofollow\\",children:\\"Manual-instrumentation\\"})]}),`\\n`,(0,t.jsxs)(e.li,{children:[\\"Node.js: \\",(0,t.jsx)(e.a,{href:\\"https://www.elastic.co/blog/auto-instrument-nodejs-applications-opentelemetry\\",rel:\\"nofollow\\",children:\\"Auto-instrumentation\\"}),\\", \\",(0,t.jsx)(e.a,{href:\\"https://www.elastic.co/blog/manual-instrumentation-of-nodejs-applications-opentelemetry\\",rel:\\"nofollow\\",children:\\"Manual-instrumentation\\"})]}),`\\n`,(0,t.jsxs)(e.li,{children:[\\".NET: \\",(0,t.jsx)(e.a,{href:\\"https://www.elastic.co/blog/auto-instrumentation-of-net-applications-opentelemetry\\",rel:\\"nofollow\\",children:\\"Auto-instrumentation\\"}),\\", \\",(0,t.jsx)(e.a,{href:\\"https://www.elastic.co/blog/manual-instrumentation-of-net-applications-opentelemetry\\",rel:\\"nofollow\\",children:\\"Manual-instrumentation\\"})]}),`\\n`,(0,t.jsxs)(e.li,{children:[\\"Go: \\",(0,t.jsx)(e.a,{href:\\"https://elastic.co/blog/manual-instrumentation-of-go-applications-opentelemetry\\",rel:\\"nofollow\\",children:\\"Manual-instrumentation\\"})]}),`\\n`,(0,t.jsx)(e.li,{children:(0,t.jsx)(e.a,{href:\\"https://www.elastic.co/blog/best-practices-instrumenting-opentelemetry\\",rel:\\"nofollow\\",children:\\"Best practices for instrumenting OpenTelemetry\\"})}),`\\n`]}),`\\n`,(0,t.jsx)(e.p,{children:\\"General configuration and use case resources:\\"}),`\\n`,(0,t.jsxs)(e.ul,{children:[`\\n`,(0,t.jsx)(e.li,{children:(0,t.jsx)(e.a,{href:\\"https://www.elastic.co/blog/opentelemetry-observability\\",rel:\\"nofollow\\",children:\\"Independence with OpenTelemetry on 
Elastic\\"})}),`\\n`,(0,t.jsx)(e.li,{children:(0,t.jsx)(e.a,{href:\\"https://www.elastic.co/blog/implementing-kubernetes-observability-security-opentelemetry\\",rel:\\"nofollow\\",children:\\"Modern observability and security on Kubernetes with Elastic and OpenTelemetry\\"})}),`\\n`,(0,t.jsx)(e.li,{children:(0,t.jsx)(e.a,{href:\\"https://www.elastic.co/blog/3-models-logging-opentelemetry-elastic\\",rel:\\"nofollow\\",children:\\"3 models for logging with OpenTelemetry and Elastic\\"})}),`\\n`,(0,t.jsx)(e.li,{children:(0,t.jsx)(e.a,{href:\\"https://www.elastic.co/blog/adding-free-and-open-elastic-apm-as-part-of-your-elastic-observability-deployment\\",rel:\\"nofollow\\",children:\\"Adding free and open Elastic APM as part of your Elastic Observability deployment\\"})}),`\\n`,(0,t.jsx)(e.li,{children:(0,t.jsx)(e.a,{href:\\"https://www.elastic.co/blog/custom-metrics-app-code-java-agent-plugin\\",rel:\\"nofollow\\",children:\\"Capturing custom metrics through OpenTelemetry API in code with Elastic\\"})}),`\\n`,(0,t.jsx)(e.li,{children:(0,t.jsx)(e.a,{href:\\"https://www.elastic.co/virtual-events/future-proof-your-observability-platform-with-opentelemetry-and-elastic\\",rel:\\"nofollow\\",children:\\"Future-proof your observability platform with OpenTelemetry and Elastic\\"})}),`\\n`,(0,t.jsx)(e.li,{children:(0,t.jsx)(e.a,{href:\\"https://www.elastic.co/blog/kubernetes-k8s-observability-elasticsearch-cncf\\",rel:\\"nofollow\\",children:\\"Elastic Observability: Built for open technologies like Kubernetes, OpenTelemetry, Prometheus, Istio, and more\\"})}),`\\n`]}),`\\n`]}),`\\n`,(0,t.jsxs)(e.p,{children:[\\"Don\\\\u2019t have an Elastic Cloud account yet? \\",(0,t.jsx)(e.a,{href:\\"https://cloud.elastic.co/registration\\",rel:\\"nofollow\\",children:\\"Sign up for Elastic Cloud\\"}),\\" and try out the auto-instrumentation capabilities that I discussed above. I would be interested in getting your feedback about your experience in gaining visibility into your application stack with Elastic.\\"]}),`\\n`,(0,t.jsx)(e.p,{children:(0,t.jsx)(e.em,{children:\\"The release and timing of any features or functionality described in this post remain at Elastic\'s sole discretion. Any features or functionality not currently available may not be delivered on time or at all.\\"})})]})}function p(n={}){let{wrapper:e}=n.components||{};return e?(0,t.jsx)(e,{...n,children:(0,t.jsx)(d,{...n})}):d(n)}return v(T);})();\\n;return Component;"},"_id":"articles/manual-instrumentation-of-java-applications-opentelemetry.mdx","_raw":{"sourceFilePath":"articles/manual-instrumentation-of-java-applications-opentelemetry.mdx","sourceFileName":"manual-instrumentation-of-java-applications-opentelemetry.mdx","sourceFileDir":"articles","contentType":"mdx","flattenedPath":"articles/manual-instrumentation-of-java-applications-opentelemetry"},"type":"Article","imageUrl":"/assets/images/manual-instrumentation-java-apps-opentelemetry/observability-launch-series-3-java-manual.jpg","readingTime":"17 min read","url":"/manual-instrumentation-java-apps-opentelemetry","headings":[{"level":2,"title":"Application, prerequisites, and config","href":"#application-prerequisites-and-config"},{"level":2,"title":"Prerequisites","href":"#prerequisites"},{"level":2,"title":"View the example source code","href":"#view-the-example-source-code"},{"level":2,"title":"Step-by-step guide","href":"#step-by-step-guide"},{"level":3,"title":"Step 0. 
Log in to your Elastic Cloud account","href":"#step-0-log-in-to-your-elastic-cloud-account"},{"level":3,"title":"Step 1. Set up OpenTelemetry","href":"#step-1-set-up-opentelemetry"},{"level":3,"title":"Step 2. Add the application configuration","href":"#step-2-add-the-application-configuration"},{"level":3,"title":"Step 3. Create the Tracer and start the OpenTelemetry Span inside the TracingFilter","href":"#step-3-create-the-tracer-and-start-the-opentelemetry-span-inside-the-tracingfilter"},{"level":3,"title":"Step 4. Instrument other interesting code with spans","href":"#step-4-instrument-other-interesting-code-with-spans"},{"level":3,"title":"Step 5. Add attributes and events to spans","href":"#step-5-add-attributes-and-events-to-spans"},{"level":3,"title":"Step 6. Instrument backends","href":"#step-6-instrument-backends"},{"level":3,"title":"Step 7. Configure logging","href":"#step-7-configure-logging"},{"level":3,"title":"Step 8. Running the Docker image with environment variables","href":"#step-8-running-the-docker-image-with-environment-variables"},{"level":3,"title":"Step 9. Explore traces and logs in Elastic APM","href":"#step-9-explore-traces-and-logs-in-elastic-apm"},{"level":2,"title":"Wrapping up","href":"#wrapping-up"}]},{"title":"Manual instrumentation of .NET applications with OpenTelemetry","slug":"manual-instrumentation-net-apps-opentelemetry","date":"2023-09-01","description":"In this blog, we will look at how to manually instrument your .NET applications using OpenTelemetry, which provides a set of APIs, libraries, and agents to capture distributed traces and metrics from your application. You can analyze them in Elastic.","image":"observability-launch-series-4-net-manual.jpg","author":[{"slug":"david-hope","type":"Author","_raw":{}}],"tags":[{"slug":"opentelemetry","type":"Tag","_raw":{}},{"slug":"apm","type":"Tag","_raw":{}},{"slug":"net","type":"Tag","_raw":{}},{"slug":"instrumentation","type":"Tag","_raw":{}}],"body":{"raw":"\\nIn the fast-paced universe of software development, especially in the cloud-native realm, DevOps and SRE teams are increasingly emerging as essential partners in application stability and growth.\\n\\nDevOps engineers continuously optimize software delivery, while SRE teams act as the stewards of application reliability, scalability, and top-tier performance. The challenge? These teams require a cutting-edge observability solution, one that encompasses full-stack insights, empowering them to rapidly manage, monitor, and rectify potential disruptions before they culminate into operational challenges.\\n\\nObservability in our modern distributed software ecosystem goes beyond mere monitoring — it demands limitless data collection, precision in processing, and the correlation of this data into actionable insights. 
However, the road to achieving this holistic view is paved with obstacles, from navigating version incompatibilities to wrestling with restrictive proprietary code.\\n\\nEnter [OpenTelemetry (OTel)](https://opentelemetry.io/), with the following benefits for those who adopt it:\\n\\n- Escape vendor constraints with OTel, freeing yourself from vendor lock-in and ensuring top-notch observability.\\n- See the harmony of unified logs, metrics, and traces come together to provide a complete system view.\\n- Improve your application oversight through richer and enhanced instrumentations.\\n- Embrace the benefits of backward compatibility to protect your prior instrumentation investments.\\n- Embark on the OpenTelemetry journey with an easy learning curve, simplifying onboarding and scalability.\\n- Rely on a proven, future-ready standard to boost your confidence in every investment.\\n- Explore manual instrumentation, enabling customized data collection to fit your unique needs.\\n- Ensure monitoring consistency across layers with a standardized observability data framework.\\n- Decouple development from operations, driving peak efficiency for both.\\n\\nIn this post, we will dive into the methodology to instrument a .NET application manually using Docker.\\n\\n## What\'s covered?\\n\\n- Instrumenting the .NET application manually\\n- Creating a Docker image for a .NET application with the OpenTelemetry instrumentation baked in\\n- Installing and running the OpenTelemetry .NET Profiler for automatic instrumentation\\n\\n## Prerequisites\\n\\n- An understanding of Docker and .NET\\n- Elastic Cloud\\n- Docker installed on your machine (we recommend docker desktop)\\n\\n## View the example source code\\n\\nThe full source code, including the Dockerfile used in this blog, can be found on [GitHub](https://github.com/elastic/observability-examples/tree/main/Elastiflix/dotnet-login-otel-manual). The repository also contains the [same application without instrumentation](https://github.com/elastic/observability-examples/tree/main/Elastiflix/dotnet-login). This allows you to compare each file and see the differences.\\n\\nThe following steps will show you how to instrument this application and run it on the command line or in Docker. If you are interested in a more complete OTel example, take a look at the docker-compose file [here](https://github.com/elastic/observability-examples/tree/main#start-the-app), which will bring up the full project.\\n\\n## Step-by-step guide\\n\\nThis blog assumes you have an Elastic Cloud account — if not, follow the [instructions to get started on Elastic Cloud](https://cloud.elastic.co/registration?elektra=en-cloud-page).\\n\\n![](/assets/images/manual-instrumentation-net-apps-opentelemetry/elastic-blog-2-free-trial.png)\\n\\n## Step 1. Getting started\\n\\nIn our demonstration, we will manually instrument a .NET Core application - Login. This application simulates a simple user login service. In this example, we are only looking at Tracing since the OpenTelemetry logging instrumentation is currently at mixed maturity, as mentioned [here](https://opentelemetry.io/docs/instrumentation/).\\n\\nThe application has the following files:\\n\\n1. Program.cs\\n\\n2. Startup.cs\\n\\n3. Telemetry.cs\\n\\n4. LoginController.cs\\n\\n## Step 2. Instrumenting the application\\n\\nWhen it comes to OpenTelemetry, the .NET ecosystem presents some unique aspects. 
While OpenTelemetry offers its API, .NET leverages its native **System**.Diagnostics API to implement OpenTelemetry\'s Tracing API. The pre-existing constructs such as **ActivitySource** and **Activity** are aptly repurposed to comply with OpenTelemetry.\\n\\nThat said, understanding the OpenTelemetry API and its terminology remains crucial for .NET developers. It\'s pivotal in gaining full command over instrumenting your applications, and as we\'ve seen, it also extends to understanding elements of the **System**.Diagnostics API.\\n\\nFor those who might lean toward using the original OpenTelemetry APIs over the **System**.Diagnostics ones, there is also a way. OpenTelemetry provides an API shim for tracing that you can use. It enables developers to switch to OpenTelemetry APIs, and you can find more details about it in the OpenTelemetry API Shim documentation.\\n\\nBy integrating such practices into your .NET application, you can take full advantage of the powerful features OpenTelemetry provides, irrespective of whether you\'re using OpenTelemetry\'s API or the **System**.Diagnostics API.\\n\\nIn this blog, we are sticking to the default method and using the Activity convention which the **System**.Diagnostics API dictates.\\n\\nTo manually instrument a .NET application, you need to make changes in each of these files. Let\'s take a look at these changes one by one.\\n\\n### Program.cs\\n\\nThis is the entry point for our application. Here, we create an instance of IHostBuilder with default configurations. Notice how we set up a console logger with Serilog.\\n\\n```csharp\\npublic static void Main(string[] args)\\n{\\n Log.Logger = new LoggerConfiguration().WriteTo.Console().CreateLogger();\\n CreateHostBuilder(args).Build().Run();\\n}\\n```\\n\\n### Startup.cs\\n\\nIn the **Startup**.cs file, we use the **ConfigureServices** method to add the OpenTelemetry Tracing.\\n\\n```csharp\\npublic void ConfigureServices(IServiceCollection services)\\n{\\n services.AddOpenTelemetry().WithTracing(builder => builder.AddOtlpExporter()\\n .AddSource(\\"Login\\")\\n .AddAspNetCoreInstrumentation()\\n .ConfigureResource(resource =>\\n resource.AddService(\\n serviceName: \\"Login\\"))\\n );\\n services.AddControllers();\\n}\\n```\\n\\nThe WithTracing method enables tracing in OpenTelemetry. We add the OTLP (OpenTelemetry Protocol) exporter, which is a general-purpose telemetry data delivery protocol. We also add the AspNetCoreInstrumentation, which will automatically collect traces from our application. This is a critically important step that is not mentioned in the OpenTelemetry docs. Without adding this method, the instrumentation did not work for the Login application.\\n\\n### Telemetry.cs\\n\\nThis file contains the definition of our ActivitySource. The ActivitySource represents the source of the telemetry activities. It is named after the service name for your application, and this name can come from a configuration file, constants file, etc. We can use this ActivitySource to start activities.\\n\\n```csharp\\nusing System.Diagnostics;\\n\\npublic static class Telemetry\\n{\\n //...\\n\\n // Name it after the service name for your app.\\n // It can come from a config file, constants file, etc.\\n public static readonly ActivitySource LoginActivitySource = new(\\"Login\\");\\n\\n //...\\n}\\n```\\n\\nIn our case, we\'ve created an **ActivitySource** named **Login**. 
In our **LoginController**.cs, we use this **LoginActivitySource** to start a new activity when we begin our operations.\\n\\n```csharp\\nusing (Activity activity = Telemetry.LoginActivitySource.StartActivity(\\"SomeWork\\"))\\n{\\n // Perform operations here\\n}\\n```\\n\\nThis piece of code starts a new activity named **SomeWork**, performs some operations (in this case, generating a random user and logging them in), and then ends the activity. These activities are traced and can be analyzed later to understand the performance of the operations.\\n\\nThis **ActivitySource** is fundamental to OpenTelemetry\'s manual instrumentation. It represents the source of the activities and provides a way to start and stop activities.\\n\\n### LoginController.cs\\n\\nIn the **LoginController**.cs file, we are tracing the operations performed by the GET and POST methods. We start a new activity, **SomeWork**, before we begin our operations and dispose of it once we\'re done.\\n\\n```csharp\\nusing (Activity activity = Telemetry.LoginActivitySource.StartActivity(\\"SomeWork\\"))\\n{\\n var user = GenerateRandomUserResponse();\\n Log.Information(\\"User logged in: {UserName}\\", user);\\n return user;\\n}\\n```\\n\\nThis will track the time taken by these operations and send this data to any configured telemetry backend via the OTLP exporter.\\n\\n## Step 3. Base image setup\\n\\nNow that we have our application source code created and instrumented, it’s time to create a Dockerfile to build and run our .NET Login service.\\n\\nStart with the .NET runtime image for the base layer of our Dockerfile:\\n\\n```dockerfile\\nFROM ${ARCH}mcr.microsoft.com/dotnet/aspnet:7.0 AS base\\nWORKDIR /app\\nEXPOSE 8000\\n```\\n\\nHere, we\'re setting up the application\'s runtime environment.\\n\\n## Step 4. Building the .NET application\\n\\nThis feature of Docker is just the best. Here, we compile our .NET application. We\'ll use the SDK image. In the bad old days, we used to build on a different platform and then put the compiled code into the Docker container. This way, we are much more confident our build will replicate from a developer\'s desktop into production by using Docker all the way through.\\n\\n```dockerfile\\nFROM --platform=$BUILDPLATFORM mcr.microsoft.com/dotnet/sdk:8.0-preview AS build\\nARG TARGETPLATFORM\\n\\nWORKDIR /src\\nCOPY [\\"login.csproj\\", \\"./\\"]\\nRUN dotnet restore \\"./login.csproj\\"\\nCOPY . .\\nWORKDIR \\"/src/.\\"\\nRUN dotnet build \\"login.csproj\\" -c Release -o /app/build\\n```\\n\\nThis section ensures that our .NET code is properly restored and compiled.\\n\\n## Step 5. Publishing the application\\n\\nOnce built, we\'ll publish the app:\\n\\n```dockerfile\\nFROM build AS publish\\nRUN dotnet publish \\"login.csproj\\" -c Release -o /app/publish\\n```\\n\\n## Step 6. Preparing the final image\\n\\nNow, let\'s set up the final runtime image:\\n\\n```dockerfile\\nFROM base AS final\\nWORKDIR /app\\nCOPY --from=publish /app/publish .\\n```\\n\\n## Step 7. Entry point setup\\n\\nLastly, set the Docker image\'s entry point to start our .NET application:\\n\\n```bash\\nENTRYPOINT [\\"/bin/bash\\", \\"-c\\", \\"dotnet login.dll\\"]\\n```\\n\\n## Step 8. 
Running the Docker image with environment variables\\n\\nTo build and run the Docker image, you\'d typically follow these steps:\\n\\n### Build the Docker image\\n\\nFirst, you\'d want to build the Docker image from your Dockerfile. Let\'s assume the Dockerfile is in the current directory, and you\'d like to name/tag your image dotnet-login-otel-image.\\n\\n```bash\\ndocker build -t dotnet-login-otel-image .\\n```\\n\\n### Run the Docker image\\n\\nAfter building the image, you\'d run it with the specified environment variables. For this, the docker **run** command is used with the -e flag for each environment variable.\\n\\n```bash\\ndocker run \\\\\\n -e OTEL_EXPORTER_OTLP_HEADERS=\\"Authorization=Bearer ${ELASTIC_APM_SECRET_TOKEN}\\" \\\\\\n -e OTEL_EXPORTER_OTLP_ENDPOINT=\\"${ELASTIC_APM_SERVER_URL}\\" \\\\\\n -e OTEL_METRICS_EXPORTER=\\"otlp\\" \\\\\\n -e OTEL_RESOURCE_ATTRIBUTES=\\"service.version=1.0,deployment.environment=production\\" \\\\\\n -e OTEL_SERVICE_NAME=\\"dotnet-login-otel-manual\\" \\\\\\n -e OTEL_TRACES_EXPORTER=\\"otlp\\" \\\\\\n dotnet-login-otel-image\\n```\\n\\nMake sure that `${ELASTIC_APM_SECRET_TOKEN}` and `${ELASTIC_APM_SERVER_URL}` are set in your shell environment, replacing them with the actual values from the cloud as shown below.\\n\\n**Getting Elastic Cloud variables** \\nYou can copy the endpoints and token from Kibana under the path `/app/home#/tutorial/apm`.\\n\\n![apm agents](/assets/images/manual-instrumentation-net-apps-opentelemetry/elastic-blog-3-apm-agents.png)\\n\\nYou can also use an environment file with docker run --env-file to make the command less verbose if you have multiple environment variables.\\n\\nOnce you have this up and running, you can ping the endpoint for your instrumented service (in our case, this is /login), and you should see the app appear in Elastic APM, as shown below:\\n\\n![services](/assets/images/manual-instrumentation-net-apps-opentelemetry/services-2.png)\\n\\nIt will begin by tracking throughput and latency, critical metrics for SREs to pay attention to.\\n\\nDigging in, we can see an overview of all our Transactions.\\n\\n![login](/assets/images/manual-instrumentation-net-apps-opentelemetry/manual-net-login.png)\\n\\nAnd look at specific transactions, including the “SomeWork” activity/span we created in the code above:\\n\\n![latency distribution graph](/assets/images/manual-instrumentation-net-apps-opentelemetry/latency_distribution_graph.png)\\n\\nThere is clearly an outlier here, where one transaction took over 20ms. This is likely to be due to the CLR warming up.\\n\\n## Wrapping up\\n\\nWith the code here instrumented and the Dockerfile bootstrapping the application, you\'ve transformed your simple .NET application into one that\'s instrumented with OpenTelemetry. This will aid greatly in understanding application performance, tracing errors, and gaining insights into how users interact with your software.\\n\\nRemember, observability is a crucial aspect of modern application development, especially in distributed systems. 
With tools like OpenTelemetry, understanding complex systems becomes a tad bit easier.\\n\\nIn this blog, we discussed the following:\\n\\n- How to manually instrument .NET with OpenTelemetry.\\n- Using standard commands in a Docker file, our instrumented application was built and started.\\n- Using OpenTelemetry and its support for multiple languages, DevOps and SRE teams can instrument their applications with ease, gaining immediate insights into the health of the entire application stack and reducing mean time to resolution (MTTR).\\n\\nSince Elastic can support a mix of methods for ingesting data, whether it be using auto-instrumentation of open-source OpenTelemetry or manual instrumentation with its native APM agents, you can plan your migration to OTel by focusing on a few applications first and then using OpenTelemetry across your applications later on in a manner that best fits your business needs.\\n\\n> Developer resources:\\n>\\n> - [Elastiflix application](https://www.elastic.co/blog/getting-started-opentelemetry-instrumentation-sample-app), a guide to instrument different languages with OpenTelemetry\\n> - Python: [Auto-instrumentation](https://www.elastic.co/blog/auto-instrumentation-of-python-applications-opentelemetry), [Manual-instrumentation](https://www.elastic.co/blog/manual-instrumentation-of-python-applications-opentelemetry)\\n> - Java: [Auto-instrumentation](https://www.elastic.co/blog/auto-instrumentation-of-java-applications-opentelemetry), [Manual-instrumentation](https://www.elastic.co/blog/manual-instrumentation-of-java-applications-opentelemetry)\\n> - Node.js: [Auto-instrumentation](https://www.elastic.co/blog/auto-instrument-nodejs-applications-opentelemetry), [Manual-instrumentation](https://www.elastic.co/blog/manual-instrumentation-of-nodejs-applications-opentelemetry)\\n> - .NET: [Auto-instrumentation](https://www.elastic.co/blog/auto-instrumentation-of-net-applications-opentelemetry), [Manual-instrumentation](https://www.elastic.co/blog/manual-instrumentation-net-apps-opentelemetry)\\n> - Go: [Manual-instrumentation](https://elastic.co/blog/manual-instrumentation-of-go-applications-opentelemetry)\\n> - [Best practices for instrumenting OpenTelemetry](https://www.elastic.co/blog/best-practices-instrumenting-opentelemetry)\\n>\\n> General configuration and use case resources:\\n>\\n> - [Independence with OpenTelemetry on Elastic](https://www.elastic.co/blog/opentelemetry-observability)\\n> - [Modern observability and security on Kubernetes with Elastic and OpenTelemetry](https://www.elastic.co/blog/implementing-kubernetes-observability-security-opentelemetry)\\n> - [3 models for logging with OpenTelemetry and Elastic](https://www.elastic.co/blog/3-models-logging-opentelemetry-elastic)\\n> - [Adding free and open Elastic APM as part of your Elastic Observability deployment](https://www.elastic.co/blog/adding-free-and-open-elastic-apm-as-part-of-your-elastic-observability-deployment)\\n> - [Capturing custom metrics through OpenTelemetry API in code with Elastic](https://www.elastic.co/blog/custom-metrics-app-code-java-agent-plugin)\\n> - [Future-proof your observability platform with OpenTelemetry and Elastic](https://www.elastic.co/virtual-events/future-proof-your-observability-platform-with-opentelemetry-and-elastic)\\n> - [Elastic Observability: Built for open technologies like Kubernetes, OpenTelemetry, Prometheus, Istio, and more](https://www.elastic.co/blog/kubernetes-k8s-observability-elasticsearch-cncf)\\n\\nDon’t have an Elastic Cloud account yet? 
[Sign up for Elastic Cloud](https://cloud.elastic.co/registration) and try out the instrumentation capabilities that I discussed above. I would be interested in getting your feedback about your experience in gaining visibility into your application stack with Elastic.\\n\\n_The release and timing of any features or functionality described in this post remain at Elastic\'s sole discretion. Any features or functionality not currently available may not be delivered on time or at all._\\n","code":"var Component=(()=>{var p=Object.create;var r=Object.defineProperty;var u=Object.getOwnPropertyDescriptor;var m=Object.getOwnPropertyNames;var g=Object.getPrototypeOf,y=Object.prototype.hasOwnProperty;var f=(t,e)=>()=>(e||t((e={exports:{}}).exports,e),e.exports),w=(t,e)=>{for(var i in e)r(t,i,{get:e[i],enumerable:!0})},l=(t,e,i,a)=>{if(e&&typeof e==\\"object\\"||typeof e==\\"function\\")for(let o of m(e))!y.call(t,o)&&o!==i&&r(t,o,{get:()=>e[o],enumerable:!(a=u(e,o))||a.enumerable});return t};var b=(t,e,i)=>(i=t!=null?p(g(t)):{},l(e||!t||!t.__esModule?r(i,\\"default\\",{value:t,enumerable:!0}):i,t)),v=t=>l(r({},\\"__esModule\\",{value:!0}),t);var c=f((k,s)=>{s.exports=_jsx_runtime});var E={};w(E,{default:()=>d,frontmatter:()=>T});var n=b(c()),T={title:\\"Manual instrumentation of .NET applications with OpenTelemetry\\",slug:\\"manual-instrumentation-net-apps-opentelemetry\\",date:\\"2023-09-01\\",description:\\"In this blog, we will look at how to manually instrument your .NET applications using OpenTelemetry, which provides a set of APIs, libraries, and agents to capture distributed traces and metrics from your application. You can analyze them in Elastic.\\",author:[{slug:\\"david-hope\\"}],image:\\"observability-launch-series-4-net-manual.jpg\\",tags:[{slug:\\"opentelemetry\\"},{slug:\\"apm\\"},{slug:\\"net\\"},{slug:\\"instrumentation\\"}]};function h(t){let e={a:\\"a\\",blockquote:\\"blockquote\\",br:\\"br\\",code:\\"code\\",em:\\"em\\",h2:\\"h2\\",h3:\\"h3\\",img:\\"img\\",li:\\"li\\",ol:\\"ol\\",p:\\"p\\",pre:\\"pre\\",strong:\\"strong\\",ul:\\"ul\\",...t.components};return(0,n.jsxs)(n.Fragment,{children:[(0,n.jsx)(e.p,{children:\\"In the fast-paced universe of software development, especially in the cloud-native realm, DevOps and SRE teams are increasingly emerging as essential partners in application stability and growth.\\"}),`\\n`,(0,n.jsx)(e.p,{children:\\"DevOps engineers continuously optimize software delivery, while SRE teams act as the stewards of application reliability, scalability, and top-tier performance. The challenge? These teams require a cutting-edge observability solution, one that encompasses full-stack insights, empowering them to rapidly manage, monitor, and rectify potential disruptions before they culminate into operational challenges.\\"}),`\\n`,(0,n.jsx)(e.p,{children:\\"Observability in our modern distributed software ecosystem goes beyond mere monitoring \\\\u2014 it demands limitless data collection, precision in processing, and the correlation of this data into actionable insights. 
However, the road to achieving this holistic view is paved with obstacles, from navigating version incompatibilities to wrestling with restrictive proprietary code.\\"}),`\\n`,(0,n.jsxs)(e.p,{children:[\\"Enter \\",(0,n.jsx)(e.a,{href:\\"https://opentelemetry.io/\\",rel:\\"nofollow\\",children:\\"OpenTelemetry (OTel)\\"}),\\", with the following benefits for those who adopt it:\\"]}),`\\n`,(0,n.jsxs)(e.ul,{children:[`\\n`,(0,n.jsx)(e.li,{children:\\"Escape vendor constraints with OTel, freeing yourself from vendor lock-in and ensuring top-notch observability.\\"}),`\\n`,(0,n.jsx)(e.li,{children:\\"See the harmony of unified logs, metrics, and traces come together to provide a complete system view.\\"}),`\\n`,(0,n.jsx)(e.li,{children:\\"Improve your application oversight through richer and enhanced instrumentations.\\"}),`\\n`,(0,n.jsx)(e.li,{children:\\"Embrace the benefits of backward compatibility to protect your prior instrumentation investments.\\"}),`\\n`,(0,n.jsx)(e.li,{children:\\"Embark on the OpenTelemetry journey with an easy learning curve, simplifying onboarding and scalability.\\"}),`\\n`,(0,n.jsx)(e.li,{children:\\"Rely on a proven, future-ready standard to boost your confidence in every investment.\\"}),`\\n`,(0,n.jsx)(e.li,{children:\\"Explore manual instrumentation, enabling customized data collection to fit your unique needs.\\"}),`\\n`,(0,n.jsx)(e.li,{children:\\"Ensure monitoring consistency across layers with a standardized observability data framework.\\"}),`\\n`,(0,n.jsx)(e.li,{children:\\"Decouple development from operations, driving peak efficiency for both.\\"}),`\\n`]}),`\\n`,(0,n.jsx)(e.p,{children:\\"In this post, we will dive into the methodology to instrument a .NET application manually using Docker.\\"}),`\\n`,(0,n.jsx)(e.h2,{id:\\"whats-covered\\",children:\\"What\'s covered?\\"}),`\\n`,(0,n.jsxs)(e.ul,{children:[`\\n`,(0,n.jsx)(e.li,{children:\\"Instrumenting the .NET application manually\\"}),`\\n`,(0,n.jsx)(e.li,{children:\\"Creating a Docker image for a .NET application with the OpenTelemetry instrumentation baked in\\"}),`\\n`,(0,n.jsx)(e.li,{children:\\"Installing and running the OpenTelemetry .NET Profiler for automatic instrumentation\\"}),`\\n`]}),`\\n`,(0,n.jsx)(e.h2,{id:\\"prerequisites\\",children:\\"Prerequisites\\"}),`\\n`,(0,n.jsxs)(e.ul,{children:[`\\n`,(0,n.jsx)(e.li,{children:\\"An understanding of Docker and .NET\\"}),`\\n`,(0,n.jsx)(e.li,{children:\\"Elastic Cloud\\"}),`\\n`,(0,n.jsx)(e.li,{children:\\"Docker installed on your machine (we recommend docker desktop)\\"}),`\\n`]}),`\\n`,(0,n.jsx)(e.h2,{id:\\"view-the-example-source-code\\",children:\\"View the example source code\\"}),`\\n`,(0,n.jsxs)(e.p,{children:[\\"The full source code, including the Dockerfile used in this blog, can be found on \\",(0,n.jsx)(e.a,{href:\\"https://github.com/elastic/observability-examples/tree/main/Elastiflix/dotnet-login-otel-manual\\",rel:\\"nofollow\\",children:\\"GitHub\\"}),\\". The repository also contains the \\",(0,n.jsx)(e.a,{href:\\"https://github.com/elastic/observability-examples/tree/main/Elastiflix/dotnet-login\\",rel:\\"nofollow\\",children:\\"same application without instrumentation\\"}),\\". This allows you to compare each file and see the differences.\\"]}),`\\n`,(0,n.jsxs)(e.p,{children:[\\"The following steps will show you how to instrument this application and run it on the command line or in Docker. 
If you are interested in a more complete OTel example, take a look at the docker-compose file \\",(0,n.jsx)(e.a,{href:\\"https://github.com/elastic/observability-examples/tree/main#start-the-app\\",rel:\\"nofollow\\",children:\\"here\\"}),\\", which will bring up the full project.\\"]}),`\\n`,(0,n.jsx)(e.h2,{id:\\"step-by-step-guide\\",children:\\"Step-by-step guide\\"}),`\\n`,(0,n.jsxs)(e.p,{children:[\\"This blog assumes you have an Elastic Cloud account \\\\u2014 if not, follow the \\",(0,n.jsx)(e.a,{href:\\"https://cloud.elastic.co/registration?elektra=en-cloud-page\\",rel:\\"nofollow\\",children:\\"instructions to get started on Elastic Cloud\\"}),\\".\\"]}),`\\n`,(0,n.jsx)(e.p,{children:(0,n.jsx)(e.img,{src:\\"/assets/images/manual-instrumentation-net-apps-opentelemetry/elastic-blog-2-free-trial.png\\",alt:\\"\\",width:\\"884\\",height:\\"654\\"})}),`\\n`,(0,n.jsx)(e.h2,{id:\\"step-1-getting-started\\",children:\\"Step 1. Getting started\\"}),`\\n`,(0,n.jsxs)(e.p,{children:[\\"In our demonstration, we will manually instrument a .NET Core application - Login. This application simulates a simple user login service. In this example, we are only looking at Tracing since the OpenTelemetry logging instrumentation is currently at mixed maturity, as mentioned \\",(0,n.jsx)(e.a,{href:\\"https://opentelemetry.io/docs/instrumentation/\\",rel:\\"nofollow\\",children:\\"here\\"}),\\".\\"]}),`\\n`,(0,n.jsx)(e.p,{children:\\"The application has the following files:\\"}),`\\n`,(0,n.jsxs)(e.ol,{children:[`\\n`,(0,n.jsxs)(e.li,{children:[`\\n`,(0,n.jsx)(e.p,{children:\\"Program.cs\\"}),`\\n`]}),`\\n`,(0,n.jsxs)(e.li,{children:[`\\n`,(0,n.jsx)(e.p,{children:\\"Startup.cs\\"}),`\\n`]}),`\\n`,(0,n.jsxs)(e.li,{children:[`\\n`,(0,n.jsx)(e.p,{children:\\"Telemetry.cs\\"}),`\\n`]}),`\\n`,(0,n.jsxs)(e.li,{children:[`\\n`,(0,n.jsx)(e.p,{children:\\"LoginController.cs\\"}),`\\n`]}),`\\n`]}),`\\n`,(0,n.jsx)(e.h2,{id:\\"step-2-instrumenting-the-application\\",children:\\"Step 2. Instrumenting the application\\"}),`\\n`,(0,n.jsxs)(e.p,{children:[\\"When it comes to OpenTelemetry, the .NET ecosystem presents some unique aspects. While OpenTelemetry offers its API, .NET leverages its native \\",(0,n.jsx)(e.strong,{children:\\"System\\"}),\\".Diagnostics API to implement OpenTelemetry\'s Tracing API. The pre-existing constructs such as \\",(0,n.jsx)(e.strong,{children:\\"ActivitySource\\"}),\\" and \\",(0,n.jsx)(e.strong,{children:\\"Activity\\"}),\\" are aptly repurposed to comply with OpenTelemetry.\\"]}),`\\n`,(0,n.jsxs)(e.p,{children:[\\"That said, understanding the OpenTelemetry API and its terminology remains crucial for .NET developers. It\'s pivotal in gaining full command over instrumenting your applications, and as we\'ve seen, it also extends to understanding elements of the \\",(0,n.jsx)(e.strong,{children:\\"System\\"}),\\".Diagnostics API.\\"]}),`\\n`,(0,n.jsxs)(e.p,{children:[\\"For those who might lean toward using the original OpenTelemetry APIs over the \\",(0,n.jsx)(e.strong,{children:\\"System\\"}),\\".Diagnostics ones, there is also a way. OpenTelemetry provides an API shim for tracing that you can use. 
The shim lets developers write against the OpenTelemetry APIs directly, and you can find more details about it in the OpenTelemetry API Shim documentation.\\"]}),`\\n`,(0,n.jsxs)(e.p,{children:[\\"By integrating such practices into your .NET application, you can take full advantage of the powerful features OpenTelemetry provides, irrespective of whether you\'re using OpenTelemetry\'s API or the \\",(0,n.jsx)(e.strong,{children:\\"System\\"}),\\".Diagnostics API.\\"]}),`\\n`,(0,n.jsxs)(e.p,{children:[\\"In this blog, we are sticking to the default method and using the Activity convention which the \\",(0,n.jsx)(e.strong,{children:\\"System\\"}),\\".Diagnostics API dictates.\\"]}),`\\n`,(0,n.jsx)(e.p,{children:\\"To manually instrument a .NET application, you need to make changes in each of these files. Let\'s take a look at these changes one by one.\\"}),`\\n`,(0,n.jsx)(e.h3,{id:\\"programcs\\",children:\\"Program.cs\\"}),`\\n`,(0,n.jsx)(e.p,{children:\\"This is the entry point for our application. Here, we create an instance of IHostBuilder with default configurations. Notice how we set up a console logger with Serilog.\\"}),`\\n`,(0,n.jsx)(e.pre,{children:(0,n.jsx)(e.code,{className:\\"language-csharp\\",children:`public static void Main(string[] args)\\n{\\n Log.Logger = new LoggerConfiguration().WriteTo.Console().CreateLogger();\\n CreateHostBuilder(args).Build().Run();\\n}\\n`})}),`\\n`,(0,n.jsx)(e.h3,{id:\\"startupcs\\",children:\\"Startup.cs\\"}),`\\n`,(0,n.jsxs)(e.p,{children:[\\"In the \\",(0,n.jsx)(e.strong,{children:\\"Startup\\"}),\\".cs file, we use the \\",(0,n.jsx)(e.strong,{children:\\"ConfigureServices\\"}),\\" method to add the OpenTelemetry tracing.\\"]}),`\\n`,(0,n.jsx)(e.pre,{children:(0,n.jsx)(e.code,{className:\\"language-csharp\\",children:`public void ConfigureServices(IServiceCollection services)\\n{\\n services.AddOpenTelemetry().WithTracing(builder => builder\\n .AddSource(\\"Login\\")\\n .AddAspNetCoreInstrumentation()\\n .AddOtlpExporter()\\n .ConfigureResource(resource =>\\n resource.AddService(\\n serviceName: \\"Login\\"))\\n );\\n services.AddControllers();\\n}\\n`})}),`\\n`,(0,n.jsx)(e.p,{children:\\"The WithTracing method enables tracing in OpenTelemetry. We add the OTLP (OpenTelemetry Protocol) exporter, which is a general-purpose telemetry data delivery protocol. We also add the AspNetCoreInstrumentation, which will automatically collect traces from our application. This is a critically important step that is not called out in the OpenTelemetry docs: without adding AddAspNetCoreInstrumentation, the instrumentation did not work for the Login application.\\"}),`\\n`,(0,n.jsx)(e.h3,{id:\\"telemetrycs\\",children:\\"Telemetry.cs\\"}),`\\n`,(0,n.jsx)(e.p,{children:\\"This file contains the definition of our ActivitySource. The ActivitySource represents the source of the telemetry activities. It is named after the service name for your application, and this name can come from a configuration file, constants file, etc. 
We can use this ActivitySource to start activities.\\"}),`\\n`,(0,n.jsx)(e.pre,{children:(0,n.jsx)(e.code,{className:\\"language-csharp\\",children:`using System.Diagnostics;\\n\\npublic static class Telemetry\\n{\\n //...\\n\\n // Name it after the service name for your app.\\n // It can come from a config file, constants file, etc.\\n public static readonly ActivitySource LoginActivitySource = new(\\"Login\\");\\n\\n //...\\n}\\n`})}),`\\n`,(0,n.jsxs)(e.p,{children:[\\"In our case, we\'ve created an \\",(0,n.jsx)(e.strong,{children:\\"ActivitySource\\"}),\\" named \\",(0,n.jsx)(e.strong,{children:\\"Login\\"}),\\". In our \\",(0,n.jsx)(e.strong,{children:\\"LoginController\\"}),\\".cs, we use this \\",(0,n.jsx)(e.strong,{children:\\"LoginActivitySource\\"}),\\" to start a new activity when we begin our operations.\\"]}),`\\n`,(0,n.jsx)(e.pre,{children:(0,n.jsx)(e.code,{className:\\"language-csharp\\",children:`using (Activity activity = Telemetry.LoginActivitySource.StartActivity(\\"SomeWork\\"))\\n{\\n // Perform operations here\\n}\\n`})}),`\\n`,(0,n.jsxs)(e.p,{children:[\\"This piece of code starts a new activity named \\",(0,n.jsx)(e.strong,{children:\\"SomeWork\\"}),\\", performs some operations (in this case, generating a random user and logging them in), and then ends the activity. These activities are traced and can be analyzed later to understand the performance of the operations.\\"]}),`\\n`,(0,n.jsxs)(e.p,{children:[\\"This \\",(0,n.jsx)(e.strong,{children:\\"ActivitySource\\"}),\\" is fundamental to OpenTelemetry\'s manual instrumentation. It represents the source of the activities and provides a way to start and stop activities.\\"]}),`\\n`,(0,n.jsx)(e.h3,{id:\\"logincontrollercs\\",children:\\"LoginController.cs\\"}),`\\n`,(0,n.jsxs)(e.p,{children:[\\"In the \\",(0,n.jsx)(e.strong,{children:\\"LoginController\\"}),\\".cs file, we are tracing the operations performed by the GET and POST methods. We start a new activity, \\",(0,n.jsx)(e.strong,{children:\\"SomeWork\\"}),\\", before we begin our operations and dispose of it once we\'re done.\\"]}),`\\n`,(0,n.jsx)(e.pre,{children:(0,n.jsx)(e.code,{className:\\"language-csharp\\",children:`using (Activity activity = Telemetry.LoginActivitySource.StartActivity(\\"SomeWork\\"))\\n{\\n var user = GenerateRandomUserResponse();\\n Log.Information(\\"User logged in: {UserName}\\", user);\\n return user;\\n}\\n`})}),`\\n`,(0,n.jsx)(e.p,{children:\\"This will track the time taken by these operations and send this data to any configured telemetry backend via the OTLP exporter.\\"}),`\\n`,(0,n.jsx)(e.h2,{id:\\"step-3-base-image-setup\\",children:\\"Step 3. Base image setup\\"}),`\\n`,(0,n.jsx)(e.p,{children:\\"Now that we have our application source code created and instrumented, it\\\\u2019s time to create a Dockerfile to build and run our .NET Login service.\\"}),`\\n`,(0,n.jsx)(e.p,{children:\\"Start with the .NET runtime image for the base layer of our Dockerfile:\\"}),`\\n`,(0,n.jsx)(e.pre,{children:(0,n.jsx)(e.code,{className:\\"language-dockerfile\\",children:`FROM \\\\${ARCH}mcr.microsoft.com/dotnet/aspnet:7.0 AS base\\nWORKDIR /app\\nEXPOSE 8000\\n`})}),`\\n`,(0,n.jsx)(e.p,{children:\\"Here, we\'re setting up the application\'s runtime environment.\\"}),`\\n`,(0,n.jsx)(e.h2,{id:\\"step-4-building-the-net-application\\",children:\\"Step 4. Building the .NET application\\"}),`\\n`,(0,n.jsx)(e.p,{children:\\"Multi-stage builds are one of Docker\'s most useful features. Here, we compile our .NET application using the SDK image. 
Previously, we would build on a different platform and then copy the compiled code into the Docker container. By using Docker all the way through, we can be much more confident that the build will replicate from a developer\'s desktop into production.\\"}),`\\n`,(0,n.jsx)(e.pre,{children:(0,n.jsx)(e.code,{className:\\"language-dockerfile\\",children:`FROM --platform=$BUILDPLATFORM mcr.microsoft.com/dotnet/sdk:8.0-preview AS build\\nARG TARGETPLATFORM\\n\\nWORKDIR /src\\nCOPY [\\"login.csproj\\", \\"./\\"]\\nRUN dotnet restore \\"./login.csproj\\"\\nCOPY . .\\nWORKDIR \\"/src/.\\"\\nRUN dotnet build \\"login.csproj\\" -c Release -o /app/build\\n`})}),`\\n`,(0,n.jsx)(e.p,{children:\\"This section ensures that our .NET code is properly restored and compiled.\\"}),`\\n`,(0,n.jsx)(e.h2,{id:\\"step-5-publishing-the-application\\",children:\\"Step 5. Publishing the application\\"}),`\\n`,(0,n.jsx)(e.p,{children:\\"Once built, we\'ll publish the app:\\"}),`\\n`,(0,n.jsx)(e.pre,{children:(0,n.jsx)(e.code,{className:\\"language-dockerfile\\",children:`FROM build AS publish\\nRUN dotnet publish \\"login.csproj\\" -c Release -o /app/publish\\n`})}),`\\n`,(0,n.jsx)(e.h2,{id:\\"step-6-preparing-the-final-image\\",children:\\"Step 6. Preparing the final image\\"}),`\\n`,(0,n.jsx)(e.p,{children:\\"Now, let\'s set up the final runtime image:\\"}),`\\n`,(0,n.jsx)(e.pre,{children:(0,n.jsx)(e.code,{className:\\"language-dockerfile\\",children:`FROM base AS final\\nWORKDIR /app\\nCOPY --from=publish /app/publish .\\n`})}),`\\n`,(0,n.jsx)(e.h2,{id:\\"step-7-entry-point-setup\\",children:\\"Step 7. Entry point setup\\"}),`\\n`,(0,n.jsx)(e.p,{children:\\"Lastly, set the Docker image\'s entry point to start our .NET application. (When using the OpenTelemetry .NET Profiler for automatic instrumentation, this is also where you would source the instrumentation script that sets the environment variables required to bootstrap the profiler.)\\"}),`\\n`,(0,n.jsx)(e.pre,{children:(0,n.jsx)(e.code,{className:\\"language-bash\\",children:`ENTRYPOINT [\\"/bin/bash\\", \\"-c\\", \\"dotnet login.dll\\"]\\n`})}),`\\n`,(0,n.jsx)(e.h2,{id:\\"step-8-running-the-docker-image-with-environment-variables\\",children:\\"Step 8. Running the Docker image with environment variables\\"}),`\\n`,(0,n.jsx)(e.p,{children:\\"To build and run the Docker image, you\'d typically follow these steps:\\"}),`\\n`,(0,n.jsx)(e.h3,{id:\\"build-the-docker-image\\",children:\\"Build the Docker image\\"}),`\\n`,(0,n.jsx)(e.p,{children:\\"First, you\'d want to build the Docker image from your Dockerfile. Let\'s assume the Dockerfile is in the current directory, and you\'d like to name/tag your image dotnet-login-otel-image.\\"}),`\\n`,(0,n.jsx)(e.pre,{children:(0,n.jsx)(e.code,{className:\\"language-bash\\",children:`docker build -t dotnet-login-otel-image .\\n`})}),`\\n`,(0,n.jsx)(e.h3,{id:\\"run-the-docker-image\\",children:\\"Run the Docker image\\"}),`\\n`,(0,n.jsxs)(e.p,{children:[\\"After building the image, you\'d run it with the specified environment variables. 
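The run command below references two shell variables, ${ELASTIC_APM_SECRET_TOKEN} and ${ELASTIC_APM_SERVER_URL}. As a minimal sketch, you could export them first; the values here are placeholders, and the real token and URL come from your deployment (see "Getting Elastic Cloud variables" below):

```bash
# Placeholder values; substitute the secret token and APM server URL
# from your own Elastic Cloud deployment
export ELASTIC_APM_SECRET_TOKEN="<your-secret-token>"
export ELASTIC_APM_SERVER_URL="https://<your-deployment>.apm.<region>.cloud.es.io:443"
```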
For this, the docker \\",(0,n.jsx)(e.strong,{children:\\"run\\"}),\\" command is used with the -e flag for each environment variable.\\"]}),`\\n`,(0,n.jsx)(e.pre,{children:(0,n.jsx)(e.code,{className:\\"language-bash\\",children:`docker run \\\\\\\\\\n -e OTEL_EXPORTER_OTLP_HEADERS=\\"Authorization=Bearer \\\\${ELASTIC_APM_SECRET_TOKEN}\\" \\\\\\\\\\n -e OTEL_EXPORTER_OTLP_ENDPOINT=\\"\\\\${ELASTIC_APM_SERVER_URL}\\" \\\\\\\\\\n -e OTEL_METRICS_EXPORTER=\\"otlp\\" \\\\\\\\\\n -e OTEL_RESOURCE_ATTRIBUTES=\\"service.version=1.0,deployment.environment=production\\" \\\\\\\\\\n -e OTEL_SERVICE_NAME=\\"dotnet-login-otel-manual\\" \\\\\\\\\\n -e OTEL_TRACES_EXPORTER=\\"otlp\\" \\\\\\\\\\n dotnet-login-otel-image\\n`})}),`\\n`,(0,n.jsxs)(e.p,{children:[\\"Make sure that \\",(0,n.jsx)(e.code,{children:\\"${ELASTIC_APM_SECRET_TOKEN}\\"}),\\" and \\",(0,n.jsx)(e.code,{children:\\"${ELASTIC_APM_SERVER_URL}\\"}),\\" are set in your shell environment, using the actual values from your cloud deployment, as shown below.\\"]}),`\\n`,(0,n.jsxs)(e.p,{children:[(0,n.jsx)(e.strong,{children:\\"Getting Elastic Cloud variables\\"}),(0,n.jsx)(e.br,{}),`\\n`,\\"You can copy the endpoints and token from Kibana under the path \\",(0,n.jsx)(e.code,{children:\\"/app/home#/tutorial/apm\\"}),\\".\\"]}),`\\n`,(0,n.jsx)(e.p,{children:(0,n.jsx)(e.img,{src:\\"/assets/images/manual-instrumentation-net-apps-opentelemetry/elastic-blog-3-apm-agents.png\\",alt:\\"apm agents\\",width:\\"1924\\",height:\\"1304\\"})}),`\\n`,(0,n.jsx)(e.p,{children:\\"You can also use an environment file with docker run --env-file to make the command less verbose if you have multiple environment variables.\\"}),`\\n`,(0,n.jsx)(e.p,{children:\\"Once you have this up and running, you can ping the endpoint for your instrumented service (in our case, this is /login), and you should see the app appear in Elastic APM, as shown below:\\"}),`\\n`,(0,n.jsx)(e.p,{children:(0,n.jsx)(e.img,{src:\\"/assets/images/manual-instrumentation-net-apps-opentelemetry/services-2.png\\",alt:\\"services\\",width:\\"1576\\",height:\\"820\\"})}),`\\n`,(0,n.jsx)(e.p,{children:\\"It will begin by tracking throughput and latency, the critical metrics SREs pay attention to.\\"}),`\\n`,(0,n.jsx)(e.p,{children:\\"Digging in, we can see an overview of all our Transactions.\\"}),`\\n`,(0,n.jsx)(e.p,{children:(0,n.jsx)(e.img,{src:\\"/assets/images/manual-instrumentation-net-apps-opentelemetry/manual-net-login.png\\",alt:\\"login\\",width:\\"1571\\",height:\\"813\\"})}),`\\n`,(0,n.jsx)(e.p,{children:\\"And look at specific transactions, including the \\\\u201CSomeWork\\\\u201D activity/span we created in the code above:\\"}),`\\n`,(0,n.jsx)(e.p,{children:(0,n.jsx)(e.img,{src:\\"/assets/images/manual-instrumentation-net-apps-opentelemetry/latency_distribution_graph.png\\",alt:\\"latency distribution graph\\",width:\\"1577\\",height:\\"823\\"})}),`\\n`,(0,n.jsx)(e.p,{children:\\"There is clearly an outlier here, where one transaction took over 20ms. This is likely to be due to the CLR warming up.\\"}),`\\n`,(0,n.jsx)(e.h2,{id:\\"wrapping-up\\",children:\\"Wrapping up\\"}),`\\n`,(0,n.jsx)(e.p,{children:\\"With the code here instrumented and the Dockerfile bootstrapping the application, you\'ve transformed your simple .NET application into one that\'s instrumented with OpenTelemetry. 
This will aid greatly in understanding application performance, tracing errors, and gaining insights into how users interact with your software.\\"}),`\\n`,(0,n.jsx)(e.p,{children:\\"Remember, observability is a crucial aspect of modern application development, especially in distributed systems. With tools like OpenTelemetry, understanding complex systems becomes a tad easier.\\"}),`\\n`,(0,n.jsx)(e.p,{children:\\"In this blog, we discussed the following:\\"}),`\\n`,(0,n.jsxs)(e.ul,{children:[`\\n`,(0,n.jsx)(e.li,{children:\\"How to manually instrument .NET with OpenTelemetry.\\"}),`\\n`,(0,n.jsx)(e.li,{children:\\"How to build and start the instrumented application using standard commands in a Dockerfile.\\"}),`\\n`,(0,n.jsx)(e.li,{children:\\"Using OpenTelemetry and its support for multiple languages, DevOps and SRE teams can instrument their applications with ease, gaining immediate insights into the health of the entire application stack and reducing mean time to resolution (MTTR).\\"}),`\\n`]}),`\\n`,(0,n.jsx)(e.p,{children:\\"Since Elastic can support a mix of methods for ingesting data, whether auto-instrumentation with open-source OpenTelemetry or manual instrumentation with its native APM agents, you can plan your migration to OTel by focusing on a few applications first and then rolling OpenTelemetry out across your applications later on, in a manner that best fits your business needs.\\"}),`\\n`,(0,n.jsxs)(e.blockquote,{children:[`\\n`,(0,n.jsx)(e.p,{children:\\"Developer resources:\\"}),`\\n`,(0,n.jsxs)(e.ul,{children:[`\\n`,(0,n.jsxs)(e.li,{children:[(0,n.jsx)(e.a,{href:\\"https://www.elastic.co/blog/getting-started-opentelemetry-instrumentation-sample-app\\",rel:\\"nofollow\\",children:\\"Elastiflix application\\"}),\\", a guide to instrument different languages with OpenTelemetry\\"]}),`\\n`,(0,n.jsxs)(e.li,{children:[\\"Python: \\",(0,n.jsx)(e.a,{href:\\"https://www.elastic.co/blog/auto-instrumentation-of-python-applications-opentelemetry\\",rel:\\"nofollow\\",children:\\"Auto-instrumentation\\"}),\\", \\",(0,n.jsx)(e.a,{href:\\"https://www.elastic.co/blog/manual-instrumentation-of-python-applications-opentelemetry\\",rel:\\"nofollow\\",children:\\"Manual-instrumentation\\"})]}),`\\n`,(0,n.jsxs)(e.li,{children:[\\"Java: \\",(0,n.jsx)(e.a,{href:\\"https://www.elastic.co/blog/auto-instrumentation-of-java-applications-opentelemetry\\",rel:\\"nofollow\\",children:\\"Auto-instrumentation\\"}),\\", \\",(0,n.jsx)(e.a,{href:\\"https://www.elastic.co/blog/manual-instrumentation-of-java-applications-opentelemetry\\",rel:\\"nofollow\\",children:\\"Manual-instrumentation\\"})]}),`\\n`,(0,n.jsxs)(e.li,{children:[\\"Node.js: \\",(0,n.jsx)(e.a,{href:\\"https://www.elastic.co/blog/auto-instrument-nodejs-applications-opentelemetry\\",rel:\\"nofollow\\",children:\\"Auto-instrumentation\\"}),\\", \\",(0,n.jsx)(e.a,{href:\\"https://www.elastic.co/blog/manual-instrumentation-of-nodejs-applications-opentelemetry\\",rel:\\"nofollow\\",children:\\"Manual-instrumentation\\"})]}),`\\n`,(0,n.jsxs)(e.li,{children:[\\".NET: \\",(0,n.jsx)(e.a,{href:\\"https://www.elastic.co/blog/auto-instrumentation-of-net-applications-opentelemetry\\",rel:\\"nofollow\\",children:\\"Auto-instrumentation\\"}),\\", \\",(0,n.jsx)(e.a,{href:\\"https://www.elastic.co/blog/manual-instrumentation-net-apps-opentelemetry\\",rel:\\"nofollow\\",children:\\"Manual-instrumentation\\"})]}),`\\n`,(0,n.jsxs)(e.li,{children:[\\"Go: 
\\",(0,n.jsx)(e.a,{href:\\"https://elastic.co/blog/manual-instrumentation-of-go-applications-opentelemetry\\",rel:\\"nofollow\\",children:\\"Manual-instrumentation\\"})]}),`\\n`,(0,n.jsx)(e.li,{children:(0,n.jsx)(e.a,{href:\\"https://www.elastic.co/blog/best-practices-instrumenting-opentelemetry\\",rel:\\"nofollow\\",children:\\"Best practices for instrumenting OpenTelemetry\\"})}),`\\n`]}),`\\n`,(0,n.jsx)(e.p,{children:\\"General configuration and use case resources:\\"}),`\\n`,(0,n.jsxs)(e.ul,{children:[`\\n`,(0,n.jsx)(e.li,{children:(0,n.jsx)(e.a,{href:\\"https://www.elastic.co/blog/opentelemetry-observability\\",rel:\\"nofollow\\",children:\\"Independence with OpenTelemetry on Elastic\\"})}),`\\n`,(0,n.jsx)(e.li,{children:(0,n.jsx)(e.a,{href:\\"https://www.elastic.co/blog/implementing-kubernetes-observability-security-opentelemetry\\",rel:\\"nofollow\\",children:\\"Modern observability and security on Kubernetes with Elastic and OpenTelemetry\\"})}),`\\n`,(0,n.jsx)(e.li,{children:(0,n.jsx)(e.a,{href:\\"https://www.elastic.co/blog/3-models-logging-opentelemetry-elastic\\",rel:\\"nofollow\\",children:\\"3 models for logging with OpenTelemetry and Elastic\\"})}),`\\n`,(0,n.jsx)(e.li,{children:(0,n.jsx)(e.a,{href:\\"https://www.elastic.co/blog/adding-free-and-open-elastic-apm-as-part-of-your-elastic-observability-deployment\\",rel:\\"nofollow\\",children:\\"Adding free and open Elastic APM as part of your Elastic Observability deployment\\"})}),`\\n`,(0,n.jsx)(e.li,{children:(0,n.jsx)(e.a,{href:\\"https://www.elastic.co/blog/custom-metrics-app-code-java-agent-plugin\\",rel:\\"nofollow\\",children:\\"Capturing custom metrics through OpenTelemetry API in code with Elastic\\"})}),`\\n`,(0,n.jsx)(e.li,{children:(0,n.jsx)(e.a,{href:\\"https://www.elastic.co/virtual-events/future-proof-your-observability-platform-with-opentelemetry-and-elastic\\",rel:\\"nofollow\\",children:\\"Future-proof your observability platform with OpenTelemetry and Elastic\\"})}),`\\n`,(0,n.jsx)(e.li,{children:(0,n.jsx)(e.a,{href:\\"https://www.elastic.co/blog/kubernetes-k8s-observability-elasticsearch-cncf\\",rel:\\"nofollow\\",children:\\"Elastic Observability: Built for open technologies like Kubernetes, OpenTelemetry, Prometheus, Istio, and more\\"})}),`\\n`]}),`\\n`]}),`\\n`,(0,n.jsxs)(e.p,{children:[\\"Don\\\\u2019t have an Elastic Cloud account yet? \\",(0,n.jsx)(e.a,{href:\\"https://cloud.elastic.co/registration\\",rel:\\"nofollow\\",children:\\"Sign up for Elastic Cloud\\"}),\\" and try out the instrumentation capabilities that I discussed above. I would be interested in getting your feedback about your experience in gaining visibility into your application stack with Elastic.\\"]}),`\\n`,(0,n.jsx)(e.p,{children:(0,n.jsx)(e.em,{children:\\"The release and timing of any features or functionality described in this post remain at Elastic\'s sole discretion. 
Any features or functionality not currently available may not be delivered on time or at all.\\"})})]})}function d(t={}){let{wrapper:e}=t.components||{};return e?(0,n.jsx)(e,{...t,children:(0,n.jsx)(h,{...t})}):h(t)}return v(E);})();\\n;return Component;"},"_id":"articles/manual-instrumentation-of-net-applications-opentelemetry.mdx","_raw":{"sourceFilePath":"articles/manual-instrumentation-of-net-applications-opentelemetry.mdx","sourceFileName":"manual-instrumentation-of-net-applications-opentelemetry.mdx","sourceFileDir":"articles","contentType":"mdx","flattenedPath":"articles/manual-instrumentation-of-net-applications-opentelemetry"},"type":"Article","imageUrl":"/assets/images/manual-instrumentation-net-apps-opentelemetry/observability-launch-series-4-net-manual.jpg","readingTime":"12 min read","url":"/manual-instrumentation-net-apps-opentelemetry","headings":[{"level":2,"title":"What\'s covered?","href":"#whats-covered"},{"level":2,"title":"Prerequisites","href":"#prerequisites"},{"level":2,"title":"View the example source code","href":"#view-the-example-source-code"},{"level":2,"title":"Step-by-step guide","href":"#step-by-step-guide"},{"level":2,"title":"Step 1. Getting started","href":"#step-1-getting-started"},{"level":2,"title":"Step 2. Instrumenting the application","href":"#step-2-instrumenting-the-application"},{"level":3,"title":"Program.cs","href":"#programcs"},{"level":3,"title":"Startup.cs","href":"#startupcs"},{"level":3,"title":"Telemetry.cs","href":"#telemetrycs"},{"level":3,"title":"LoginController.cs","href":"#logincontrollercs"},{"level":2,"title":"Step 3. Base image setup","href":"#step-3-base-image-setup"},{"level":2,"title":"Step 4. Building the .NET application","href":"#step-4-building-the-net-application"},{"level":2,"title":"Step 5. Publishing the application","href":"#step-5-publishing-the-application"},{"level":2,"title":"Step 6. Preparing the final image","href":"#step-6-preparing-the-final-image"},{"level":2,"title":"Step 7. Entry point setup","href":"#step-7-entry-point-setup"},{"level":2,"title":"Step 8. Running the Docker image with environment variables","href":"#step-8-running-the-docker-image-with-environment-variables"},{"level":3,"title":"Build the Docker image","href":"#build-the-docker-image"},{"level":3,"title":"Run the Docker image","href":"#run-the-docker-image"},{"level":2,"title":"Wrapping up","href":"#wrapping-up"}]},{"title":"Manual instrumentation with OpenTelemetry for Node.js applications","slug":"manual-instrumentation-nodejs-apps-opentelemetry","date":"2023-08-31","description":"In this blog post, we will show you how to manually instrument Node.js applications using OpenTelemetry. We will explore how to use the proper OpenTelemetry Node.js libraries and in particular work on instrumenting tracing in a Node.js application.","image":"observability-launch-series-1-node-js-manual_(1).jpg","author":[{"slug":"bahubali-shetti","type":"Author","_raw":{}}],"tags":[{"slug":"opentelemetry","type":"Tag","_raw":{}},{"slug":"apm","type":"Tag","_raw":{}},{"slug":"nodejs","type":"Tag","_raw":{}},{"slug":"instrumentation","type":"Tag","_raw":{}}],"body":{"raw":"\\nDevOps and SRE teams are transforming the process of software development. While DevOps engineers focus on efficient software applications and service delivery, SRE teams are key to ensuring reliability, scalability, and performance. 
These teams must rely on a full-stack observability solution that allows them to manage and monitor systems and ensure issues are resolved before they impact the business.\\n\\nObservability across the entire stack of modern distributed applications requires data collection, processing, and correlation often in the form of dashboards. Ingesting all system data requires installing agents across stacks, frameworks, and providers — a process that can be challenging and time-consuming for teams who have to deal with version changes, compatibility issues, and proprietary code that doesn\'t scale as systems change.\\n\\nThanks to [OpenTelemetry](https://opentelemetry.io) (OTel), DevOps and SRE teams now have a standard way to collect and send data that doesn\'t rely on proprietary code and have a large support community reducing vendor lock-in.\\n\\nIn a [previous blog](https://www.elastic.co/blog/opentelemetry-observability), we also reviewed how to use the [OpenTelemetry demo](https://github.com/elastic/opentelemetry-demo) and connect it to Elastic\xae, as well as some of Elastic’s capabilities with OpenTelemetry and Kubernetes.\\n\\nIn this blog, we will show how to use [manual instrumentation for OpenTelemetry](https://opentelemetry.io/docs/instrumentation/java/manual/) with the Node.js service of our [application called Elastiflix](https://github.com/elastic/observability-examples). This approach is slightly more complex than using [auto-instrumentation](https://www.elastic.co/blog/auto-instrument-nodejs-applications-opentelemetry).\\n\\nThe beauty of this is that there is **no need for the otel-collector**! This setup enables you to slowly and easily migrate an application to OTel with Elastic according to a timeline that best fits your business.\\n\\n## Application, prerequisites, and config\\n\\nThe application that we use for this blog is called [Elastiflix](https://github.com/elastic/observability-examples), a movie streaming application. It consists of several micro-services written in .NET, NodeJS, Go, and Python.\\n\\nBefore we instrument our sample application, we will first need to understand how Elastic can receive the telemetry data.\\n\\n![Configuration](/assets/images/manual-instrumentation-nodejs-apps-opentelemetry/elastic-blog-1-config.png)\\n\\nAll of Elastic Observability’s APM capabilities are available with OTel data. Some of these include:\\n\\n- Service maps\\n- Service details (latency, throughput, failed transactions)\\n- Dependencies between services, distributed tracing\\n- Transactions (traces)\\n- Machine learning (ML) correlations\\n- Log correlation\\n\\nIn addition to Elastic’s APM and a unified view of the telemetry data, you will also be able to use Elastic’s powerful machine learning capabilities to reduce the analysis, and alerting to help reduce MTTR.\\n\\n## Prerequisites\\n\\n- An Elastic Cloud account — [sign up now](https://cloud.elastic.co/)\\n- A clone of the [Elastiflix demo application](https://github.com/elastic/observability-examples), or your own Node.js application\\n- Basic understanding of Docker — potentially install [Docker Desktop](https://www.docker.com/products/docker-desktop/)\\n- Basic understanding of Node.js\\n\\n## View the example source code\\n\\nThe full source code, including the Dockerfile used in this blog, can be found on [GitHub](https://github.com/elastic/observability-examples/tree/main/Elastiflix/node-server-otel-manual). 
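To follow along locally, a minimal sketch of fetching the service and installing its dependencies (assuming git and Node.js are installed, and that the service directory ships a package.json, as a Node.js service typically does):

```bash
# Clone the examples repository, enter the instrumented Node.js service,
# and install its dependencies
git clone https://github.com/elastic/observability-examples.git
cd observability-examples/Elastiflix/node-server-otel-manual
npm install
```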
The repository also contains the [same application without instrumentation](https://github.com/elastic/observability-examples/tree/main/Elastiflix/node-server). This allows you to compare each file and see the differences.\\n\\nBefore we begin, let’s look at the non-instrumented code first.\\n\\nThis is our simple index.js file that can receive a POST request. See the full code [here](https://github.com/elastic/observability-examples/blob/main/Elastiflix/node-server-otel-manual/index.js).\\n\\n```javascript\\nconst pino = require(\\"pino\\");\\nconst ecsFormat = require(\\"@elastic/ecs-pino-format\\"); //\\nconst log = pino({ ...ecsFormat({ convertReqRes: true }) });\\nconst expressPino = require(\\"express-pino-logger\\")({ logger: log });\\n\\nvar API_ENDPOINT_FAVORITES =\\n process.env.API_ENDPOINT_FAVORITES || \\"127.0.0.1:5000\\";\\nAPI_ENDPOINT_FAVORITES = API_ENDPOINT_FAVORITES.split(\\",\\");\\n\\nconst express = require(\\"express\\");\\nconst cors = require(\\"cors\\")({ origin: true });\\nconst cookieParser = require(\\"cookie-parser\\");\\nconst { json } = require(\\"body-parser\\");\\n\\nconst PORT = process.env.PORT || 3001;\\n\\nconst app = express().use(cookieParser(), cors, json(), expressPino);\\n\\nconst axios = require(\\"axios\\");\\n\\napp.use(express.json());\\napp.use(express.urlencoded({ extended: false }));\\napp.use((err, req, res, next) => {\\n log.error(err.stack);\\n res.status(500).json({ error: err.message, code: err.code });\\n});\\n\\nvar favorites = {};\\n\\napp.post(\\"/api/favorites\\", (req, res) => {\\n var randomIndex = Math.floor(Math.random() * API_ENDPOINT_FAVORITES.length);\\n if (process.env.THROW_NOT_A_FUNCTION_ERROR == \\"true\\" && Math.random() < 0.5) {\\n // randomly choose one of the endpoints\\n axios\\n .post(\\n \\"https://\\" +\\n API_ENDPOINT_FAVORITES[randomIndex] +\\n \\"/favorites?user_id=1\\",\\n req.body\\n )\\n .then(function (response) {\\n favorites = response.data;\\n // quiz solution: \\"42\\"\\n res.jsonn({ favorites: favorites });\\n })\\n .catch(function (error) {\\n res.json({ error: error, favorites: [] });\\n });\\n } else {\\n axios\\n .post(\\n \\"https://\\" +\\n API_ENDPOINT_FAVORITES[randomIndex] +\\n \\"/favorites?user_id=1\\",\\n req.body\\n )\\n .then(function (response) {\\n favorites = response.data;\\n res.json({ favorites: favorites });\\n })\\n .catch(function (error) {\\n res.json({ error: error, favorites: [] });\\n });\\n }\\n});\\n\\napp.listen(PORT, () => {\\n console.log(`Server listening on ${PORT}`);\\n});\\n```\\n\\n## Step-by-step guide\\n\\n### Step 0. Log in to your Elastic Cloud account\\n\\nThis blog assumes you have an Elastic Cloud account — if not, follow the [instructions to get started on Elastic Cloud](https://cloud.elastic.co/registration?elektra=en-cloud-page).\\n\\n![trial](/assets/images/manual-instrumentation-nodejs-apps-opentelemetry/elastic-blog-2-trial.png)\\n\\n### Step 1. 
Install and initialize OpenTelemetry\\n\\nAs a first step, we’ll need to add some additional modules to our application.\\n\\n```javascript\\nconst opentelemetry = require(\\"@opentelemetry/api\\");\\nconst { NodeTracerProvider } = require(\\"@opentelemetry/sdk-trace-node\\");\\nconst { BatchSpanProcessor } = require(\\"@opentelemetry/sdk-trace-base\\");\\n// OTLP gRPC exporter, used further below to send the traces\\nconst {\\n  OTLPTraceExporter,\\n} = require(\\"@opentelemetry/exporter-trace-otlp-grpc\\");\\nconst { Resource } = require(\\"@opentelemetry/resources\\");\\nconst {\\n SemanticResourceAttributes,\\n} = require(\\"@opentelemetry/semantic-conventions\\");\\n\\nconst { registerInstrumentations } = require(\\"@opentelemetry/instrumentation\\");\\nconst { HttpInstrumentation } = require(\\"@opentelemetry/instrumentation-http\\");\\nconst {\\n ExpressInstrumentation,\\n} = require(\\"@opentelemetry/instrumentation-express\\");\\n```\\n\\nWe start by creating a collectorOptions object with parameters such as the url and headers for connecting to the Elastic APM Server or OpenTelemetry collector.\\n\\n```javascript\\nconst collectorOptions = {\\n url: OTEL_EXPORTER_OTLP_ENDPOINT,\\n headers: OTEL_EXPORTER_OTLP_HEADERS,\\n};\\n```\\n\\nIn order to pass additional parameters to OpenTelemetry, we will read the OTEL_RESOURCE_ATTRIBUTES variable and convert it into an object.\\n\\n```javascript\\nconst envAttributes = process.env.OTEL_RESOURCE_ATTRIBUTES || \\"\\";\\n\\n// Parse the environment variable string into an object\\nconst attributes = envAttributes.split(\\",\\").reduce((acc, curr) => {\\n const [key, value] = curr.split(\\"=\\");\\n if (key && value) {\\n acc[key.trim()] = value.trim();\\n }\\n return acc;\\n}, {});\\n```\\n\\nNext, we use these parameters to populate the resource configuration.\\n\\n```javascript\\nconst resource = new Resource({\\n [SemanticResourceAttributes.SERVICE_NAME]:\\n attributes[\\"service.name\\"] || \\"node-server-otel-manual\\",\\n [SemanticResourceAttributes.SERVICE_VERSION]:\\n attributes[\\"service.version\\"] || \\"1.0.0\\",\\n [SemanticResourceAttributes.DEPLOYMENT_ENVIRONMENT]:\\n attributes[\\"deployment.environment\\"] || \\"production\\",\\n});\\n```\\n\\nWe then set up the trace provider using the previously created resource, followed by the exporter which takes the collectorOptions from before. The trace provider will allow us to create spans later.\\n\\nAdditionally, we specify the use of BatchSpanProcessor. The Span processor is an interface that allows hooks for span start and end method invocations.\\n\\nIn OpenTelemetry, different Span processors are offered. The BatchSpanProcessor batches spans and sends them in bulk. Multiple Span processors can be configured to be active at the same time using the MultiSpanProcessor. [See OpenTelemetry documentation](https://opentelemetry.io/docs/instrumentation/java/manual/#span-processor).\\n\\nAdditionally, we added the resource module. This allows us to specify attributes such as service.name, version, and more. See [OpenTelemetry semantic conventions documentation](https://opentelemetry.io/docs/specs/otel/resource/semantic_conventions/#semantic-attributes-with-sdk-provided-default-value) for more details.\\n\\n```javascript\\nconst tracerProvider = new NodeTracerProvider({\\n resource: resource,\\n});\\n\\nconst exporter = new OTLPTraceExporter(collectorOptions);\\ntracerProvider.addSpanProcessor(new BatchSpanProcessor(exporter));\\ntracerProvider.register();\\n```\\n\\nNext, we are going to register some instrumentations. This will automatically instrument Express and HTTP for us. 
While it’s possible to do this step fully manually as well, it would be complex and a waste of time. This way we can ensure that any incoming and outgoing request is captured properly and that functionality such as distributed tracing works without any additional work.\\n\\n```javascript\\nregisterInstrumentations({\\n instrumentations: [new HttpInstrumentation(), new ExpressInstrumentation()],\\n tracerProvider: tracerProvider,\\n});\\n```\\n\\nAs a last step, we will now get an instance of the tracer that we can use to create custom spans.\\n\\n```javascript\\nconst tracer = opentelemetry.trace.getTracer();\\n```\\n\\n### Step 2. Adding custom spans\\n\\nNow that we have the modules added and initialized, we can add custom spans.\\n\\nOur sample application has a POST request which calls a downstream service. If we want to have additional instrumentation for this part of our app, we simply wrap the function code with:\\n\\n```javascript\\ntracer.startActiveSpan(\'favorites\', (span) => {...\\n```\\n\\nThe wrapped code is as follows:\\n\\n```javascript\\napp.post(\\"/api/favorites\\", (req, res, next) => {\\n tracer.startActiveSpan(\\"favorites\\", (span) => {\\n axios\\n .post(\\n \\"https://\\" + API_ENDPOINT_FAVORITES + \\"/favorites?user_id=1\\",\\n req.body\\n )\\n .then(function (response) {\\n favorites = response.data;\\n span.end();\\n res.jsonn({ favorites: favorites });\\n })\\n .catch(next);\\n });\\n});\\n```\\n\\n**Automatic error handling** \\nFor automatic error handling, we add an Express error-handling middleware that records the exception for any error that happens during runtime.\\n\\n```javascript\\napp.use((err, req, res, next) => {\\n log.error(err.stack);\\n const span = opentelemetry.trace.getActiveSpan();\\n span.recordException(err);\\n span.end();\\n res.status(500).json({ error: err.message, code: err.code });\\n});\\n```\\n\\n**Additional code** \\nIn addition to modules and span instrumentation, the sample application also checks some environment variables at startup. When sending data to Elastic without an OTel collector, the OTEL_EXPORTER_OTLP_HEADERS variable is required as it contains the authentication. The same is true for OTEL_EXPORTER_OTLP_ENDPOINT, the host where we’ll send the telemetry data.\\n\\n```javascript\\nconst OTEL_EXPORTER_OTLP_HEADERS = process.env.OTEL_EXPORTER_OTLP_HEADERS;\\n// error if secret token is not set\\nif (!OTEL_EXPORTER_OTLP_HEADERS) {\\n throw new Error(\\"OTEL_EXPORTER_OTLP_HEADERS environment variable is not set\\");\\n}\\n\\nconst OTEL_EXPORTER_OTLP_ENDPOINT = process.env.OTEL_EXPORTER_OTLP_ENDPOINT;\\n// error if server url is not set\\nif (!OTEL_EXPORTER_OTLP_ENDPOINT) {\\n throw new Error(\\n \\"OTEL_EXPORTER_OTLP_ENDPOINT environment variable is not set\\"\\n );\\n}\\n```\\n\\n**Final code** \\nFor comparison, this is the instrumented code of our sample application. 
You can find the full source code in [GitHub](https://github.com/elastic/observability-examples/tree/main/Elastiflix/node-server-otel-manual).\\n\\n```javascript\\nconst pino = require(\\"pino\\");\\nconst ecsFormat = require(\\"@elastic/ecs-pino-format\\"); //\\nconst log = pino({ ...ecsFormat({ convertReqRes: true }) });\\nconst expressPino = require(\\"express-pino-logger\\")({ logger: log });\\n\\n// Add OpenTelemetry packages\\nconst opentelemetry = require(\\"@opentelemetry/api\\");\\nconst { NodeTracerProvider } = require(\\"@opentelemetry/sdk-trace-node\\");\\nconst { BatchSpanProcessor } = require(\\"@opentelemetry/sdk-trace-base\\");\\nconst {\\n OTLPTraceExporter,\\n} = require(\\"@opentelemetry/exporter-trace-otlp-grpc\\");\\nconst { Resource } = require(\\"@opentelemetry/resources\\");\\nconst {\\n SemanticResourceAttributes,\\n} = require(\\"@opentelemetry/semantic-conventions\\");\\n\\nconst { registerInstrumentations } = require(\\"@opentelemetry/instrumentation\\");\\n\\n// Import OpenTelemetry instrumentations\\nconst { HttpInstrumentation } = require(\\"@opentelemetry/instrumentation-http\\");\\nconst {\\n ExpressInstrumentation,\\n} = require(\\"@opentelemetry/instrumentation-express\\");\\n\\nvar API_ENDPOINT_FAVORITES =\\n process.env.API_ENDPOINT_FAVORITES || \\"127.0.0.1:5000\\";\\nAPI_ENDPOINT_FAVORITES = API_ENDPOINT_FAVORITES.split(\\",\\");\\n\\nconst OTEL_EXPORTER_OTLP_HEADERS = process.env.OTEL_EXPORTER_OTLP_HEADERS;\\n// error if secret token is not set\\nif (!OTEL_EXPORTER_OTLP_HEADERS) {\\n throw new Error(\\"OTEL_EXPORTER_OTLP_HEADERS environment variable is not set\\");\\n}\\n\\nconst OTEL_EXPORTER_OTLP_ENDPOINT = process.env.OTEL_EXPORTER_OTLP_ENDPOINT;\\n// error if server url is not set\\nif (!OTEL_EXPORTER_OTLP_ENDPOINT) {\\n throw new Error(\\n \\"OTEL_EXPORTER_OTLP_ENDPOINT environment variable is not set\\"\\n );\\n}\\n\\nconst collectorOptions = {\\n // url is optional and can be omitted - default is https://localhost:4317\\n // Unix domain sockets are also supported: \'unix:///path/to/socket.sock\'\\n url: OTEL_EXPORTER_OTLP_ENDPOINT,\\n headers: OTEL_EXPORTER_OTLP_HEADERS,\\n};\\n\\nconst envAttributes = process.env.OTEL_RESOURCE_ATTRIBUTES || \\"\\";\\n\\n// Parse the environment variable string into an object\\nconst attributes = envAttributes.split(\\",\\").reduce((acc, curr) => {\\n const [key, value] = curr.split(\\"=\\");\\n if (key && value) {\\n acc[key.trim()] = value.trim();\\n }\\n return acc;\\n}, {});\\n\\n// Create and configure the resource object\\nconst resource = new Resource({\\n [SemanticResourceAttributes.SERVICE_NAME]:\\n attributes[\\"service.name\\"] || \\"node-server-otel-manual\\",\\n [SemanticResourceAttributes.SERVICE_VERSION]:\\n attributes[\\"service.version\\"] || \\"1.0.0\\",\\n [SemanticResourceAttributes.DEPLOYMENT_ENVIRONMENT]:\\n attributes[\\"deployment.environment\\"] || \\"production\\",\\n});\\n\\n// Create and configure the tracer provider\\nconst tracerProvider = new NodeTracerProvider({\\n resource: resource,\\n});\\nconst exporter = new OTLPTraceExporter(collectorOptions);\\ntracerProvider.addSpanProcessor(new BatchSpanProcessor(exporter));\\ntracerProvider.register();\\n\\n//Register instrumentations\\nregisterInstrumentations({\\n instrumentations: [new HttpInstrumentation(), new ExpressInstrumentation()],\\n tracerProvider: tracerProvider,\\n});\\n\\nconst express = require(\\"express\\");\\nconst cors = require(\\"cors\\")({ origin: true });\\nconst cookieParser = 
require(\\"cookie-parser\\");\\nconst { json } = require(\\"body-parser\\");\\n\\nconst PORT = process.env.PORT || 3001;\\n\\nconst app = express().use(cookieParser(), cors, json(), expressPino);\\n\\nconst axios = require(\\"axios\\");\\n\\napp.use(express.json());\\napp.use(express.urlencoded({ extended: false }));\\napp.use((err, req, res, next) => {\\n log.error(err.stack);\\n const span = opentelemetry.trace.getActiveSpan();\\n span.recordException(err);\\n span.end();\\n res.status(500).json({ error: err.message, code: err.code });\\n});\\n\\nconst tracer = opentelemetry.trace.getTracer();\\n\\nvar favorites = {};\\n\\napp.post(\\"/api/favorites\\", (req, res, next) => {\\n tracer.startActiveSpan(\\"favorites\\", (span) => {\\n var randomIndex = Math.floor(Math.random() * API_ENDPOINT_FAVORITES.length);\\n\\n if (\\n process.env.THROW_NOT_A_FUNCTION_ERROR == \\"true\\" &&\\n Math.random() < 0.5\\n ) {\\n // randomly choose one of the endpoints\\n axios\\n .post(\\n \\"https://\\" +\\n API_ENDPOINT_FAVORITES[randomIndex] +\\n \\"/favorites?user_id=1\\",\\n req.body\\n )\\n .then(function (response) {\\n favorites = response.data;\\n // quiz solution: \\"42\\"\\n span.end();\\n res.jsonn({ favorites: favorites });\\n })\\n .catch(next);\\n } else {\\n axios\\n .post(\\n \\"https://\\" +\\n API_ENDPOINT_FAVORITES[randomIndex] +\\n \\"/favorites?user_id=1\\",\\n req.body\\n )\\n .then(function (response) {\\n favorites = response.data;\\n span.end();\\n res.json({ favorites: favorites });\\n })\\n .catch(next);\\n }\\n });\\n});\\n\\napp.listen(PORT, () => {\\n log.info(\`Server listening on \${PORT}\`);\\n});\\n```\\n\\n### Step 3. Running the Docker image with environment variables\\n\\nWe will use environment variables and pass in the configuration values to enable it to connect with [Elastic Observability’s APM server](https://www.elastic.co/guide/en/observability/current/apm-open-telemetry.html).\\n\\nBecause Elastic accepts OTLP natively, we just need to provide the endpoint and authentication where the OTEL Exporter needs to send the data, as well as some other environment variables.\\n\\n**Getting Elastic Cloud variables** \\nYou can copy the endpoints and token from Kibana\xae under the path `/app/home#/tutorial/apm`.\\n\\n![apm](/assets/images/manual-instrumentation-nodejs-apps-opentelemetry/elastic-blog-3-apm.png)\\n\\nYou will need to copy the following environment variables:\\n\\n```bash\\nOTEL_EXPORTER_OTLP_ENDPOINT\\nOTEL_EXPORTER_OTLP_HEADERS\\n```\\n\\n**Build the image**\\n\\n```bash\\ndocker build -t node-otel-manual-image .\\n```\\n\\n**Run the image**\\n\\n```bash\\ndocker run \\\\\\n -e OTEL_EXPORTER_OTLP_ENDPOINT=\\"\\" \\\\\\n -e OTEL_EXPORTER_OTLP_HEADERS=\\"Authorization=Bearer \\" \\\\\\n -e OTEL_RESOURCE_ATTRIBUTES=\\"service.version=1.0,deployment.environment=production,service.name=node-server-otel-manual\\" \\\\\\n -p 3001:3001 \\\\\\n node-otel-manual-image\\n```\\n\\nYou can now issue a few requests in order to generate trace data. Note that these requests are expected to return an error, as this service relies on some downstream services that you may not have running on your machine.\\n\\n```bash\\ncurl localhost:3001/api/login\\ncurl localhost:3001/api/favorites\\n\\n# or alternatively issue a request every second\\n\\nwhile true; do curl \\"localhost:3001/api/favorites\\"; sleep 1; done;\\n```\\n\\n### Step 4. 
Explore in Elastic APM\\n\\nNow that the service is instrumented, you should see the following output in Elastic APM when looking at the transactions section of your Node.js service:\\n\\n![graphs](/assets/images/manual-instrumentation-nodejs-apps-opentelemetry/elastic-blog-4-graphs.png)\\n\\nNotice how this mirrors the auto-instrumented version.\\n\\n![graphs-2](/assets/images/manual-instrumentation-nodejs-apps-opentelemetry/elastic-blog-4-graphs.png)\\n\\n## Is it worth it?\\n\\nThis is the million-dollar question. Depending on what level of detail you need, it\'s potentially necessary to manually instrument. Manual instrumentation lets you add custom spans, custom labels, and metrics where you want or need them. It allows you to get a level of detail that otherwise would not be possible and is oftentimes important for tracking business-specific KPIs.\\n\\nYour operations, and whether you need to troubleshoot or analyze the performance of specific parts of the code, will dictate when and what to instrument. But it’s helpful to know that you have the option to manually instrument.\\n\\nIf you noticed we didn’t yet instrument metrics, that is another blog. We discussed logs in a [previous blog](https://www.elastic.co/blog/3-models-logging-opentelemetry-elastic).\\n\\n## Conclusion\\n\\nIn this blog, we discussed the following:\\n\\n- How to manually instrument Node.js with OpenTelemetry\\n- The different modules needed when using Express\\n- How to properly initialize and instrument span\\n- How to easily set the OTLP ENDPOINT and OTLP HEADERS from Elastic without the need for a collector\\n\\nHopefully, this provides an easy-to-understand walk-through of instrumenting Node.js with OpenTelemetry and how easy it is to send traces into Elastic.\\n\\n> Developer resources:\\n>\\n> - [Elastiflix application](https://www.elastic.co/blog/getting-started-opentelemetry-instrumentation-sample-app), a guide to instrument different languages with OpenTelemetry\\n> - Python: [Auto-instrumentation](https://www.elastic.co/blog/auto-instrumentation-of-python-applications-opentelemetry), [Manual-instrumentation](https://www.elastic.co/blog/manual-instrumentation-of-python-applications-opentelemetry)\\n> - Java: [Auto-instrumentation](https://www.elastic.co/blog/auto-instrumentation-of-java-applications-opentelemetry), [Manual-instrumentation](https://www.elastic.co/blog/manual-instrumentation-of-java-applications-opentelemetry)\\n> - Node.js: [Auto-instrumentation](https://www.elastic.co/blog/auto-instrument-nodejs-applications-opentelemetry), [Manual-instrumentation](https://www.elastic.co/blog/manual-instrumentation-nodejs-apps-opentelemetry)\\n> - .NET: [Auto-instrumentation](https://www.elastic.co/blog/auto-instrumentation-of-net-applications-opentelemetry), [Manual-instrumentation](https://www.elastic.co/blog/manual-instrumentation-of-net-applications-opentelemetry)\\n> - Go: [Manual-instrumentation](https://elastic.co/blog/manual-instrumentation-of-go-applications-opentelemetry)\\n> - [Best practices for instrumenting OpenTelemetry](https://www.elastic.co/blog/best-practices-instrumenting-opentelemetry)\\n>\\n> General configuration and use case resources:\\n>\\n> - [Independence with OpenTelemetry on Elastic](https://www.elastic.co/blog/opentelemetry-observability)\\n> - [Modern observability and security on Kubernetes with Elastic and OpenTelemetry](https://www.elastic.co/blog/implementing-kubernetes-observability-security-opentelemetry)\\n> - [3 models for logging with OpenTelemetry and 
Elastic](https://www.elastic.co/blog/3-models-logging-opentelemetry-elastic)\\n> - [Adding free and open Elastic APM as part of your Elastic Observability deployment](https://www.elastic.co/blog/adding-free-and-open-elastic-apm-as-part-of-your-elastic-observability-deployment)\\n> - [Capturing custom metrics through OpenTelemetry API in code with Elastic](https://www.elastic.co/blog/custom-metrics-app-code-java-agent-plugin)\\n> - [Future-proof your observability platform with OpenTelemetry and Elastic](https://www.elastic.co/virtual-events/future-proof-your-observability-platform-with-opentelemetry-and-elastic)\\n> - [Elastic Observability: Built for open technologies like Kubernetes, OpenTelemetry, Prometheus, Istio, and more](https://www.elastic.co/blog/kubernetes-k8s-observability-elasticsearch-cncf)\\n\\nDon’t have an Elastic Cloud account yet? [Sign up for Elastic Cloud](https://cloud.elastic.co/registration) and try out the auto-instrumentation capabilities that I discussed above. I would be interested in getting your feedback about your experience in gaining visibility into your application stack with Elastic.\\n\\n_The release and timing of any features or functionality described in this post remain at Elastic\'s sole discretion. Any features or functionality not currently available may not be delivered on time or at all._\\n","code":"var Component=(()=>{var h=Object.create;var i=Object.defineProperty;var u=Object.getOwnPropertyDescriptor;var m=Object.getOwnPropertyNames;var f=Object.getPrototypeOf,g=Object.prototype.hasOwnProperty;var w=(t,e)=>()=>(e||t((e={exports:{}}).exports,e),e.exports),E=(t,e)=>{for(var r in e)i(t,r,{get:e[r],enumerable:!0})},a=(t,e,r,s)=>{if(e&&typeof e==\\"object\\"||typeof e==\\"function\\")for(let o of m(e))!g.call(t,o)&&o!==r&&i(t,o,{get:()=>e[o],enumerable:!(s=u(e,o))||s.enumerable});return t};var v=(t,e,r)=>(r=t!=null?h(f(t)):{},a(e||!t||!t.__esModule?i(r,\\"default\\",{value:t,enumerable:!0}):r,t)),T=t=>a(i({},\\"__esModule\\",{value:!0}),t);var c=w((P,l)=>{l.exports=_jsx_runtime});var b={};E(b,{default:()=>d,frontmatter:()=>y});var n=v(c()),y={title:\\"Manual instrumentation with OpenTelemetry for Node.js applications\\",slug:\\"manual-instrumentation-nodejs-apps-opentelemetry\\",date:\\"2023-08-31\\",description:\\"In this blog post, we will show you how to manually instrument Node.js applications using OpenTelemetry. We will explore how to use the proper OpenTelemetry Node.js libraries and in particular work on instrumenting tracing in a Node.js application.\\",author:[{slug:\\"bahubali-shetti\\"}],image:\\"observability-launch-series-1-node-js-manual_(1).jpg\\",tags:[{slug:\\"opentelemetry\\"},{slug:\\"apm\\"},{slug:\\"nodejs\\"},{slug:\\"instrumentation\\"}]};function p(t){let e={a:\\"a\\",blockquote:\\"blockquote\\",br:\\"br\\",code:\\"code\\",em:\\"em\\",h2:\\"h2\\",h3:\\"h3\\",img:\\"img\\",li:\\"li\\",p:\\"p\\",pre:\\"pre\\",strong:\\"strong\\",ul:\\"ul\\",...t.components};return(0,n.jsxs)(n.Fragment,{children:[(0,n.jsx)(e.p,{children:\\"DevOps and SRE teams are transforming the process of software development. While DevOps engineers focus on efficient software applications and service delivery, SRE teams are key to ensuring reliability, scalability, and performance. 
These teams must rely on a full-stack observability solution that allows them to manage and monitor systems and ensure issues are resolved before they impact the business.\\"}),`\\n`,(0,n.jsx)(e.p,{children:\\"Observability across the entire stack of modern distributed applications requires data collection, processing, and correlation often in the form of dashboards. Ingesting all system data requires installing agents across stacks, frameworks, and providers \\\\u2014 a process that can be challenging and time-consuming for teams who have to deal with version changes, compatibility issues, and proprietary code that doesn\'t scale as systems change.\\"}),`\\n`,(0,n.jsxs)(e.p,{children:[\\"Thanks to \\",(0,n.jsx)(e.a,{href:\\"https://opentelemetry.io\\",rel:\\"nofollow\\",children:\\"OpenTelemetry\\"}),\\" (OTel), DevOps and SRE teams now have a standard way to collect and send data that doesn\'t rely on proprietary code and have a large support community reducing vendor lock-in.\\"]}),`\\n`,(0,n.jsxs)(e.p,{children:[\\"In a \\",(0,n.jsx)(e.a,{href:\\"https://www.elastic.co/blog/opentelemetry-observability\\",rel:\\"nofollow\\",children:\\"previous blog\\"}),\\", we also reviewed how to use the \\",(0,n.jsx)(e.a,{href:\\"https://github.com/elastic/opentelemetry-demo\\",rel:\\"nofollow\\",children:\\"OpenTelemetry demo\\"}),\\" and connect it to Elastic\\",(0,n.jsx)(\\"sup\\",{children:\\"\\\\xAE\\"}),\\", as well as some of Elastic\\\\u2019s capabilities with OpenTelemetry and Kubernetes.\\"]}),`\\n`,(0,n.jsxs)(e.p,{children:[\\"In this blog, we will show how to use \\",(0,n.jsx)(e.a,{href:\\"https://opentelemetry.io/docs/instrumentation/java/manual/\\",rel:\\"nofollow\\",children:\\"manual instrumentation for OpenTelemetry\\"}),\\" with the Node.js service of our \\",(0,n.jsx)(e.a,{href:\\"https://github.com/elastic/observability-examples\\",rel:\\"nofollow\\",children:\\"application called Elastiflix\\"}),\\". This approach is slightly more complex than using \\",(0,n.jsx)(e.a,{href:\\"https://www.elastic.co/blog/auto-instrument-nodejs-applications-opentelemetry\\",rel:\\"nofollow\\",children:\\"auto-instrumentation\\"}),\\".\\"]}),`\\n`,(0,n.jsxs)(e.p,{children:[\\"The beauty of this is that there is \\",(0,n.jsx)(e.strong,{children:\\"no need for the otel-collector\\"}),\\"! This setup enables you to slowly and easily migrate an application to OTel with Elastic according to a timeline that best fits your business.\\"]}),`\\n`,(0,n.jsx)(e.h2,{id:\\"application-prerequisites-and-config\\",children:\\"Application, prerequisites, and config\\"}),`\\n`,(0,n.jsxs)(e.p,{children:[\\"The application that we use for this blog is called \\",(0,n.jsx)(e.a,{href:\\"https://github.com/elastic/observability-examples\\",rel:\\"nofollow\\",children:\\"Elastiflix\\"}),\\", a movie streaming application. It consists of several micro-services written in .NET, NodeJS, Go, and Python.\\"]}),`\\n`,(0,n.jsx)(e.p,{children:\\"Before we instrument our sample application, we will first need to understand how Elastic can receive the telemetry data.\\"}),`\\n`,(0,n.jsx)(e.p,{children:(0,n.jsx)(e.img,{src:\\"/assets/images/manual-instrumentation-nodejs-apps-opentelemetry/elastic-blog-1-config.png\\",alt:\\"Configuration\\",width:\\"1365\\",height:\\"687\\"})}),`\\n`,(0,n.jsx)(e.p,{children:\\"All of Elastic Observability\\\\u2019s APM capabilities are available with OTel data. 
Some of these include:\\"}),`\\n`,(0,n.jsxs)(e.ul,{children:[`\\n`,(0,n.jsx)(e.li,{children:\\"Service maps\\"}),`\\n`,(0,n.jsx)(e.li,{children:\\"Service details (latency, throughput, failed transactions)\\"}),`\\n`,(0,n.jsx)(e.li,{children:\\"Dependencies between services, distributed tracing\\"}),`\\n`,(0,n.jsx)(e.li,{children:\\"Transactions (traces)\\"}),`\\n`,(0,n.jsx)(e.li,{children:\\"Machine learning (ML) correlations\\"}),`\\n`,(0,n.jsx)(e.li,{children:\\"Log correlation\\"}),`\\n`]}),`\\n`,(0,n.jsx)(e.p,{children:\\"In addition to Elastic\\\\u2019s APM and a unified view of the telemetry data, you will also be able to use Elastic\\\\u2019s powerful machine learning capabilities to reduce the analysis, and alerting to help reduce MTTR.\\"}),`\\n`,(0,n.jsx)(e.h2,{id:\\"prerequisites\\",children:\\"Prerequisites\\"}),`\\n`,(0,n.jsxs)(e.ul,{children:[`\\n`,(0,n.jsxs)(e.li,{children:[\\"An Elastic Cloud account \\\\u2014 \\",(0,n.jsx)(e.a,{href:\\"https://cloud.elastic.co/\\",rel:\\"nofollow\\",children:\\"sign up now\\"})]}),`\\n`,(0,n.jsxs)(e.li,{children:[\\"A clone of the \\",(0,n.jsx)(e.a,{href:\\"https://github.com/elastic/observability-examples\\",rel:\\"nofollow\\",children:\\"Elastiflix demo application\\"}),\\", or your own Node.js application\\"]}),`\\n`,(0,n.jsxs)(e.li,{children:[\\"Basic understanding of Docker \\\\u2014 potentially install \\",(0,n.jsx)(e.a,{href:\\"https://www.docker.com/products/docker-desktop/\\",rel:\\"nofollow\\",children:\\"Docker Desktop\\"})]}),`\\n`,(0,n.jsx)(e.li,{children:\\"Basic understanding of Node.js\\"}),`\\n`]}),`\\n`,(0,n.jsx)(e.h2,{id:\\"view-the-example-source-code\\",children:\\"View the example source code\\"}),`\\n`,(0,n.jsxs)(e.p,{children:[\\"The full source code, including the Dockerfile used in this blog, can be found on \\",(0,n.jsx)(e.a,{href:\\"https://github.com/elastic/observability-examples/tree/main/Elastiflix/node-server-otel-manual\\",rel:\\"nofollow\\",children:\\"GitHub\\"}),\\". The repository also contains the \\",(0,n.jsx)(e.a,{href:\\"https://github.com/elastic/observability-examples/tree/main/Elastiflix/node-server\\",rel:\\"nofollow\\",children:\\"same application without instrumentation\\"}),\\". This allows you to compare each file and see the differences.\\"]}),`\\n`,(0,n.jsx)(e.p,{children:\\"Before we begin, let\\\\u2019s look at the non-instrumented code first.\\"}),`\\n`,(0,n.jsxs)(e.p,{children:[\\"This is our simple index.js file that can receive a POST request. 
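For reference, once the service is running you can exercise this route with a simple request (a sketch assuming the default port 3001 used in the listing below):

```bash
# Send an empty JSON body to the favorites route (default port is 3001)
curl -X POST localhost:3001/api/favorites \
  -H "Content-Type: application/json" \
  -d '{}'
```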
See the full code \\",(0,n.jsx)(e.a,{href:\\"https://github.com/elastic/observability-examples/blob/main/Elastiflix/node-server-otel-manual/index.js\\",rel:\\"nofollow\\",children:\\"here\\"}),\\".\\"]}),`\\n`,(0,n.jsx)(e.pre,{children:(0,n.jsx)(e.code,{className:\\"language-javascript\\",children:`const pino = require(\\"pino\\");\\nconst ecsFormat = require(\\"@elastic/ecs-pino-format\\"); //\\nconst log = pino({ ...ecsFormat({ convertReqRes: true }) });\\nconst expressPino = require(\\"express-pino-logger\\")({ logger: log });\\n\\nvar API_ENDPOINT_FAVORITES =\\n process.env.API_ENDPOINT_FAVORITES || \\"127.0.0.1:5000\\";\\nAPI_ENDPOINT_FAVORITES = API_ENDPOINT_FAVORITES.split(\\",\\");\\n\\nconst express = require(\\"express\\");\\nconst cors = require(\\"cors\\")({ origin: true });\\nconst cookieParser = require(\\"cookie-parser\\");\\nconst { json } = require(\\"body-parser\\");\\n\\nconst PORT = process.env.PORT || 3001;\\n\\nconst app = express().use(cookieParser(), cors, json(), expressPino);\\n\\nconst axios = require(\\"axios\\");\\n\\napp.use(express.json());\\napp.use(express.urlencoded({ extended: false }));\\napp.use((err, req, res, next) => {\\n log.error(err.stack);\\n res.status(500).json({ error: err.message, code: err.code });\\n});\\n\\nvar favorites = {};\\n\\napp.post(\\"/api/favorites\\", (req, res) => {\\n var randomIndex = Math.floor(Math.random() * API_ENDPOINT_FAVORITES.length);\\n if (process.env.THROW_NOT_A_FUNCTION_ERROR == \\"true\\" && Math.random() < 0.5) {\\n // randomly choose one of the endpoints\\n axios\\n .post(\\n \\"https://\\" +\\n API_ENDPOINT_FAVORITES[randomIndex] +\\n \\"/favorites?user_id=1\\",\\n req.body\\n )\\n .then(function (response) {\\n favorites = response.data;\\n // quiz solution: \\"42\\"\\n res.jsonn({ favorites: favorites });\\n })\\n .catch(function (error) {\\n res.json({ error: error, favorites: [] });\\n });\\n } else {\\n axios\\n .post(\\n \\"https://\\" +\\n API_ENDPOINT_FAVORITES[randomIndex] +\\n \\"/favorites?user_id=1\\",\\n req.body\\n )\\n .then(function (response) {\\n favorites = response.data;\\n res.json({ favorites: favorites });\\n })\\n .catch(function (error) {\\n res.json({ error: error, favorites: [] });\\n });\\n }\\n});\\n\\napp.listen(PORT, () => {\\n console.log(\\\\`Server listening on \\\\${PORT}\\\\`);\\n});\\n`})}),`\\n`,(0,n.jsx)(e.h2,{id:\\"step-by-step-guide\\",children:\\"Step-by-step guide\\"}),`\\n`,(0,n.jsx)(e.h3,{id:\\"step-0-log-in-to-your-elastic-cloud-account\\",children:\\"Step 0. Log in to your Elastic Cloud account\\"}),`\\n`,(0,n.jsxs)(e.p,{children:[\\"This blog assumes you have an Elastic Cloud account \\\\u2014 if not, follow the \\",(0,n.jsx)(e.a,{href:\\"https://cloud.elastic.co/registration?elektra=en-cloud-page\\",rel:\\"nofollow\\",children:\\"instructions to get started on Elastic Cloud\\"}),\\".\\"]}),`\\n`,(0,n.jsx)(e.p,{children:(0,n.jsx)(e.img,{src:\\"/assets/images/manual-instrumentation-nodejs-apps-opentelemetry/elastic-blog-2-trial.png\\",alt:\\"trial\\",width:\\"884\\",height:\\"654\\"})}),`\\n`,(0,n.jsx)(e.h3,{id:\\"step-1-install-and-initialize-opentelemetry\\",children:\\"Step 1. 
Install and initialize OpenTelemetry\\"}),`\\n`,(0,n.jsx)(e.p,{children:\\"As a first step, we\\\\u2019ll need to add some additional modules to our application.\\"}),`\\n`,(0,n.jsx)(e.pre,{children:(0,n.jsx)(e.code,{className:\\"language-javascript\\",children:`const opentelemetry = require(\\"@opentelemetry/api\\");\\nconst { NodeTracerProvider } = require(\\"@opentelemetry/sdk-trace-node\\");\\nconst { BatchSpanProcessor } = require(\\"@opentelemetry/sdk-trace-base\\");\\nconst {\\n OTLPTraceExporter,\\n} = require(\\"@opentelemetry/exporter-trace-otlp-grpc\\");\\nconst { Resource } = require(\\"@opentelemetry/resources\\");\\nconst {\\n SemanticResourceAttributes,\\n} = require(\\"@opentelemetry/semantic-conventions\\");\\n\\nconst { registerInstrumentations } = require(\\"@opentelemetry/instrumentation\\");\\nconst { HttpInstrumentation } = require(\\"@opentelemetry/instrumentation-http\\");\\nconst {\\n ExpressInstrumentation,\\n} = require(\\"@opentelemetry/instrumentation-express\\");\\n`})}),`\\n`,(0,n.jsx)(e.p,{children:\\"We start by creating a collectorOptions object with parameters such as the url and headers for connecting to the Elastic APM Server or OpenTelemetry collector.\\"}),`\\n`,(0,n.jsx)(e.pre,{children:(0,n.jsx)(e.code,{className:\\"language-javascript\\",children:`const collectorOptions = {\\n url: OTEL_EXPORTER_OTLP_ENDPOINT,\\n headers: OTEL_EXPORTER_OTLP_HEADERS,\\n};\\n`})}),`\\n`,(0,n.jsx)(e.p,{children:\\"In order to pass additional parameters to OpenTelemetry, we will read the OTEL_RESOURCE_ATTRIBUTES variable and convert it into an object.\\"}),`\\n`,(0,n.jsx)(e.pre,{children:(0,n.jsx)(e.code,{className:\\"language-javascript\\",children:`const envAttributes = process.env.OTEL_RESOURCE_ATTRIBUTES || \\"\\";\\n\\n// Parse the environment variable string into an object\\nconst attributes = envAttributes.split(\\",\\").reduce((acc, curr) => {\\n const [key, value] = curr.split(\\"=\\");\\n if (key && value) {\\n acc[key.trim()] = value.trim();\\n }\\n return acc;\\n}, {});\\n`})}),`\\n`,(0,n.jsx)(e.p,{children:\\"Next, we will use these parameters to populate the resources configuration.\\"}),`\\n`,(0,n.jsx)(e.pre,{children:(0,n.jsx)(e.code,{className:\\"language-javascript\\",children:`const resource = new Resource({\\n [SemanticResourceAttributes.SERVICE_NAME]:\\n attributes[\\"service.name\\"] || \\"node-server-otel-manual\\",\\n [SemanticResourceAttributes.SERVICE_VERSION]:\\n attributes[\\"service.version\\"] || \\"1.0.0\\",\\n [SemanticResourceAttributes.DEPLOYMENT_ENVIRONMENT]:\\n attributes[\\"deployment.environment\\"] || \\"production\\",\\n});\\n`})}),`\\n`,(0,n.jsx)(e.p,{children:\\"We then set up the trace provider using the previously created resource, followed by the exporter which takes the collectorOptions from before. The trace provider will allow us to create spans later.\\"}),`\\n`,(0,n.jsx)(e.p,{children:\\"Additionally, we specify the use of BatchSpanProcessor. The Span processor is an interface that allows hooks for span start and end method invocations.\\"}),`\\n`,(0,n.jsxs)(e.p,{children:[\\"In OpenTelemetry, different Span processors are offered. The BatchSpanProcessor batches spans and sends them in bulk. Multiple Span processors can be configured to be active at the same time using the MultiSpanProcessor. \\",(0,n.jsx)(e.a,{href:\\"https://opentelemetry.io/docs/instrumentation/java/manual/#span-processor\\",rel:\\"nofollow\\",children:\\"See OpenTelemetry documentation\\"}),\\".\\"]})
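,`\\n`,(0,n.jsx)(e.p,{children:\\"As a hypothetical example that is not part of the sample application, you could register a second span processor next to the batch processor, for instance to print finished spans to the console while developing. The sketch below assumes the tracerProvider that we create in the next snippet.\\"}),`\\n`,(0,n.jsx)(e.pre,{children:(0,n.jsx)(e.code,{className:\\"language-javascript\\",children:`// Hypothetical debugging aid - not part of the sample application.\\n// Every registered span processor receives every finished span, so spans\\n// are printed to the console and exported via OTLP at the same time.\\nconst {\\n ConsoleSpanExporter,\\n SimpleSpanProcessor,\\n} = require(\\"@opentelemetry/sdk-trace-base\\");\\n\\ntracerProvider.addSpanProcessor(\\n new SimpleSpanProcessor(new ConsoleSpanExporter())\\n);\\n`})})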
,`\\n`,(0,n.jsxs)(e.p,{children:[\\"We also added the resource module. This allows us to specify attributes such as service.name, version, and more. See \\",(0,n.jsx)(e.a,{href:\\"https://opentelemetry.io/docs/specs/otel/resource/semantic_conventions/#semantic-attributes-with-sdk-provided-default-value\\",rel:\\"nofollow\\",children:\\"OpenTelemetry semantic conventions documentation\\"}),\\" for more details.\\"]}),`\\n`,(0,n.jsx)(e.pre,{children:(0,n.jsx)(e.code,{className:\\"language-javascript\\",children:`const tracerProvider = new NodeTracerProvider({\\n resource: resource,\\n});\\n\\nconst exporter = new OTLPTraceExporter(collectorOptions);\\ntracerProvider.addSpanProcessor(new BatchSpanProcessor(exporter));\\ntracerProvider.register();\\n`})}),`\\n`,(0,n.jsx)(e.p,{children:\\"Next, we are going to register some instrumentations. This will automatically instrument Express and HTTP for us. While it\\\\u2019s possible to do this step fully manually as well, it would be complex and a waste of time. This way we can ensure that any incoming and outgoing request is captured properly and that functionality such as distributed tracing works without any additional work.\\"}),`\\n`,(0,n.jsx)(e.pre,{children:(0,n.jsx)(e.code,{className:\\"language-javascript\\",children:`registerInstrumentations({\\n instrumentations: [new HttpInstrumentation(), new ExpressInstrumentation()],\\n tracerProvider: tracerProvider,\\n});\\n`})}),`\\n`,(0,n.jsx)(e.p,{children:\\"As a last step, we will now get an instance of the tracer that we can use to create custom spans. Note that getTracer expects a name, so we pass the service name.\\"}),`\\n`,(0,n.jsx)(e.pre,{children:(0,n.jsx)(e.code,{className:\\"language-javascript\\",children:`const tracer = opentelemetry.trace.getTracer(\\"node-server-otel-manual\\");\\n`})}),`\\n`,(0,n.jsx)(e.h3,{id:\\"step-2-adding-custom-spans\\",children:\\"Step 2. Adding custom spans\\"}),`\\n`,(0,n.jsx)(e.p,{children:\\"Now that we have the modules added and initialized, we can add custom spans.\\"}),`\\n`,(0,n.jsx)(e.p,{children:\\"Our sample application has a POST request which calls a downstream service. 
If we want to have additional instrumentation for this part of our app, we simply wrap the function code with:\\"}),`\\n`,(0,n.jsx)(e.pre,{children:(0,n.jsx)(e.code,{className:\\"language-javascript\\",children:`tracer.startActiveSpan(\'favorites\', (span) => {...\\n`})}),`\\n`,(0,n.jsx)(e.p,{children:\\"The wrapped code is as follows:\\"}),`\\n`,(0,n.jsx)(e.pre,{children:(0,n.jsx)(e.code,{className:\\"language-javascript\\",children:`app.post(\\"/api/favorites\\", (req, res, next) => {\\n tracer.startActiveSpan(\\"favorites\\", (span) => {\\n axios\\n .post(\\n \\"http://\\" + API_ENDPOINT_FAVORITES + \\"/favorites?user_id=1\\",\\n req.body\\n )\\n .then(function (response) {\\n favorites = response.data;\\n span.end();\\n res.json({ favorites: favorites });\\n })\\n .catch(next);\\n });\\n});\\n`})}),`\\n`,(0,n.jsxs)(e.p,{children:[(0,n.jsx)(e.strong,{children:\\"Automatic error handling\\"}),(0,n.jsx)(e.br,{}),`\\n`,\\"For automatic error handling, we are adding a function that we use in Express which captures the exception for any error that happens during runtime.\\"]}),`\\n`,(0,n.jsx)(e.pre,{children:(0,n.jsx)(e.code,{className:\\"language-javascript\\",children:`app.use((err, req, res, next) => {\\n log.error(err.stack);\\n const span = opentelemetry.trace.getActiveSpan();\\n if (span) {\\n span.recordException(err);\\n span.end();\\n }\\n res.status(500).json({ error: err.message, code: err.code });\\n});\\n`})}),`\\n`,(0,n.jsxs)(e.p,{children:[(0,n.jsx)(e.strong,{children:\\"Additional code\\"}),(0,n.jsx)(e.br,{}),`\\n`,\\"In addition to modules and span instrumentation, the sample application also checks some environment variables at startup. When sending data to Elastic without an OTel collector, the OTEL_EXPORTER_OTLP_HEADERS variable is required as it contains the authentication. The same is true for OTEL_EXPORTER_OTLP_ENDPOINT, the host where we\\\\u2019ll send the telemetry data.\\"]}),`\\n`,(0,n.jsx)(e.pre,{children:(0,n.jsx)(e.code,{className:\\"language-javascript\\",children:`const OTEL_EXPORTER_OTLP_HEADERS = process.env.OTEL_EXPORTER_OTLP_HEADERS;\\n// error if secret token is not set\\nif (!OTEL_EXPORTER_OTLP_HEADERS) {\\n throw new Error(\\"OTEL_EXPORTER_OTLP_HEADERS environment variable is not set\\");\\n}\\n\\nconst OTEL_EXPORTER_OTLP_ENDPOINT = process.env.OTEL_EXPORTER_OTLP_ENDPOINT;\\n// error if server url is not set\\nif (!OTEL_EXPORTER_OTLP_ENDPOINT) {\\n throw new Error(\\n \\"OTEL_EXPORTER_OTLP_ENDPOINT environment variable is not set\\"\\n );\\n}\\n`})})
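,`\\n`,(0,n.jsx)(e.p,{children:\\"Custom spans can also carry attributes (labels), which become searchable metadata on the transaction in Elastic APM. The following sketch is a hypothetical example and not part of the sample application.\\"}),`\\n`,(0,n.jsx)(e.pre,{children:(0,n.jsx)(e.code,{className:\\"language-javascript\\",children:`// Hypothetical example - not part of the sample application.\\ntracer.startActiveSpan(\\"favorites\\", (span) => {\\n // attributes become searchable span metadata in Elastic APM\\n span.setAttribute(\\"user.id\\", \\"1\\");\\n // ... call the downstream service, then:\\n span.end();\\n});\\n`})}),`\\n`,(0,n.jsxs)(e.p,{children:[(0,n.jsx)(e.strong,{children:\\"Final code\\"}),(0,n.jsx)(e.br,{}),`\\n`,\\"For comparison, this is the instrumented code of our sample application. 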
You can find the full source code in \\",(0,n.jsx)(e.a,{href:\\"https://github.com/elastic/observability-examples/tree/main/Elastiflix/node-server-otel-manual\\",rel:\\"nofollow\\",children:\\"GitHub\\"}),\\".\\"]}),`\\n`,(0,n.jsx)(e.pre,{children:(0,n.jsx)(e.code,{className:\\"language-javascript\\",children:`const pino = require(\\"pino\\");\\nconst ecsFormat = require(\\"@elastic/ecs-pino-format\\"); //\\nconst log = pino({ ...ecsFormat({ convertReqRes: true }) });\\nconst expressPino = require(\\"express-pino-logger\\")({ logger: log });\\n\\n// Add OpenTelemetry packages\\nconst opentelemetry = require(\\"@opentelemetry/api\\");\\nconst { NodeTracerProvider } = require(\\"@opentelemetry/sdk-trace-node\\");\\nconst { BatchSpanProcessor } = require(\\"@opentelemetry/sdk-trace-base\\");\\nconst {\\n OTLPTraceExporter,\\n} = require(\\"@opentelemetry/exporter-trace-otlp-grpc\\");\\nconst { Resource } = require(\\"@opentelemetry/resources\\");\\nconst {\\n SemanticResourceAttributes,\\n} = require(\\"@opentelemetry/semantic-conventions\\");\\n\\nconst { registerInstrumentations } = require(\\"@opentelemetry/instrumentation\\");\\n\\n// Import OpenTelemetry instrumentations\\nconst { HttpInstrumentation } = require(\\"@opentelemetry/instrumentation-http\\");\\nconst {\\n ExpressInstrumentation,\\n} = require(\\"@opentelemetry/instrumentation-express\\");\\n\\nvar API_ENDPOINT_FAVORITES =\\n process.env.API_ENDPOINT_FAVORITES || \\"127.0.0.1:5000\\";\\nAPI_ENDPOINT_FAVORITES = API_ENDPOINT_FAVORITES.split(\\",\\");\\n\\nconst OTEL_EXPORTER_OTLP_HEADERS = process.env.OTEL_EXPORTER_OTLP_HEADERS;\\n// error if secret token is not set\\nif (!OTEL_EXPORTER_OTLP_HEADERS) {\\n throw new Error(\\"OTEL_EXPORTER_OTLP_HEADERS environment variable is not set\\");\\n}\\n\\nconst OTEL_EXPORTER_OTLP_ENDPOINT = process.env.OTEL_EXPORTER_OTLP_ENDPOINT;\\n// error if server url is not set\\nif (!OTEL_EXPORTER_OTLP_ENDPOINT) {\\n throw new Error(\\n \\"OTEL_EXPORTER_OTLP_ENDPOINT environment variable is not set\\"\\n );\\n}\\n\\nconst collectorOptions = {\\n // url is optional and can be omitted - default is https://localhost:4317\\n // Unix domain sockets are also supported: \'unix:///path/to/socket.sock\'\\n url: OTEL_EXPORTER_OTLP_ENDPOINT,\\n headers: OTEL_EXPORTER_OTLP_HEADERS,\\n};\\n\\nconst envAttributes = process.env.OTEL_RESOURCE_ATTRIBUTES || \\"\\";\\n\\n// Parse the environment variable string into an object\\nconst attributes = envAttributes.split(\\",\\").reduce((acc, curr) => {\\n const [key, value] = curr.split(\\"=\\");\\n if (key && value) {\\n acc[key.trim()] = value.trim();\\n }\\n return acc;\\n}, {});\\n\\n// Create and configure the resource object\\nconst resource = new Resource({\\n [SemanticResourceAttributes.SERVICE_NAME]:\\n attributes[\\"service.name\\"] || \\"node-server-otel-manual\\",\\n [SemanticResourceAttributes.SERVICE_VERSION]:\\n attributes[\\"service.version\\"] || \\"1.0.0\\",\\n [SemanticResourceAttributes.DEPLOYMENT_ENVIRONMENT]:\\n attributes[\\"deployment.environment\\"] || \\"production\\",\\n});\\n\\n// Create and configure the tracer provider\\nconst tracerProvider = new NodeTracerProvider({\\n resource: resource,\\n});\\nconst exporter = new OTLPTraceExporter(collectorOptions);\\ntracerProvider.addSpanProcessor(new BatchSpanProcessor(exporter));\\ntracerProvider.register();\\n\\n//Register instrumentations\\nregisterInstrumentations({\\n instrumentations: [new HttpInstrumentation(), new ExpressInstrumentation()],\\n tracerProvider: 
tracerProvider,\\n});\\n\\nconst express = require(\\"express\\");\\nconst cors = require(\\"cors\\")({ origin: true });\\nconst cookieParser = require(\\"cookie-parser\\");\\nconst { json } = require(\\"body-parser\\");\\n\\nconst PORT = process.env.PORT || 3001;\\n\\nconst app = express().use(cookieParser(), cors, json(), expressPino);\\n\\nconst axios = require(\\"axios\\");\\n\\napp.use(express.json());\\napp.use(express.urlencoded({ extended: false }));\\napp.use((err, req, res, next) => {\\n log.error(err.stack);\\n const span = opentelemetry.trace.getActiveSpan();\\n if (span) {\\n span.recordException(err);\\n span.end();\\n }\\n res.status(500).json({ error: err.message, code: err.code });\\n});\\n\\nconst tracer = opentelemetry.trace.getTracer(\\"node-server-otel-manual\\");\\n\\nvar favorites = {};\\n\\napp.post(\\"/api/favorites\\", (req, res, next) => {\\n tracer.startActiveSpan(\\"favorites\\", (span) => {\\n var randomIndex = Math.floor(Math.random() * API_ENDPOINT_FAVORITES.length);\\n\\n if (\\n process.env.THROW_NOT_A_FUNCTION_ERROR == \\"true\\" &&\\n Math.random() < 0.5\\n ) {\\n // randomly choose one of the endpoints\\n axios\\n .post(\\n \\"http://\\" +\\n API_ENDPOINT_FAVORITES[randomIndex] +\\n \\"/favorites?user_id=1\\",\\n req.body\\n )\\n .then(function (response) {\\n favorites = response.data;\\n // res.jsonn is not a function, so this deliberately throws an error\\n span.end();\\n res.jsonn({ favorites: favorites });\\n })\\n .catch(next);\\n } else {\\n axios\\n .post(\\n \\"http://\\" +\\n API_ENDPOINT_FAVORITES[randomIndex] +\\n \\"/favorites?user_id=1\\",\\n req.body\\n )\\n .then(function (response) {\\n favorites = response.data;\\n span.end();\\n res.json({ favorites: favorites });\\n })\\n .catch(next);\\n }\\n });\\n});\\n\\napp.listen(PORT, () => {\\n log.info(\\\\`Server listening on \\\\${PORT}\\\\`);\\n});\\n`})}),`\\n`,(0,n.jsx)(e.h3,{id:\\"step-3-running-the-docker-image-with-environment-variables\\",children:\\"Step 3. 
Running the Docker image with environment variables\\"}),`\\n`,(0,n.jsxs)(e.p,{children:[\\"We will use environment variables and pass in the configuration values to enable it to connect with \\",(0,n.jsx)(e.a,{href:\\"https://www.elastic.co/guide/en/observability/current/apm-open-telemetry.html\\",rel:\\"nofollow\\",children:\\"Elastic Observability\\\\u2019s APM server\\"}),\\".\\"]}),`\\n`,(0,n.jsx)(e.p,{children:\\"Because Elastic accepts OTLP natively, we just need to provide the Endpoint and authentication where the OTEL Exporter needs to send the data, as well as some other environment variables.\\"}),`\\n`,(0,n.jsxs)(e.p,{children:[(0,n.jsx)(e.strong,{children:\\"Getting Elastic Cloud variables\\"}),(0,n.jsx)(e.br,{}),`\\n`,\\"You can copy the endpoints and token from Kibana\\",(0,n.jsx)(\\"sup\\",{children:\\"\\\\xAE\\"}),\\" under the path \\",(0,n.jsx)(e.code,{children:\\"/app/home#/tutorial/apm\\"}),\\".\\"]}),`\\n`,(0,n.jsx)(e.p,{children:(0,n.jsx)(e.img,{src:\\"/assets/images/manual-instrumentation-nodejs-apps-opentelemetry/elastic-blog-3-apm.png\\",alt:\\"apm\\",width:\\"1924\\",height:\\"1304\\"})}),`\\n`,(0,n.jsx)(e.p,{children:\\"You will need to copy the following environment variables:\\"}),`\\n`,(0,n.jsx)(e.pre,{children:(0,n.jsx)(e.code,{className:\\"language-bash\\",children:`OTEL_EXPORTER_OTLP_ENDPOINT\\nOTEL_EXPORTER_OTLP_HEADERS\\n`})}),`\\n`,(0,n.jsx)(e.p,{children:(0,n.jsx)(e.strong,{children:\\"Build the image\\"})}),`\\n`,(0,n.jsx)(e.pre,{children:(0,n.jsx)(e.code,{className:\\"language-bash\\",children:`docker build -t node-otel-manual-image .\\n`})}),`\\n`,(0,n.jsx)(e.p,{children:(0,n.jsx)(e.strong,{children:\\"Run the image\\"})}),`\\n`,(0,n.jsx)(e.pre,{children:(0,n.jsx)(e.code,{className:\\"language-bash\\",children:`docker run \\\\\\\\\\n -e OTEL_EXPORTER_OTLP_ENDPOINT=\\"\\" \\\\\\\\\\n -e OTEL_EXPORTER_OTLP_HEADERS=\\"Authorization=Bearer \\" \\\\\\\\\\n -e OTEL_RESOURCE_ATTRIBUTES=\\"service.version=1.0,deployment.environment=production,service.name=node-server-otel-manual\\" \\\\\\\\\\n -p 3001:3001 \\\\\\\\\\n node-otel-manual-image\\n`})}),`\\n`,(0,n.jsx)(e.p,{children:\\"You can now issue a few requests in order to generate trace data. Note that these requests are expected to return an error, as this service relies on some downstream services that you may not have running on your machine.\\"}),`\\n`,(0,n.jsx)(e.pre,{children:(0,n.jsx)(e.code,{className:\\"language-bash\\",children:`curl localhost:3001/api/login\\ncurl localhost:3001/api/favorites\\n\\n# or alternatively issue a request every second\\n\\nwhile true; do curl \\"localhost:3001/api/favorites\\"; sleep 1; done;\\n`})}),`\\n`,(0,n.jsx)(e.h3,{id:\\"step-4-explore-in-elastic-apm\\",children:\\"Step 4. 
Explore in Elastic APM\\"}),`\\n`,(0,n.jsx)(e.p,{children:\\"Now that the service is instrumented, you should see the following output in Elastic APM when looking at the transactions section of your Node.js service:\\"}),`\\n`,(0,n.jsx)(e.p,{children:(0,n.jsx)(e.img,{src:\\"/assets/images/manual-instrumentation-nodejs-apps-opentelemetry/elastic-blog-4-graphs.png\\",alt:\\"graphs\\",width:\\"1999\\",height:\\"1143\\"})}),`\\n`,(0,n.jsx)(e.p,{children:\\"Notice how this mirrors the auto-instrumented version.\\"}),`\\n`,(0,n.jsx)(e.p,{children:(0,n.jsx)(e.img,{src:\\"/assets/images/manual-instrumentation-nodejs-apps-opentelemetry/elastic-blog-4-graphs.png\\",alt:\\"graphs-2\\",width:\\"1999\\",height:\\"1143\\"})}),`\\n`,(0,n.jsx)(e.h2,{id:\\"is-it-worth-it\\",children:\\"Is it worth it?\\"}),`\\n`,(0,n.jsx)(e.p,{children:\\"This is the million-dollar question. Depending on what level of detail you need, it\'s potentially necessary to manually instrument. Manual instrumentation lets you add custom spans, custom labels, and metrics where you want or need them. It allows you to get a level of detail that otherwise would not be possible and is oftentimes important for tracking business-specific KPIs.\\"}),`\\n`,(0,n.jsx)(e.p,{children:\\"Your operations, and whether you need to troubleshoot or analyze the performance of specific parts of the code, will dictate when and what to instrument. But it\\\\u2019s helpful to know that you have the option to manually instrument.\\"}),`\\n`,(0,n.jsxs)(e.p,{children:[\\"You may have noticed that we didn\\\\u2019t instrument metrics yet; that is a topic for another blog. We discussed logs in a \\",(0,n.jsx)(e.a,{href:\\"https://www.elastic.co/blog/3-models-logging-opentelemetry-elastic\\",rel:\\"nofollow\\",children:\\"previous blog\\"}),\\".\\"]}),`\\n`,(0,n.jsx)(e.h2,{id:\\"conclusion\\",children:\\"Conclusion\\"}),`\\n`,(0,n.jsx)(e.p,{children:\\"In this blog, we discussed the following:\\"}),`\\n`,(0,n.jsxs)(e.ul,{children:[`\\n`,(0,n.jsx)(e.li,{children:\\"How to manually instrument Node.js with OpenTelemetry\\"}),`\\n`,(0,n.jsx)(e.li,{children:\\"The different modules needed when using Express\\"}),`\\n`,(0,n.jsx)(e.li,{children:\\"How to properly initialize and instrument spans\\"}),`\\n`,(0,n.jsx)(e.li,{children:\\"How to easily set the OTLP ENDPOINT and OTLP HEADERS from Elastic without the need for a collector\\"}),`\\n`]}),`\\n`,(0,n.jsx)(e.p,{children:\\"Hopefully, this provides an easy-to-understand walk-through of instrumenting Node.js with OpenTelemetry and how easy it is to send traces into Elastic.\\"}),`\\n`,(0,n.jsxs)(e.blockquote,{children:[`\\n`,(0,n.jsx)(e.p,{children:\\"Developer resources:\\"}),`\\n`,(0,n.jsxs)(e.ul,{children:[`\\n`,(0,n.jsxs)(e.li,{children:[(0,n.jsx)(e.a,{href:\\"https://www.elastic.co/blog/getting-started-opentelemetry-instrumentation-sample-app\\",rel:\\"nofollow\\",children:\\"Elastiflix application\\"}),\\", a guide to instrument different languages with OpenTelemetry\\"]}),`\\n`,(0,n.jsxs)(e.li,{children:[\\"Python: \\",(0,n.jsx)(e.a,{href:\\"https://www.elastic.co/blog/auto-instrumentation-of-python-applications-opentelemetry\\",rel:\\"nofollow\\",children:\\"Auto-instrumentation\\"}),\\", \\",(0,n.jsx)(e.a,{href:\\"https://www.elastic.co/blog/manual-instrumentation-of-python-applications-opentelemetry\\",rel:\\"nofollow\\",children:\\"Manual-instrumentation\\"})]}),`\\n`,(0,n.jsxs)(e.li,{children:[\\"Java: 
\\",(0,n.jsx)(e.a,{href:\\"https://www.elastic.co/blog/auto-instrumentation-of-java-applications-opentelemetry\\",rel:\\"nofollow\\",children:\\"Auto-instrumentation\\"}),\\", \\",(0,n.jsx)(e.a,{href:\\"https://www.elastic.co/blog/manual-instrumentation-of-java-applications-opentelemetry\\",rel:\\"nofollow\\",children:\\"Manual-instrumentation\\"})]}),`\\n`,(0,n.jsxs)(e.li,{children:[\\"Node.js: \\",(0,n.jsx)(e.a,{href:\\"https://www.elastic.co/blog/auto-instrument-nodejs-applications-opentelemetry\\",rel:\\"nofollow\\",children:\\"Auto-instrumentation\\"}),\\", \\",(0,n.jsx)(e.a,{href:\\"https://www.elastic.co/blog/manual-instrumentation-nodejs-apps-opentelemetry\\",rel:\\"nofollow\\",children:\\"Manual-instrumentation\\"})]}),`\\n`,(0,n.jsxs)(e.li,{children:[\\".NET: \\",(0,n.jsx)(e.a,{href:\\"https://www.elastic.co/blog/auto-instrumentation-of-net-applications-opentelemetry\\",rel:\\"nofollow\\",children:\\"Auto-instrumentation\\"}),\\", \\",(0,n.jsx)(e.a,{href:\\"https://www.elastic.co/blog/manual-instrumentation-of-net-applications-opentelemetry\\",rel:\\"nofollow\\",children:\\"Manual-instrumentation\\"})]}),`\\n`,(0,n.jsxs)(e.li,{children:[\\"Go: \\",(0,n.jsx)(e.a,{href:\\"https://elastic.co/blog/manual-instrumentation-of-go-applications-opentelemetry\\",rel:\\"nofollow\\",children:\\"Manual-instrumentation\\"})]}),`\\n`,(0,n.jsx)(e.li,{children:(0,n.jsx)(e.a,{href:\\"https://www.elastic.co/blog/best-practices-instrumenting-opentelemetry\\",rel:\\"nofollow\\",children:\\"Best practices for instrumenting OpenTelemetry\\"})}),`\\n`]}),`\\n`,(0,n.jsx)(e.p,{children:\\"General configuration and use case resources:\\"}),`\\n`,(0,n.jsxs)(e.ul,{children:[`\\n`,(0,n.jsx)(e.li,{children:(0,n.jsx)(e.a,{href:\\"https://www.elastic.co/blog/opentelemetry-observability\\",rel:\\"nofollow\\",children:\\"Independence with OpenTelemetry on Elastic\\"})}),`\\n`,(0,n.jsx)(e.li,{children:(0,n.jsx)(e.a,{href:\\"https://www.elastic.co/blog/implementing-kubernetes-observability-security-opentelemetry\\",rel:\\"nofollow\\",children:\\"Modern observability and security on Kubernetes with Elastic and OpenTelemetry\\"})}),`\\n`,(0,n.jsx)(e.li,{children:(0,n.jsx)(e.a,{href:\\"https://www.elastic.co/blog/3-models-logging-opentelemetry-elastic\\",rel:\\"nofollow\\",children:\\"3 models for logging with OpenTelemetry and Elastic\\"})}),`\\n`,(0,n.jsx)(e.li,{children:(0,n.jsx)(e.a,{href:\\"https://www.elastic.co/blog/adding-free-and-open-elastic-apm-as-part-of-your-elastic-observability-deployment\\",rel:\\"nofollow\\",children:\\"Adding free and open Elastic APM as part of your Elastic Observability deployment\\"})}),`\\n`,(0,n.jsx)(e.li,{children:(0,n.jsx)(e.a,{href:\\"https://www.elastic.co/blog/custom-metrics-app-code-java-agent-plugin\\",rel:\\"nofollow\\",children:\\"Capturing custom metrics through OpenTelemetry API in code with Elastic\\"})}),`\\n`,(0,n.jsx)(e.li,{children:(0,n.jsx)(e.a,{href:\\"https://www.elastic.co/virtual-events/future-proof-your-observability-platform-with-opentelemetry-and-elastic\\",rel:\\"nofollow\\",children:\\"Future-proof your observability platform with OpenTelemetry and Elastic\\"})}),`\\n`,(0,n.jsx)(e.li,{children:(0,n.jsx)(e.a,{href:\\"https://www.elastic.co/blog/kubernetes-k8s-observability-elasticsearch-cncf\\",rel:\\"nofollow\\",children:\\"Elastic Observability: Built for open technologies like Kubernetes, OpenTelemetry, Prometheus, Istio, and more\\"})}),`\\n`]}),`\\n`]}),`\\n`,(0,n.jsxs)(e.p,{children:[\\"Don\\\\u2019t have an Elastic Cloud account yet? 
\\",(0,n.jsx)(e.a,{href:\\"https://cloud.elastic.co/registration\\",rel:\\"nofollow\\",children:\\"Sign up for Elastic Cloud\\"}),\\" and try out the auto-instrumentation capabilities that I discussed above. I would be interested in getting your feedback about your experience in gaining visibility into your application stack with Elastic.\\"]}),`\\n`,(0,n.jsx)(e.p,{children:(0,n.jsx)(e.em,{children:\\"The release and timing of any features or functionality described in this post remain at Elastic\'s sole discretion. Any features or functionality not currently available may not be delivered on time or at all.\\"})})]})}function d(t={}){let{wrapper:e}=t.components||{};return e?(0,n.jsx)(e,{...t,children:(0,n.jsx)(p,{...t})}):p(t)}return T(b);})();\\n;return Component;"},"_id":"articles/manual-instrumentation-of-nodejs-applications-opentelemetry.mdx","_raw":{"sourceFilePath":"articles/manual-instrumentation-of-nodejs-applications-opentelemetry.mdx","sourceFileName":"manual-instrumentation-of-nodejs-applications-opentelemetry.mdx","sourceFileDir":"articles","contentType":"mdx","flattenedPath":"articles/manual-instrumentation-of-nodejs-applications-opentelemetry"},"type":"Article","imageUrl":"/assets/images/manual-instrumentation-nodejs-apps-opentelemetry/observability-launch-series-1-node-js-manual_(1).jpg","readingTime":"18 min read","url":"/manual-instrumentation-nodejs-apps-opentelemetry","headings":[{"level":2,"title":"Application, prerequisites, and config","href":"#application-prerequisites-and-config"},{"level":2,"title":"Prerequisites","href":"#prerequisites"},{"level":2,"title":"View the example source code","href":"#view-the-example-source-code"},{"level":2,"title":"Step-by-step guide","href":"#step-by-step-guide"},{"level":3,"title":"Step 0. Log in to your Elastic Cloud account","href":"#step-0-log-in-to-your-elastic-cloud-account"},{"level":3,"title":"Step 1. Install and initialize OpenTelemetry","href":"#step-1-install-and-initialize-opentelemetry"},{"level":3,"title":"Step 2. Adding custom spans","href":"#step-2-adding-custom-spans"},{"level":3,"title":"Step 3. Running the Docker image with environment variables","href":"#step-3-running-the-docker-image-with-environment-variables"},{"level":3,"title":"Step 4. Explore in Elastic APM","href":"#step-4-explore-in-elastic-apm"},{"level":2,"title":"Is it worth it?","href":"#is-it-worth-it"},{"level":2,"title":"Conclusion","href":"#conclusion"}]},{"title":"Manual instrumentation with OpenTelemetry for Python applications","slug":"manual-instrumentation-python-apps-opentelemetry","date":"2023-08-31","description":"In this blog post, we will show you how to manually instrument Python applications using OpenTelemetry. We will explore how to use the proper OpenTelemetry Python libraries and in particular work on instrumenting tracing in a Python application.","image":"observability-launch-series-2-python-manual_(1).jpg","author":[{"slug":"bahubali-shetti","type":"Author","_raw":{}}],"tags":[{"slug":"opentelemetry","type":"Tag","_raw":{}},{"slug":"python","type":"Tag","_raw":{}},{"slug":"apm","type":"Tag","_raw":{}},{"slug":"instrumentation","type":"Tag","_raw":{}}],"body":{"raw":"\\nDevOps and SRE teams are transforming the process of software development. While DevOps engineers focus on efficient software applications and service delivery, SRE teams are key to ensuring reliability, scalability, and performance. 
These teams must rely on a full-stack observability solution that allows them to manage and monitor systems and ensure issues are resolved before they impact the business.\\n\\nObservability across the entire stack of modern distributed applications requires data collection, processing, and correlation often in the form of dashboards. Ingesting all system data requires installing agents across stacks, frameworks, and providers — a process that can be challenging and time-consuming for teams who have to deal with version changes, compatibility issues, and proprietary code that doesn\'t scale as systems change.\\n\\nThanks to [OpenTelemetry](https://opentelemetry.io) (OTel), DevOps and SRE teams now have a standard way to collect and send data that doesn\'t rely on proprietary code and have a large support community reducing vendor lock-in.\\n\\nIn a [previous blog](https://www.elastic.co/blog/opentelemetry-observability), we also reviewed how to use the [OpenTelemetry demo](https://github.com/elastic/opentelemetry-demo) and connect it to Elastic\xae, as well as some of Elastic’s capabilities with OpenTelemetry and Kubernetes.\\n\\nIn this blog, we will show how to use [manual instrumentation for OpenTelemetry](https://opentelemetry.io/docs/instrumentation/python/manual/) with the Python service of our [application called Elastiflix](https://github.com/elastic/observability-examples). This approach is slightly more complex than using [automatic instrumentation](https://www.elastic.co/blog/auto-instrumentation-of-python-applications-opentelemetry).\\n\\nThe beauty of this is that there is **no need for the otel-collector**! This setup enables you to slowly and easily migrate an application to OTel with Elastic according to a timeline that best fits your business.\\n\\n## Application, prerequisites, and config\\n\\nThe application that we use for this blog is called [Elastiflix](https://github.com/elastic/observability-examples), a movie streaming application. It consists of several micro-services written in .NET, NodeJS, Go, and Python.\\n\\nBefore we instrument our sample application, we will first need to understand how Elastic can receive the telemetry data.\\n\\n![configuration](/assets/images/manual-instrumentation-python-apps-opentelemetry/elastic-blog-1-config.png)\\n\\nAll of Elastic Observability’s APM capabilities are available with OTel data. Some of these include:\\n\\n- Service maps\\n- Service details (latency, throughput, failed transactions)\\n- Dependencies between services, distributed tracing\\n- Transactions (traces)\\n- Machine learning (ML) correlations\\n- Log correlation\\n\\nIn addition to Elastic’s APM and a unified view of the telemetry data, you will also be able to use Elastic’s powerful machine learning capabilities to reduce the analysis, and alerting to help reduce MTTR.\\n\\n## Prerequisites\\n\\n- An Elastic Cloud account — [sign up now](https://cloud.elastic.co/)\\n- A clone of the [Elastiflix demo application](https://github.com/elastic/observability-examples), or your own Python application\\n- Basic understanding of Docker — potentially install [Docker Desktop](https://www.docker.com/products/docker-desktop/)\\n- Basic understanding of Python\\n\\n## View the example source code\\n\\nThe full source code, including the Dockerfile used in this blog, can be found on [GitHub](https://github.com/elastic/observability-examples/tree/main/Elastiflix/python-favorite-otel-auto). 
The repository also contains the [same application without instrumentation](https://github.com/elastic/observability-examples/tree/main/Elastiflix/python-favorite). This allows you to compare each file and see the differences.\\n\\nThe following steps will show you how to instrument this application and run it on the command line or in Docker. If you are interested in a more complete OTel example, take a look at the docker-compose file [here](https://github.com/elastic/observability-examples/tree/main#start-the-app), which will bring up the full project.\\n\\nBefore we begin, let’s look at the non-instrumented code first.\\n\\nThis is our simple Python Flask application that can receive a GET request. (This is a portion of the full [main.py](https://github.com/elastic/observability-examples/blob/main/Elastiflix/python-favorite/main.py) file.)\\n\\n```python\\nfrom flask import Flask, request\\nimport sys\\n\\nimport logging\\nimport redis\\nimport os\\nimport ecs_logging\\nimport datetime\\nimport random\\nimport time\\n\\nredis_host = os.environ.get(\'REDIS_HOST\') or \'localhost\'\\nredis_port = os.environ.get(\'REDIS_PORT\') or 6379\\n\\napplication_port = os.environ.get(\'APPLICATION_PORT\') or 5000\\n\\napp = Flask(__name__)\\n\\n# Get the Logger\\nlogger = logging.getLogger(\\"app\\")\\nlogger.setLevel(logging.DEBUG)\\n\\n# Add an ECS formatter to the Handler\\nhandler = logging.StreamHandler()\\nhandler.setFormatter(ecs_logging.StdlibFormatter())\\nlogger.addHandler(handler)\\nlogging.getLogger(\'werkzeug\').setLevel(logging.ERROR)\\nlogging.getLogger(\'werkzeug\').addHandler(handler)\\n\\nr = redis.Redis(host=redis_host, port=redis_port, decode_responses=True)\\n\\n@app.route(\'/favorites\', methods=[\'GET\'])\\ndef get_favorite_movies():\\n user_id = str(request.args.get(\'user_id\'))\\n\\n logger.info(\'Getting favorites for user \' + user_id, extra={\\n \\"event.dataset\\": \\"favorite.log\\",\\n \\"user.id\\": request.args.get(\'user_id\')\\n })\\n\\n favorites = r.smembers(user_id)\\n\\n # convert to list\\n favorites = list(favorites)\\n logger.info(\'User \' + user_id + \' has favorites: \' + str(favorites), extra={\\n \\"event.dataset\\": \\"favorite.log\\",\\n \\"user.id\\": user_id\\n })\\n return { \\"favorites\\": favorites}\\n\\nlogger.info(\'App startup\')\\napp.run(host=\'0.0.0.0\', port=application_port)\\nlogger.info(\'App Stopped\')\\n```\\n\\n## Step-by-step guide\\n\\n### Step 0. Log in to your Elastic Cloud account\\n\\nThis blog assumes you have an Elastic Cloud account — if not, follow the [instructions to get started on Elastic Cloud](https://cloud.elastic.co/registration?elektra=en-cloud-page).\\n\\n![trial](/assets/images/manual-instrumentation-python-apps-opentelemetry/elastic-blog-2-trial.png)\\n\\n### Step 1. 
Install and initialize OpenTelemetry\\n\\nAs a first step, we’ll need to add some additional libraries to our application.\\n\\n```python\\nfrom opentelemetry import trace\\nfrom opentelemetry.sdk.trace import TracerProvider\\nfrom opentelemetry.sdk.trace.export import BatchSpanProcessor\\n\\nfrom opentelemetry.exporter.otlp.proto.grpc.trace_exporter import OTLPSpanExporter\\nfrom opentelemetry.instrumentation.flask import FlaskInstrumentor\\nfrom opentelemetry.instrumentation.requests import RequestsInstrumentor\\nfrom opentelemetry.instrumentation.redis import RedisInstrumentor\\nfrom opentelemetry.sdk.resources import Resource\\n```\\n\\nThis code imports necessary OpenTelemetry libraries, including those for tracing, exporting, and instrumenting specific libraries like Flask, Requests, and Redis.\\n\\nNext, we read the variables:\\n\\n```bash\\nOTEL_EXPORTER_OTLP_HEADERS\\nOTEL_EXPORTER_OTLP_ENDPOINT\\n```\\n\\nAnd then initialize the exporter.\\n\\n```python\\notel_exporter_otlp_headers = os.environ.get(\'OTEL_EXPORTER_OTLP_HEADERS\')\\n\\notel_exporter_otlp_endpoint = os.environ.get(\'OTEL_EXPORTER_OTLP_ENDPOINT\')\\n\\nexporter = OTLPSpanExporter(endpoint=otel_exporter_otlp_endpoint, headers=otel_exporter_otlp_headers)\\n```\\n\\nIn order to pass additional parameters to OpenTelemetry, we will read the OTEL_RESOURCE_ATTRIBUTES variable and convert it into an object.\\n\\n```python\\nresource_attributes = os.environ.get(\'OTEL_RESOURCE_ATTRIBUTES\') or \'service.version=1.0,deployment.environment=production\'\\nkey_value_pairs = resource_attributes.split(\',\')\\nresult_dict = {}\\n\\nfor pair in key_value_pairs:\\n key, value = pair.split(\'=\')\\n result_dict[key] = value\\n```\\n\\nNext, we will use these parameters to populate the resources configuration.\\n\\n```python\\nresourceAttributes = {\\n \\"service.name\\": otel_service_name,\\n \\"service.version\\": result_dict[\'service.version\'],\\n \\"deployment.environment\\": result_dict[\'deployment.environment\']\\n}\\n\\nresource = Resource.create(resourceAttributes)\\n```\\n\\nWe then set up the trace provider using the previously created resource. The trace provider will allow us to create spans later after getting a tracer instance from it.\\n\\nAdditionally, we specify the use of BatchSpanProcessor. The Span processor is an interface that allows hooks for span start and end method invocations.\\n\\nIn OpenTelemetry, different Span processors are offered. The BatchSpanProcessor batches spans and sends them in bulk. Multiple Span processors can be configured to be active at the same time using the MultiSpanProcessor. [See OpenTelemetry documentation](https://opentelemetry.io/docs/instrumentation/java/manual/#span-processor).\\n\\n
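As a hypothetical example that is not part of the sample application, a second span processor can be registered next to the batch processor, for instance to print finished spans to the console while developing. The sketch below assumes the provider object that we create in the next snippet.\\n\\n```python\\n# Hypothetical debugging aid - not part of the sample application.\\n# Every registered span processor receives every finished span, so spans\\n# are printed to the console and exported via OTLP at the same time.\\nfrom opentelemetry.sdk.trace.export import ConsoleSpanExporter, SimpleSpanProcessor\\n\\nprovider.add_span_processor(SimpleSpanProcessor(ConsoleSpanExporter()))\\n```\\n\\n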
We also added the resource module. This allows us to specify attributes such as service.name, version, and more. See [OpenTelemetry semantic conventions documentation](https://opentelemetry.io/docs/specs/otel/resource/semantic_conventions/#semantic-attributes-with-sdk-provided-default-value) for more details.\\n\\n```python\\nprovider = TracerProvider(resource=resource)\\nprocessor = BatchSpanProcessor(exporter)\\nprovider.add_span_processor(processor)\\n\\n# Sets the global default tracer provider\\ntrace.set_tracer_provider(provider)\\n\\n# Creates a tracer from the global tracer provider\\ntracer = trace.get_tracer(otel_service_name)\\n```\\n\\nFinally, because we are using Flask and Redis, we also add the following, which allows us to automatically instrument both Flask and Redis.\\n\\nTechnically you could consider this “cheating.” We are using some parts of the Python auto-instrumentation. However, it’s generally a good approach to resort to using some of the auto-instrumentation modules. This saves you a lot of time, and in addition, it ensures that functionality like distributed tracing will work automatically for any requests you receive or send.\\n\\n```python\\nFlaskInstrumentor().instrument_app(app)\\nRequestsInstrumentor().instrument()\\nRedisInstrumentor().instrument()\\n```\\n\\n### Step 2. Adding Custom Spans\\n\\nNow that we have everything added and initialized, we can add custom spans.\\n\\nIf we want to have additional instrumentation for a part of our app, we simply wrap the /favorites GET function code with:\\n\\n```python\\nwith tracer.start_as_current_span(\\"get_favorite_movies\\", set_status_on_exception=True) as span:\\n ...\\n```\\n\\nThe wrapped code is as follows:\\n\\n```python\\n@app.route(\'/favorites\', methods=[\'GET\'])\\ndef get_favorite_movies():\\n # add artificial delay if enabled\\n if delay_time > 0:\\n time.sleep(max(0, random.gauss(delay_time/1000, delay_time/1000/10)))\\n\\n with tracer.start_as_current_span(\\"get_favorite_movies\\") as span:\\n user_id = str(request.args.get(\'user_id\'))\\n\\n logger.info(\'Getting favorites for user \' + user_id, extra={\\n \\"event.dataset\\": \\"favorite.log\\",\\n \\"user.id\\": request.args.get(\'user_id\')\\n })\\n\\n favorites = r.smembers(user_id)\\n\\n # convert to list\\n favorites = list(favorites)\\n logger.info(\'User \' + user_id + \' has favorites: \' + str(favorites), extra={\\n \\"event.dataset\\": \\"favorite.log\\",\\n \\"user.id\\": user_id\\n })\\n```\\n\\n**Additional code**\\n\\nIn addition to modules and span instrumentation, the sample application also checks some environment variables at startup. When sending data to Elastic without an OTel collector, the OTEL_EXPORTER_OTLP_HEADERS variable is required as it contains the authentication. The same is true for OTEL_EXPORTER_OTLP_ENDPOINT, the host where we’ll send the telemetry data.\\n\\n```python\\notel_exporter_otlp_headers = os.environ.get(\'OTEL_EXPORTER_OTLP_HEADERS\')\\n# fail if secret token not set\\nif otel_exporter_otlp_headers is None:\\n raise Exception(\'OTEL_EXPORTER_OTLP_HEADERS environment variable not set\')\\n\\n\\notel_exporter_otlp_endpoint = os.environ.get(\'OTEL_EXPORTER_OTLP_ENDPOINT\')\\n# fail if server url not set\\nif otel_exporter_otlp_endpoint is None:\\n raise Exception(\'OTEL_EXPORTER_OTLP_ENDPOINT environment variable not set\')\\nelse:\\n exporter = OTLPSpanExporter(endpoint=otel_exporter_otlp_endpoint, headers=otel_exporter_otlp_headers)\\n```\\n\\n
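The set_status_on_exception=True argument used in Step 2 marks the span as failed when an exception escapes the with block. As a hypothetical example that is not part of the sample application, an exception can also be recorded on the span explicitly, which attaches it as a span event in Elastic APM:\\n\\n```python\\n# Hypothetical example - not part of the sample application.\\nfrom opentelemetry.trace import Status, StatusCode\\n\\nwith tracer.start_as_current_span(\'get_favorite_movies\') as span:\\n try:\\n  favorites = r.smembers(user_id)\\n except Exception as exc:\\n  # attach the exception as a span event and mark the span as failed\\n  span.record_exception(exc)\\n  span.set_status(Status(StatusCode.ERROR, str(exc)))\\n  raise\\n```\\n\\n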
**Final code** \\nFor comparison, this is the instrumented code of our sample application. You can find the full source code in [GitHub](https://github.com/elastic/observability-examples/tree/main/Elastiflix/python-favorite-otel-manual).\\n\\n```python\\nfrom flask import Flask, request\\nimport sys\\n\\nimport logging\\nimport redis\\nimport os\\nimport ecs_logging\\nimport datetime\\nimport random\\nimport time\\n\\nfrom opentelemetry import trace\\nfrom opentelemetry.sdk.trace import TracerProvider\\nfrom opentelemetry.sdk.trace.export import BatchSpanProcessor\\n\\n#Using grpc exporter since per the instructions in OTel docs this is needed for any endpoint receiving OTLP.\\n\\nfrom opentelemetry.exporter.otlp.proto.grpc.trace_exporter import OTLPSpanExporter\\nfrom opentelemetry.instrumentation.flask import FlaskInstrumentor\\nfrom opentelemetry.instrumentation.requests import RequestsInstrumentor\\nfrom opentelemetry.instrumentation.redis import RedisInstrumentor\\n#from opentelemetry.instrumentation.wsgi import OpenTelemetryMiddleware\\nfrom opentelemetry.sdk.resources import Resource\\n\\nredis_host = os.environ.get(\'REDIS_HOST\') or \'localhost\'\\nredis_port = os.environ.get(\'REDIS_PORT\') or 6379\\notel_traces_exporter = os.environ.get(\'OTEL_TRACES_EXPORTER\') or \'otlp\'\\notel_metrics_exporter = os.environ.get(\'OTEL_METRICS_EXPORTER\') or \'otlp\'\\nenvironment = os.environ.get(\'ENVIRONMENT\') or \'dev\'\\notel_service_version = os.environ.get(\'OTEL_SERVICE_VERSION\') or \'1.0.0\'\\nresource_attributes = os.environ.get(\'OTEL_RESOURCE_ATTRIBUTES\') or \'service.version=1.0,deployment.environment=production\'\\n\\notel_exporter_otlp_headers = os.environ.get(\'OTEL_EXPORTER_OTLP_HEADERS\')\\n# fail if secret token not set\\nif otel_exporter_otlp_headers is None:\\n raise Exception(\'OTEL_EXPORTER_OTLP_HEADERS environment variable not set\')\\n#else:\\n# otel_exporter_otlp_fheaders= f\\"Authorization=Bearer%20{secret_token}\\"\\n\\notel_exporter_otlp_endpoint = os.environ.get(\'OTEL_EXPORTER_OTLP_ENDPOINT\')\\n# fail if server url not set\\nif otel_exporter_otlp_endpoint is None:\\n raise Exception(\'OTEL_EXPORTER_OTLP_ENDPOINT environment variable not set\')\\nelse:\\n exporter = OTLPSpanExporter(endpoint=otel_exporter_otlp_endpoint, headers=otel_exporter_otlp_headers)\\n\\n\\nkey_value_pairs = resource_attributes.split(\',\')\\nresult_dict = {}\\n\\nfor pair in key_value_pairs:\\n key, value = pair.split(\'=\')\\n result_dict[key] = value\\n\\nresourceAttributes = {\\n \\"service.name\\": result_dict[\'service.name\'],\\n \\"service.version\\": result_dict[\'service.version\'],\\n \\"deployment.environment\\": result_dict[\'deployment.environment\']\\n# # Add more attributes as needed\\n}\\n\\nresource = Resource.create(resourceAttributes)\\n\\n\\nprovider = TracerProvider(resource=resource)\\nprocessor = BatchSpanProcessor(exporter)\\nprovider.add_span_processor(processor)\\n\\n# Sets the global default tracer provider\\ntrace.set_tracer_provider(provider)\\n\\n# Creates a tracer from the global tracer provider\\ntracer = trace.get_tracer(\\"favorite\\")\\n\\n\\napplication_port = os.environ.get(\'APPLICATION_PORT\') or 5000\\n\\napp = Flask(__name__)\\n\\n\\nFlaskInstrumentor().instrument_app(app)\\n#OpenTelemetryMiddleware().instrument()\\nRequestsInstrumentor().instrument()\\nRedisInstrumentor().instrument()\\n\\n#app.wsgi_app = OpenTelemetryMiddleware(app.wsgi_app)\\n\\n# Get the Logger\\nlogger = logging.getLogger(\\"app\\")\\nlogger.setLevel(logging.DEBUG)\\n\\n# Add an ECS formatter to the Handler\\nhandler = 
logging.StreamHandler()\\nhandler.setFormatter(ecs_logging.StdlibFormatter())\\nlogger.addHandler(handler)\\nlogging.getLogger(\'werkzeug\').setLevel(logging.ERROR)\\nlogging.getLogger(\'werkzeug\').addHandler(handler)\\n\\nr = redis.Redis(host=redis_host, port=redis_port, decode_responses=True)\\n\\n@app.route(\'/favorites\', methods=[\'GET\'])\\ndef get_favorite_movies():\\n with tracer.start_as_current_span(\\"get_favorite_movies\\") as span:\\n user_id = str(request.args.get(\'user_id\'))\\n\\n logger.info(\'Getting favorites for user \' + user_id, extra={\\n \\"event.dataset\\": \\"favorite.log\\",\\n \\"user.id\\": request.args.get(\'user_id\')\\n })\\n\\n favorites = r.smembers(user_id)\\n\\n # convert to list\\n favorites = list(favorites)\\n logger.info(\'User \' + user_id + \' has favorites: \' + str(favorites), extra={\\n \\"event.dataset\\": \\"favorite.log\\",\\n \\"user.id\\": user_id\\n })\\n return { \\"favorites\\": favorites}\\n\\nlogger.info(\'App startup\')\\napp.run(host=\'0.0.0.0\', port=application_port)\\nlogger.info(\'App Stopped\')\\n```\\n\\n### Step 3. Running the Docker image with environment variables\\n\\nAs specified in the [OTEL documentation](https://opentelemetry.io/docs/instrumentation/python/automatic/#configuring-the-agent), we will use environment variables and pass in the configuration values to enable it to connect with [Elastic Observability’s APM server](https://www.elastic.co/guide/en/observability/current/apm-open-telemetry.html).\\n\\nBecause Elastic accepts OTLP natively, we just need to provide the Endpoint and authentication where the OTEL Exporter needs to send the data, as well as some other environment variables.\\n\\n**Getting Elastic Cloud variables** \\nYou can copy the endpoints and token from Kibana\xae under the path \`/app/home#/tutorial/apm\`.\\n\\n![apm agents](/assets/images/manual-instrumentation-python-apps-opentelemetry/elastic-blog-3-apm.png)\\n\\nYou will need to copy the following environment variables:\\n\\n```bash\\nOTEL_EXPORTER_OTLP_ENDPOINT\\nOTEL_EXPORTER_OTLP_HEADERS\\n```\\n\\n**Build the image**\\n\\n```bash\\ndocker build -t python-otel-manual-image .\\n```\\n\\n**Run the image**\\n\\n```bash\\ndocker run \\\\\\n -e OTEL_EXPORTER_OTLP_ENDPOINT=\\"\\" \\\\\\n -e OTEL_EXPORTER_OTLP_HEADERS=\\"Authorization=Bearer \\" \\\\\\n -e OTEL_RESOURCE_ATTRIBUTES=\\"service.version=1.0,deployment.environment=production,service.name=python-favorite-otel-manual\\" \\\\\\n -p 5000:5000 \\\\\\n python-otel-manual-image\\n```\\n\\nYou can now issue a few requests in order to generate trace data. Note that these requests are expected to return an error, as this service relies on a connection to Redis that you don’t currently have running. As mentioned before, you can find a more complete example using docker-compose [here](https://github.com/elastic/observability-examples/tree/main/Elastiflix).\\n\\n```bash\\ncurl localhost:5000/favorites\\n# or alternatively issue a request every second\\n\\nwhile true; do curl \\"localhost:5000/favorites\\"; sleep 1; done;\\n```\\n\\n### Step 4. 
Explore traces, metrics, and logs in Elastic APM\\n\\nNow that the service is instrumented, you should see the following output in Elastic APM when looking at the transactions section of your Python service:\\n\\n![graph-1](/assets/images/manual-instrumentation-python-apps-opentelemetry/elastic-blog-4-graph1.png)\\n\\nNotice how this is slightly different from the auto-instrumented version, as we now also have our custom span in this view.\\n\\n![graph-2](/assets/images/manual-instrumentation-python-apps-opentelemetry/elastic-blog-5-graph2.png)\\n\\n## Is it worth it?\\n\\nThis is the million-dollar question. Depending on what level of detail you need, it\'s potentially necessary to manually instrument. Manual instrumentation lets you add custom spans, custom labels, and metrics where you want or need them. It allows you to get a level of detail that otherwise would not be possible and is oftentimes important for tracking business-specific KPIs.\\n\\nYour operations, and whether you need to troubleshoot or analyze the performance of specific parts of the code, will dictate when and what to instrument. But it’s helpful to know that you have the option to manually instrument.\\n\\nIf you noticed we didn’t yet instrument metrics, that is another blog. We discussed logs in a [previous blog](https://www.elastic.co/blog/3-models-logging-opentelemetry-elastic).\\n\\n## Conclusion\\n\\nIn this blog, we discussed the following:\\n\\n- How to manually instrument Python with OpenTelemetry\\n- How to properly initialize OpenTelemetry and add a custom span\\n- How to easily set the OTLP ENDPOINT and OTLP HEADERS with Elastic without the need for a collector\\n\\nHopefully, this provides an easy-to-understand walk-through of instrumenting Python with OpenTelemetry and how easy it is to send traces into Elastic.\\n\\n> Developer resources:\\n>\\n> - [Elastiflix application](https://www.elastic.co/blog/getting-started-opentelemetry-instrumentation-sample-app), a guide to instrument different languages with OpenTelemetry\\n> - Python: [Auto-instrumentation](https://www.elastic.co/blog/auto-instrumentation-of-python-applications-opentelemetry), [Manual-instrumentation](https://www.elastic.co/blog/manual-instrumentation-python-apps-opentelemetry)\\n> - Java: [Auto-instrumentation](https://www.elastic.co/blog/auto-instrumentation-of-java-applications-opentelemetry), [Manual-instrumentation](https://www.elastic.co/blog/manual-instrumentation-of-java-applications-opentelemetry)\\n> - Node.js: [Auto-instrumentation](https://www.elastic.co/blog/auto-instrument-nodejs-applications-opentelemetry), [Manual-instrumentation](https://www.elastic.co/blog/manual-instrumentation-of-nodejs-applications-opentelemetry)\\n> - .NET: [Auto-instrumentation](https://www.elastic.co/blog/auto-instrumentation-of-net-applications-opentelemetry), [Manual-instrumentation](https://www.elastic.co/blog/manual-instrumentation-of-net-applications-opentelemetry)\\n> - Go: [Manual-instrumentation](https://elastic.co/blog/manual-instrumentation-of-go-applications-opentelemetry)\\n> - [Best practices for instrumenting OpenTelemetry](https://www.elastic.co/blog/best-practices-instrumenting-opentelemetry)\\n>\\n> General configuration and use case resources:\\n>\\n> - [Independence with OpenTelemetry on Elastic](https://www.elastic.co/blog/opentelemetry-observability)\\n> - [Modern observability and security on Kubernetes with Elastic and OpenTelemetry](https://www.elastic.co/blog/implementing-kubernetes-observability-security-opentelemetry)\\n> 
- [3 models for logging with OpenTelemetry and Elastic](https://www.elastic.co/blog/3-models-logging-opentelemetry-elastic)\\n> - [Adding free and open Elastic APM as part of your Elastic Observability deployment](https://www.elastic.co/blog/adding-free-and-open-elastic-apm-as-part-of-your-elastic-observability-deployment)\\n> - [Capturing custom metrics through OpenTelemetry API in code with Elastic](https://www.elastic.co/blog/custom-metrics-app-code-java-agent-plugin)\\n> - [Future-proof your observability platform with OpenTelemetry and Elastic](https://www.elastic.co/virtual-events/future-proof-your-observability-platform-with-opentelemetry-and-elastic)\\n> - [Elastic Observability: Built for open technologies like Kubernetes, OpenTelemetry, Prometheus, Istio, and more](https://www.elastic.co/blog/kubernetes-k8s-observability-elasticsearch-cncf)\\n\\nDon’t have an Elastic Cloud account yet? [Sign up for Elastic Cloud](https://cloud.elastic.co/registration) and try out the auto-instrumentation capabilities that I discussed above. I would be interested in getting your feedback about your experience in gaining visibility into your application stack with Elastic\\n\\n_The release and timing of any features or functionality described in this post remain at Elastic\'s sole discretion. Any features or functionality not currently available may not be delivered on time or at all._\\n","code":"var Component=(()=>{var h=Object.create;var i=Object.defineProperty;var u=Object.getOwnPropertyDescriptor;var m=Object.getOwnPropertyNames;var g=Object.getPrototypeOf,f=Object.prototype.hasOwnProperty;var y=(n,e)=>()=>(e||n((e={exports:{}}).exports,e),e.exports),v=(n,e)=>{for(var r in e)i(n,r,{get:e[r],enumerable:!0})},s=(n,e,r,a)=>{if(e&&typeof e==\\"object\\"||typeof e==\\"function\\")for(let o of m(e))!f.call(n,o)&&o!==r&&i(n,o,{get:()=>e[o],enumerable:!(a=u(e,o))||a.enumerable});return n};var w=(n,e,r)=>(r=n!=null?h(g(n)):{},s(e||!n||!n.__esModule?i(r,\\"default\\",{value:n,enumerable:!0}):r,n)),_=n=>s(i({},\\"__esModule\\",{value:!0}),n);var c=y((O,l)=>{l.exports=_jsx_runtime});var E={};v(E,{default:()=>d,frontmatter:()=>b});var t=w(c()),b={title:\\"Manual instrumentation with OpenTelemetry for Python applications\\",slug:\\"manual-instrumentation-python-apps-opentelemetry\\",date:\\"2023-08-31\\",description:\\"In this blog post, we will show you how to manually instrument Python applications using OpenTelemetry. We will explore how to use the proper OpenTelemetry Python libraries and in particular work on instrumenting tracing in a Python application.\\",author:[{slug:\\"bahubali-shetti\\"}],image:\\"observability-launch-series-2-python-manual_(1).jpg\\",tags:[{slug:\\"opentelemetry\\"},{slug:\\"python\\"},{slug:\\"apm\\"},{slug:\\"instrumentation\\"}]};function p(n){let e={a:\\"a\\",blockquote:\\"blockquote\\",br:\\"br\\",code:\\"code\\",em:\\"em\\",h2:\\"h2\\",h3:\\"h3\\",img:\\"img\\",li:\\"li\\",p:\\"p\\",pre:\\"pre\\",strong:\\"strong\\",ul:\\"ul\\",...n.components};return(0,t.jsxs)(t.Fragment,{children:[(0,t.jsx)(e.p,{children:\\"DevOps and SRE teams are transforming the process of software development. While DevOps engineers focus on efficient software applications and service delivery, SRE teams are key to ensuring reliability, scalability, and performance. 
These teams must rely on a full-stack observability solution that allows them to manage and monitor systems and ensure issues are resolved before they impact the business.\\"}),`\\n`,(0,t.jsx)(e.p,{children:\\"Observability across the entire stack of modern distributed applications requires data collection, processing, and correlation often in the form of dashboards. Ingesting all system data requires installing agents across stacks, frameworks, and providers \\\\u2014 a process that can be challenging and time-consuming for teams who have to deal with version changes, compatibility issues, and proprietary code that doesn\'t scale as systems change.\\"}),`\\n`,(0,t.jsxs)(e.p,{children:[\\"Thanks to \\",(0,t.jsx)(e.a,{href:\\"https://opentelemetry.io\\",rel:\\"nofollow\\",children:\\"OpenTelemetry\\"}),\\" (OTel), DevOps and SRE teams now have a standard way to collect and send data that doesn\'t rely on proprietary code and have a large support community reducing vendor lock-in.\\"]}),`\\n`,(0,t.jsxs)(e.p,{children:[\\"In a \\",(0,t.jsx)(e.a,{href:\\"https://www.elastic.co/blog/opentelemetry-observability\\",rel:\\"nofollow\\",children:\\"previous blog\\"}),\\", we also reviewed how to use the \\",(0,t.jsx)(e.a,{href:\\"https://github.com/elastic/opentelemetry-demo\\",rel:\\"nofollow\\",children:\\"OpenTelemetry demo\\"}),\\" and connect it to Elastic\\",(0,t.jsx)(\\"sup\\",{children:\\"\\\\xAE\\"}),\\", as well as some of Elastic\\\\u2019s capabilities with OpenTelemetry and Kubernetes.\\"]}),`\\n`,(0,t.jsxs)(e.p,{children:[\\"In this blog, we will show how to use \\",(0,t.jsx)(e.a,{href:\\"https://opentelemetry.io/docs/instrumentation/python/manual/\\",rel:\\"nofollow\\",children:\\"manual instrumentation for OpenTelemetry\\"}),\\" with the Python service of our \\",(0,t.jsx)(e.a,{href:\\"https://github.com/elastic/observability-examples\\",rel:\\"nofollow\\",children:\\"application called Elastiflix\\"}),\\". This approach is slightly more complex than using \\",(0,t.jsx)(e.a,{href:\\"https://www.elastic.co/blog/auto-instrumentation-of-python-applications-opentelemetry\\",rel:\\"nofollow\\",children:\\"automatic instrumentation\\"}),\\".\\"]}),`\\n`,(0,t.jsxs)(e.p,{children:[\\"The beauty of this is that there is \\",(0,t.jsx)(e.strong,{children:\\"no need for the otel-collector\\"}),\\"! This setup enables you to slowly and easily migrate an application to OTel with Elastic according to a timeline that best fits your business.\\"]}),`\\n`,(0,t.jsx)(e.h2,{id:\\"application-prerequisites-and-config\\",children:\\"Application, prerequisites, and config\\"}),`\\n`,(0,t.jsxs)(e.p,{children:[\\"The application that we use for this blog is called \\",(0,t.jsx)(e.a,{href:\\"https://github.com/elastic/observability-examples\\",rel:\\"nofollow\\",children:\\"Elastiflix\\"}),\\", a movie streaming application. It consists of several micro-services written in .NET, NodeJS, Go, and Python.\\"]}),`\\n`,(0,t.jsx)(e.p,{children:\\"Before we instrument our sample application, we will first need to understand how Elastic can receive the telemetry data.\\"}),`\\n`,(0,t.jsx)(e.p,{children:(0,t.jsx)(e.img,{src:\\"/assets/images/manual-instrumentation-python-apps-opentelemetry/elastic-blog-1-config.png\\",alt:\\"configuration\\",width:\\"1365\\",height:\\"687\\"})}),`\\n`,(0,t.jsx)(e.p,{children:\\"All of Elastic Observability\\\\u2019s APM capabilities are available with OTel data. 
Some of these include:\\"}),`\\n`,(0,t.jsxs)(e.ul,{children:[`\\n`,(0,t.jsx)(e.li,{children:\\"Service maps\\"}),`\\n`,(0,t.jsx)(e.li,{children:\\"Service details (latency, throughput, failed transactions)\\"}),`\\n`,(0,t.jsx)(e.li,{children:\\"Dependencies between services, distributed tracing\\"}),`\\n`,(0,t.jsx)(e.li,{children:\\"Transactions (traces)\\"}),`\\n`,(0,t.jsx)(e.li,{children:\\"Machine learning (ML) correlations\\"}),`\\n`,(0,t.jsx)(e.li,{children:\\"Log correlation\\"}),`\\n`]}),`\\n`,(0,t.jsx)(e.p,{children:\\"In addition to Elastic\\\\u2019s APM and a unified view of the telemetry data, you will also be able to use Elastic\\\\u2019s powerful machine learning capabilities to reduce the analysis, and alerting to help reduce MTTR.\\"}),`\\n`,(0,t.jsx)(e.h2,{id:\\"prerequisites\\",children:\\"Prerequisites\\"}),`\\n`,(0,t.jsxs)(e.ul,{children:[`\\n`,(0,t.jsxs)(e.li,{children:[\\"An Elastic Cloud account \\\\u2014 \\",(0,t.jsx)(e.a,{href:\\"https://cloud.elastic.co/\\",rel:\\"nofollow\\",children:\\"sign up now\\"})]}),`\\n`,(0,t.jsxs)(e.li,{children:[\\"A clone of the \\",(0,t.jsx)(e.a,{href:\\"https://github.com/elastic/observability-examples\\",rel:\\"nofollow\\",children:\\"Elastiflix demo application\\"}),\\", or your own Python application\\"]}),`\\n`,(0,t.jsxs)(e.li,{children:[\\"Basic understanding of Docker \\\\u2014 potentially install \\",(0,t.jsx)(e.a,{href:\\"https://www.docker.com/products/docker-desktop/\\",rel:\\"nofollow\\",children:\\"Docker Desktop\\"})]}),`\\n`,(0,t.jsx)(e.li,{children:\\"Basic understanding of Python\\"}),`\\n`]}),`\\n`,(0,t.jsx)(e.h2,{id:\\"view-the-example-source-code\\",children:\\"View the example source code\\"}),`\\n`,(0,t.jsxs)(e.p,{children:[\\"The full source code, including the Dockerfile used in this blog, can be found on \\",(0,t.jsx)(e.a,{href:\\"https://github.com/elastic/observability-examples/tree/main/Elastiflix/python-favorite-otel-auto\\",rel:\\"nofollow\\",children:\\"GitHub\\"}),\\". The repository also contains the \\",(0,t.jsx)(e.a,{href:\\"https://github.com/elastic/observability-examples/tree/main/Elastiflix/python-favorite\\",rel:\\"nofollow\\",children:\\"same application without instrumentation\\"}),\\". This allows you to compare each file and see the differences.\\"]}),`\\n`,(0,t.jsxs)(e.p,{children:[\\"The following steps will show you how to instrument this application and run it on the command line or in Docker. If you are interested in a more complete OTel example, take a look at the docker-compose file \\",(0,t.jsx)(e.a,{href:\\"https://github.com/elastic/observability-examples/tree/main#start-the-app\\",rel:\\"nofollow\\",children:\\"here\\"}),\\", which will bring up the full project.\\"]}),`\\n`,(0,t.jsx)(e.p,{children:\\"Before we begin, let\\\\u2019s look at the non-instrumented code first.\\"}),`\\n`,(0,t.jsxs)(e.p,{children:[\\"This is our simple Python Flask application that can receive a GET request. 
(This is a portion of the full \\",(0,t.jsx)(e.a,{href:\\"https://github.com/elastic/observability-examples/blob/main/Elastiflix/python-favorite/main.py\\",rel:\\"nofollow\\",children:\\"main.py\\"}),\\" file.)\\"]}),`\\n`,(0,t.jsx)(e.pre,{children:(0,t.jsx)(e.code,{className:\\"language-python\\",children:`from flask import Flask, request\\nimport sys\\n\\nimport logging\\nimport redis\\nimport os\\nimport ecs_logging\\nimport datetime\\nimport random\\nimport time\\n\\nredis_host = os.environ.get(\'REDIS_HOST\') or \'localhost\'\\nredis_port = os.environ.get(\'REDIS_PORT\') or 6379\\n\\napplication_port = os.environ.get(\'APPLICATION_PORT\') or 5000\\n\\napp = Flask(__name__)\\n\\n# Get the Logger\\nlogger = logging.getLogger(\\"app\\")\\nlogger.setLevel(logging.DEBUG)\\n\\n# Add an ECS formatter to the Handler\\nhandler = logging.StreamHandler()\\nhandler.setFormatter(ecs_logging.StdlibFormatter())\\nlogger.addHandler(handler)\\nlogging.getLogger(\'werkzeug\').setLevel(logging.ERROR)\\nlogging.getLogger(\'werkzeug\').addHandler(handler)\\n\\nr = redis.Redis(host=redis_host, port=redis_port, decode_responses=True)\\n\\n@app.route(\'/favorites\', methods=[\'GET\'])\\ndef get_favorite_movies():\\n    user_id = str(request.args.get(\'user_id\'))\\n\\n    logger.info(\'Getting favorites for user \' + user_id, extra={\\n        \\"event.dataset\\": \\"favorite.log\\",\\n        \\"user.id\\": request.args.get(\'user_id\')\\n    })\\n\\n    favorites = r.smembers(user_id)\\n\\n    # convert to list\\n    favorites = list(favorites)\\n    logger.info(\'User \' + user_id + \' has favorites: \' + str(favorites), extra={\\n        \\"event.dataset\\": \\"favorite.log\\",\\n        \\"user.id\\": user_id\\n    })\\n    return {\\"favorites\\": favorites}\\n\\nlogger.info(\'App startup\')\\napp.run(host=\'0.0.0.0\', port=application_port)\\nlogger.info(\'App Stopped\')\\n`})}),`\\n`,(0,t.jsx)(e.h2,{id:\\"step-by-step-guide\\",children:\\"Step-by-step guide\\"}),`\\n`,(0,t.jsx)(e.h3,{id:\\"step-0-log-in-to-your-elastic-cloud-account\\",children:\\"Step 0. Log in to your Elastic Cloud account\\"}),`\\n`,(0,t.jsxs)(e.p,{children:[\\"This blog assumes you have an Elastic Cloud account \\\\u2014 if not, follow the \\",(0,t.jsx)(e.a,{href:\\"https://cloud.elastic.co/registration?elektra=en-cloud-page\\",rel:\\"nofollow\\",children:\\"instructions to get started on Elastic Cloud\\"}),\\".\\"]}),`\\n`,(0,t.jsx)(e.p,{children:(0,t.jsx)(e.img,{src:\\"/assets/images/manual-instrumentation-python-apps-opentelemetry/elastic-blog-2-trial.png\\",alt:\\"trial\\",width:\\"884\\",height:\\"654\\"})}),`\\n`,(0,t.jsx)(e.h3,{id:\\"step-1-install-and-initialize-opentelemetry\\",children:\\"Step 1. Install and initialize OpenTelemetry\\"}),`\\n`,(0,t.jsx)(e.p,{children:\\"As a first step, we\\\\u2019ll need to add some additional libraries to our application.\\"}),`\\n`,
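(0,t.jsx)(e.p,{children:\\"These ship as separate packages on PyPI; installing them might look like the following \\\\u2014 a sketch of the install step, so check the repository for the exact versions it uses:\\"}),`\\n`,(0,t.jsx)(e.pre,{children:(0,t.jsx)(e.code,{className:\\"language-bash\\",children:`pip install opentelemetry-sdk opentelemetry-exporter-otlp\\npip install opentelemetry-instrumentation-flask opentelemetry-instrumentation-requests opentelemetry-instrumentation-redis\\n`})}),`\\n`,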
(0,t.jsx)(e.pre,{children:(0,t.jsx)(e.code,{className:\\"language-python\\",children:`from opentelemetry import trace\\nfrom opentelemetry.sdk.trace import TracerProvider\\nfrom opentelemetry.sdk.trace.export import BatchSpanProcessor\\n\\nfrom opentelemetry.exporter.otlp.proto.grpc.trace_exporter import OTLPSpanExporter\\nfrom opentelemetry.instrumentation.flask import FlaskInstrumentor\\nfrom opentelemetry.instrumentation.requests import RequestsInstrumentor\\nfrom opentelemetry.instrumentation.redis import RedisInstrumentor\\nfrom opentelemetry.sdk.resources import Resource\\n`})}),`\\n`,(0,t.jsx)(e.p,{children:\\"This code imports necessary OpenTelemetry libraries, including those for tracing, exporting, and instrumenting specific libraries like Flask, Requests, and Redis.\\"}),`\\n`,(0,t.jsx)(e.p,{children:\\"Next we read the variables:\\"}),`\\n`,(0,t.jsx)(e.pre,{children:(0,t.jsx)(e.code,{className:\\"language-bash\\",children:`OTEL_EXPORTER_OTLP_HEADERS\\nOTEL_EXPORTER_OTLP_ENDPOINT\\n`})}),`\\n`,(0,t.jsx)(e.p,{children:\\"And then initialize the exporter.\\"}),`\\n`,(0,t.jsx)(e.pre,{children:(0,t.jsx)(e.code,{className:\\"language-python\\",children:`otel_exporter_otlp_headers = os.environ.get(\'OTEL_EXPORTER_OTLP_HEADERS\')\\n\\notel_exporter_otlp_endpoint = os.environ.get(\'OTEL_EXPORTER_OTLP_ENDPOINT\')\\n\\nexporter = OTLPSpanExporter(endpoint=otel_exporter_otlp_endpoint, headers=otel_exporter_otlp_headers)\\n`})}),`\\n`,(0,t.jsx)(e.p,{children:\\"In order to pass additional parameters to OpenTelemetry, we will read the OTEL_RESOURCE_ATTRIBUTES variable and convert it into an object.\\"}),`\\n`,(0,t.jsx)(e.pre,{children:(0,t.jsx)(e.code,{className:\\"language-python\\",children:`resource_attributes = os.environ.get(\'OTEL_RESOURCE_ATTRIBUTES\') or \'service.version=1.0,deployment.environment=production\'\\nkey_value_pairs = resource_attributes.split(\',\')\\nresult_dict = {}\\n\\nfor pair in key_value_pairs:\\n    key, value = pair.split(\'=\')\\n    result_dict[key] = value\\n`})}),`\\n`,(0,t.jsx)(e.p,{children:\\"Next, we use these parameters to populate the resources configuration.\\"}),`\\n`,(0,t.jsx)(e.pre,{children:(0,t.jsx)(e.code,{className:\\"language-python\\",children:`resourceAttributes = {\\n    \\"service.name\\": otel_service_name,\\n    \\"service.version\\": result_dict[\'service.version\'],\\n    \\"deployment.environment\\": result_dict[\'deployment.environment\']\\n}\\n\\nresource = Resource.create(resourceAttributes)\\n`})}),`\\n`,(0,t.jsx)(e.p,{children:\\"We then set up the trace provider using the previously created resource. The trace provider will allow us to create spans later after getting a tracer instance from it.\\"}),`\\n`,(0,t.jsx)(e.p,{children:\\"Additionally, we specify the use of BatchSpanProcessor. The Span processor is an interface that allows hooks for span start and end method invocations.\\"}),`\\n`,(0,t.jsxs)(e.p,{children:[\\"In OpenTelemetry, different Span processors are offered. The BatchSpanProcessor batches spans and sends them in bulk. Multiple Span processors can be configured to be active at the same time using the MultiSpanProcessor. 
\\",(0,t.jsx)(e.a,{href:\\"https://opentelemetry.io/docs/instrumentation/java/manual/#span-processor\\",rel:\\"nofollow\\",children:\\"See OpenTelemetry documentation\\"}),\\".\\"]}),`\\n`,(0,t.jsxs)(e.p,{children:[\\"Additionally, we added the resource module. This allows us to specify attributes such as service.name, version, and more. See \\",(0,t.jsx)(e.a,{href:\\"https://opentelemetry.io/docs/specs/otel/resource/semantic_conventions/#semantic-attributes-with-sdk-provided-default-value\\",rel:\\"nofollow\\",children:\\"OpenTelemetry semantic conventions documentation\\"}),\\" for more details.\\"]}),`\\n`,(0,t.jsx)(e.pre,{children:(0,t.jsx)(e.code,{className:\\"language-python\\",children:`provider = TracerProvider(resource=resource)\\nprocessor = BatchSpanProcessor(exporter)\\nprovider.add_span_processor(processor)\\n\\n# Sets the global default tracer provider\\ntrace.set_tracer_provider(provider)\\n\\n# Creates a tracer from the global tracer provider\\ntracer = trace.get_tracer(otel_service_name)\\n`})}),`\\n`,(0,t.jsx)(e.p,{children:\\"Finally, because we are using Flask and Redis, we also add the following, which allows us to automatically instrument both Flask and Redis.\\"}),`\\n`,(0,t.jsx)(e.p,{children:\\"Technically you could consider this \\\\u201Ccheating.\\\\u201D We are using some parts of the Python auto-instrumentation. However, it\\\\u2019s generally a good approach to resort to using some of the auto-instrumentation modules. This saves you a lot of time, and in addition, it ensures that functionality like distributed tracing will work automatically for any requests you receive or send.\\"}),`\\n`,(0,t.jsx)(e.pre,{children:(0,t.jsx)(e.code,{className:\\"language-python\\",children:`FlaskInstrumentor().instrument_app(app)\\nRequestsInstrumentor().instrument()\\nRedisInstrumentor().instrument()\\n`})}),`\\n`,(0,t.jsx)(e.h3,{id:\\"step-2-adding-custom-spans\\",children:\\"Step 2. 
(0,t.jsx)(e.h3,{id:\\"step-2-adding-custom-spans\\",children:\\"Step 2. Adding Custom Spans\\"}),`\\n`,(0,t.jsx)(e.p,{children:\\"Now that we have everything added and initialized, we can add custom spans.\\"}),`\\n`,(0,t.jsx)(e.p,{children:\\"If we want to have additional instrumentation for a part of our app, we simply wrap the code of the /favorites GET handler using Python with:\\"}),`\\n`,(0,t.jsx)(e.pre,{children:(0,t.jsx)(e.code,{className:\\"language-python\\",children:`with tracer.start_as_current_span(\\"get_favorite_movies\\", set_status_on_exception=True) as span:\\n    ...\\n`})}),`\\n`,(0,t.jsx)(e.p,{children:\\"The wrapped code is as follows:\\"}),`\\n`,(0,t.jsx)(e.pre,{children:(0,t.jsx)(e.code,{className:\\"language-python\\",children:`@app.route(\'/favorites\', methods=[\'GET\'])\\ndef get_favorite_movies():\\n    # add artificial delay if enabled\\n    if delay_time > 0:\\n        time.sleep(max(0, random.gauss(delay_time/1000, delay_time/1000/10)))\\n\\n    with tracer.start_as_current_span(\\"get_favorite_movies\\") as span:\\n        user_id = str(request.args.get(\'user_id\'))\\n\\n        logger.info(\'Getting favorites for user \' + user_id, extra={\\n            \\"event.dataset\\": \\"favorite.log\\",\\n            \\"user.id\\": request.args.get(\'user_id\')\\n        })\\n\\n        favorites = r.smembers(user_id)\\n\\n        # convert to list\\n        favorites = list(favorites)\\n        logger.info(\'User \' + user_id + \' has favorites: \' + str(favorites), extra={\\n            \\"event.dataset\\": \\"favorite.log\\",\\n            \\"user.id\\": user_id\\n        })\\n`})}),`\\n`,
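(0,t.jsx)(e.p,{children:\\"While we are in here, the span object also lets us attach custom attributes and events. The following is a sketch of what that could look like for the handler above \\\\u2014 the attribute and event names are made up for illustration and are not part of the sample application:\\"}),`\\n`,(0,t.jsx)(e.pre,{children:(0,t.jsx)(e.code,{className:\\"language-python\\",children:`with tracer.start_as_current_span(\\"get_favorite_movies\\") as span:\\n    # attach searchable metadata to the span itself\\n    span.set_attribute(\\"user.id\\", user_id)\\n    favorites = list(r.smembers(user_id))\\n    # record a point-in-time event with its own attributes\\n    span.add_event(\\"favorites_fetched\\", {\\"count\\": len(favorites)})\\n`})}),`\\n`,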
(0,t.jsx)(e.p,{children:(0,t.jsx)(e.strong,{children:\\"Additional code\\"})}),`\\n`,(0,t.jsx)(e.p,{children:\\"In addition to modules and span instrumentation, the sample application also checks some environment variables at startup. When sending data to Elastic without an OTel collector, the OTEL_EXPORTER_OTLP_HEADERS variable is required as it contains the authentication. The same is true for OTEL_EXPORTER_OTLP_ENDPOINT, the host where we\\\\u2019ll send the telemetry data.\\"}),`\\n`,(0,t.jsx)(e.pre,{children:(0,t.jsx)(e.code,{className:\\"language-python\\",children:`otel_exporter_otlp_headers = os.environ.get(\'OTEL_EXPORTER_OTLP_HEADERS\')\\n# fail if secret token not set\\nif otel_exporter_otlp_headers is None:\\n    raise Exception(\'OTEL_EXPORTER_OTLP_HEADERS environment variable not set\')\\n\\notel_exporter_otlp_endpoint = os.environ.get(\'OTEL_EXPORTER_OTLP_ENDPOINT\')\\n# fail if server url not set\\nif otel_exporter_otlp_endpoint is None:\\n    raise Exception(\'OTEL_EXPORTER_OTLP_ENDPOINT environment variable not set\')\\nelse:\\n    exporter = OTLPSpanExporter(endpoint=otel_exporter_otlp_endpoint, headers=otel_exporter_otlp_headers)\\n`})}),`\\n`,(0,t.jsxs)(e.p,{children:[(0,t.jsx)(e.strong,{children:\\"Final code\\"}),(0,t.jsx)(e.br,{}),`\\n`,\\"For comparison, this is the instrumented code of our sample application. You can find the full source code in \\",(0,t.jsx)(e.a,{href:\\"https://github.com/elastic/observability-examples/tree/main/Elastiflix/python-favorite-otel-manual\\",rel:\\"nofollow\\",children:\\"GitHub\\"}),\\".\\"]}),`\\n`,(0,t.jsx)(e.pre,{children:(0,t.jsx)(e.code,{className:\\"language-python\\",children:`from flask import Flask, request\\nimport sys\\n\\nimport logging\\nimport redis\\nimport os\\nimport ecs_logging\\nimport datetime\\nimport random\\nimport time\\n\\nfrom opentelemetry import trace\\nfrom opentelemetry.sdk.trace import TracerProvider\\nfrom opentelemetry.sdk.trace.export import BatchSpanProcessor\\n\\n# Using the gRPC exporter since, per the OTel docs, this is needed for any endpoint receiving OTLP.\\nfrom opentelemetry.exporter.otlp.proto.grpc.trace_exporter import OTLPSpanExporter\\nfrom opentelemetry.instrumentation.flask import FlaskInstrumentor\\nfrom opentelemetry.instrumentation.requests import RequestsInstrumentor\\nfrom opentelemetry.instrumentation.redis import RedisInstrumentor\\nfrom opentelemetry.sdk.resources import Resource\\n\\nredis_host = os.environ.get(\'REDIS_HOST\') or \'localhost\'\\nredis_port = os.environ.get(\'REDIS_PORT\') or 6379\\notel_traces_exporter = os.environ.get(\'OTEL_TRACES_EXPORTER\') or \'otlp\'\\notel_metrics_exporter = os.environ.get(\'OTEL_METRICS_EXPORTER\') or \'otlp\'\\nenvironment = os.environ.get(\'ENVIRONMENT\') or \'dev\'\\notel_service_version = os.environ.get(\'OTEL_SERVICE_VERSION\') or \'1.0.0\'\\n# the default includes service.name so that result_dict below always has the keys we read\\nresource_attributes = os.environ.get(\'OTEL_RESOURCE_ATTRIBUTES\') or \'service.name=python-favorite-otel-manual,service.version=1.0,deployment.environment=production\'\\n\\notel_exporter_otlp_headers = os.environ.get(\'OTEL_EXPORTER_OTLP_HEADERS\')\\n# fail if secret token not set\\nif otel_exporter_otlp_headers is None:\\n    raise Exception(\'OTEL_EXPORTER_OTLP_HEADERS environment variable not set\')\\n\\notel_exporter_otlp_endpoint = os.environ.get(\'OTEL_EXPORTER_OTLP_ENDPOINT\')\\n# fail if server url not set\\nif otel_exporter_otlp_endpoint is None:\\n    raise Exception(\'OTEL_EXPORTER_OTLP_ENDPOINT environment variable not set\')\\nelse:\\n    exporter = OTLPSpanExporter(endpoint=otel_exporter_otlp_endpoint, headers=otel_exporter_otlp_headers)\\n\\nkey_value_pairs = resource_attributes.split(\',\')\\nresult_dict = {}\\n\\nfor pair in key_value_pairs:\\n    key, value = pair.split(\'=\')\\n    result_dict[key] = value\\n\\nresourceAttributes = {\\n    \\"service.name\\": result_dict[\'service.name\'],\\n    \\"service.version\\": result_dict[\'service.version\'],\\n    \\"deployment.environment\\": result_dict[\'deployment.environment\']\\n}\\n\\nresource = Resource.create(resourceAttributes)\\n\\nprovider = TracerProvider(resource=resource)\\nprocessor = BatchSpanProcessor(exporter)\\nprovider.add_span_processor(processor)\\n\\n# Sets the global default tracer provider\\ntrace.set_tracer_provider(provider)\\n\\n# Creates a tracer from the global tracer provider\\ntracer = trace.get_tracer(\\"favorite\\")\\n\\napplication_port = os.environ.get(\'APPLICATION_PORT\') or 5000\\n\\napp = Flask(__name__)\\n\\nFlaskInstrumentor().instrument_app(app)\\nRequestsInstrumentor().instrument()\\nRedisInstrumentor().instrument()\\n\\n# Get the Logger\\nlogger = 
logging.getLogger(\\"app\\")\\nlogger.setLevel(logging.DEBUG)\\n\\n# Add an ECS formatter to the Handler\\nhandler = logging.StreamHandler()\\nhandler.setFormatter(ecs_logging.StdlibFormatter())\\nlogger.addHandler(handler)\\nlogging.getLogger(\'werkzeug\').setLevel(logging.ERROR)\\nlogging.getLogger(\'werkzeug\').addHandler(handler)\\n\\nr = redis.Redis(host=redis_host, port=redis_port, decode_responses=True)\\n\\n@app.route(\'/favorites\', methods=[\'GET\'])\\ndef get_favorite_movies():\\n    with tracer.start_as_current_span(\\"get_favorite_movies\\") as span:\\n        user_id = str(request.args.get(\'user_id\'))\\n\\n        logger.info(\'Getting favorites for user \' + user_id, extra={\\n            \\"event.dataset\\": \\"favorite.log\\",\\n            \\"user.id\\": request.args.get(\'user_id\')\\n        })\\n\\n        favorites = r.smembers(user_id)\\n\\n        # convert to list\\n        favorites = list(favorites)\\n        logger.info(\'User \' + user_id + \' has favorites: \' + str(favorites), extra={\\n            \\"event.dataset\\": \\"favorite.log\\",\\n            \\"user.id\\": user_id\\n        })\\n        return {\\"favorites\\": favorites}\\n\\nlogger.info(\'App startup\')\\napp.run(host=\'0.0.0.0\', port=application_port)\\nlogger.info(\'App Stopped\')\\n`})}),`\\n`,(0,t.jsx)(e.h3,{id:\\"step-3-running-the-docker-image-with-environment-variables\\",children:\\"Step 3. Running the Docker image with environment variables\\"}),`\\n`,(0,t.jsxs)(e.p,{children:[\\"As specified in the \\",(0,t.jsx)(e.a,{href:\\"https://opentelemetry.io/docs/instrumentation/python/automatic/#configuring-the-agent\\",rel:\\"nofollow\\",children:\\"OTel documentation\\"}),\\", we will use environment variables and pass in the configuration values to enable it to connect with \\",(0,t.jsx)(e.a,{href:\\"https://www.elastic.co/guide/en/observability/current/apm-open-telemetry.html\\",rel:\\"nofollow\\",children:\\"Elastic Observability\\\\u2019s APM server\\"}),\\".\\"]}),`\\n`,(0,t.jsx)(e.p,{children:\\"Because Elastic accepts OTLP natively, we just need to provide the endpoint and authentication that the OTLP exporter needs to send the data, as well as some other environment variables.\\"}),`\\n`,(0,t.jsxs)(e.p,{children:[(0,t.jsx)(e.strong,{children:\\"Getting Elastic Cloud variables\\"}),(0,t.jsx)(e.br,{}),`\\n`,\\"You can copy the endpoints and token from Kibana\\",(0,t.jsx)(\\"sup\\",{children:\\"\\\\xAE\\"}),\\" under the path \\",(0,t.jsx)(e.code,{children:\\"/app/home#/tutorial/apm\\"}),\\".\\"]}),`\\n`,(0,t.jsx)(e.p,{children:(0,t.jsx)(e.img,{src:\\"/assets/images/manual-instrumentation-python-apps-opentelemetry/elastic-blog-3-apm.png\\",alt:\\"apm agents\\",width:\\"1924\\",height:\\"1304\\"})}),`\\n`,(0,t.jsx)(e.p,{children:\\"You will need to copy the following environment variables:\\"}),`\\n`,(0,t.jsx)(e.pre,{children:(0,t.jsx)(e.code,{className:\\"language-bash\\",children:`OTEL_EXPORTER_OTLP_ENDPOINT\\nOTEL_EXPORTER_OTLP_HEADERS\\n`})}),`\\n`,(0,t.jsx)(e.p,{children:(0,t.jsx)(e.strong,{children:\\"Build the image\\"})}),`\\n`,(0,t.jsx)(e.pre,{children:(0,t.jsx)(e.code,{className:\\"language-bash\\",children:`docker build -t python-otel-manual-image .\\n`})}),`\\n`,(0,t.jsx)(e.p,{children:(0,t.jsx)(e.strong,{children:\\"Run the image\\"})}),`\\n`,(0,t.jsx)(e.pre,{children:(0,t.jsx)(e.code,{className:\\"language-bash\\",children:`docker run \\\\\\\\\\n  -e OTEL_EXPORTER_OTLP_ENDPOINT=\\"\\" \\\\\\\\\\n  -e OTEL_EXPORTER_OTLP_HEADERS=\\"Authorization=Bearer \\" \\\\\\\\\\n  -e OTEL_RESOURCE_ATTRIBUTES=\\"service.version=1.0,deployment.environment=production,service.name=python-favorite-otel-manual\\" \\\\\\\\\\n  -p 5000:5000 \\\\\\\\\\n  python-otel-manual-image\\n`})}),`\\n`,
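(0,t.jsx)(e.p,{children:\\"With the placeholders filled in, the command might look like the following \\\\u2014 the endpoint and token here are invented examples, so substitute the values from your own deployment:\\"}),`\\n`,(0,t.jsx)(e.pre,{children:(0,t.jsx)(e.code,{className:\\"language-bash\\",children:`docker run \\\\\\\\\\n  -e OTEL_EXPORTER_OTLP_ENDPOINT=\\"https://my-deployment.apm.us-east-1.aws.cloud.es.io:443\\" \\\\\\\\\\n  -e OTEL_EXPORTER_OTLP_HEADERS=\\"Authorization=Bearer abcd1234efgh5678\\" \\\\\\\\\\n  -e OTEL_RESOURCE_ATTRIBUTES=\\"service.version=1.0,deployment.environment=production,service.name=python-favorite-otel-manual\\" \\\\\\\\\\n  -p 5000:5000 \\\\\\\\\\n  python-otel-manual-image\\n`})}),`\\n`,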
(0,t.jsxs)(e.p,{children:[\\"You can now issue a few requests in order to generate trace data. Note that these requests are expected to return an error, as this service relies on a connection to Redis that you don\\\\u2019t currently have running. As mentioned before, you can find a more complete example using docker-compose \\",(0,t.jsx)(e.a,{href:\\"https://github.com/elastic/observability-examples/tree/main/Elastiflix\\",rel:\\"nofollow\\",children:\\"here\\"}),\\".\\"]}),`\\n`,(0,t.jsx)(e.pre,{children:(0,t.jsx)(e.code,{className:\\"language-bash\\",children:`curl localhost:5000/favorites\\n\\n# or alternatively issue a request every second\\nwhile true; do curl \\"localhost:5000/favorites\\"; sleep 1; done;\\n`})}),`\\n`,(0,t.jsx)(e.h3,{id:\\"step-4-explore-traces-metrics-and-logs-in-elastic-apm\\",children:\\"Step 4. Explore traces, metrics, and logs in Elastic APM\\"}),`\\n`,(0,t.jsx)(e.p,{children:\\"Now that the service is instrumented, you should see the following output in Elastic APM when looking at the transactions section of your Python service:\\"}),`\\n`,(0,t.jsx)(e.p,{children:(0,t.jsx)(e.img,{src:\\"/assets/images/manual-instrumentation-python-apps-opentelemetry/elastic-blog-4-graph1.png\\",alt:\\"graph-1\\",width:\\"1999\\",height:\\"1125\\"})}),`\\n`,(0,t.jsx)(e.p,{children:\\"Notice how this is slightly different from the auto-instrumented version, as we now also have our custom span in this view.\\"}),`\\n`,(0,t.jsx)(e.p,{children:(0,t.jsx)(e.img,{src:\\"/assets/images/manual-instrumentation-python-apps-opentelemetry/elastic-blog-5-graph2.png\\",alt:\\"graph-2\\",width:\\"1999\\",height:\\"1125\\"})}),`\\n`,(0,t.jsx)(e.h2,{id:\\"is-it-worth-it\\",children:\\"Is it worth it?\\"}),`\\n`,(0,t.jsx)(e.p,{children:\\"This is the million-dollar question. Depending on the level of detail you need, manual instrumentation may be necessary. It lets you add custom spans, custom labels, and metrics where you want or need them, providing a level of detail that otherwise would not be possible and that is oftentimes important for tracking business-specific KPIs.\\"}),`\\n`,(0,t.jsx)(e.p,{children:\\"Your operations, and whether you need to troubleshoot or analyze the performance of specific parts of the code, will dictate when and what to instrument. But it\\\\u2019s helpful to know that you have the option to manually instrument.\\"}),`\\n`,(0,t.jsxs)(e.p,{children:[\\"If you noticed that we didn\\\\u2019t instrument metrics yet \\\\u2014 that is a topic for another blog. 
We discussed logs in a \\",(0,t.jsx)(e.a,{href:\\"https://www.elastic.co/blog/3-models-logging-opentelemetry-elastic\\",rel:\\"nofollow\\",children:\\"previous blog\\"}),\\".\\"]}),`\\n`,(0,t.jsx)(e.h2,{id:\\"conclusion\\",children:\\"Conclusion\\"}),`\\n`,(0,t.jsx)(e.p,{children:\\"In this blog, we discussed the following:\\"}),`\\n`,(0,t.jsxs)(e.ul,{children:[`\\n`,(0,t.jsx)(e.li,{children:\\"How to manually instrument Python with OpenTelemetry\\"}),`\\n`,(0,t.jsx)(e.li,{children:\\"How to properly initialize OpenTelemetry and add a custom span\\"}),`\\n`,(0,t.jsx)(e.li,{children:\\"How to easily set the OTLP ENDPOINT and OTLP HEADERS with Elastic without the need for a collector\\"}),`\\n`]}),`\\n`,(0,t.jsx)(e.p,{children:\\"Hopefully, this provides an easy-to-understand walk-through of instrumenting Python with OpenTelemetry and how easy it is to send traces into Elastic.\\"}),`\\n`,(0,t.jsxs)(e.blockquote,{children:[`\\n`,(0,t.jsx)(e.p,{children:\\"Developer resources:\\"}),`\\n`,(0,t.jsxs)(e.ul,{children:[`\\n`,(0,t.jsxs)(e.li,{children:[(0,t.jsx)(e.a,{href:\\"https://www.elastic.co/blog/getting-started-opentelemetry-instrumentation-sample-app\\",rel:\\"nofollow\\",children:\\"Elastiflix application\\"}),\\", a guide to instrument different languages with OpenTelemetry\\"]}),`\\n`,(0,t.jsxs)(e.li,{children:[\\"Python: \\",(0,t.jsx)(e.a,{href:\\"https://www.elastic.co/blog/auto-instrumentation-of-python-applications-opentelemetry\\",rel:\\"nofollow\\",children:\\"Auto-instrumentation\\"}),\\", \\",(0,t.jsx)(e.a,{href:\\"https://www.elastic.co/blog/manual-instrumentation-python-apps-opentelemetry\\",rel:\\"nofollow\\",children:\\"Manual-instrumentation\\"})]}),`\\n`,(0,t.jsxs)(e.li,{children:[\\"Java: \\",(0,t.jsx)(e.a,{href:\\"https://www.elastic.co/blog/auto-instrumentation-of-java-applications-opentelemetry\\",rel:\\"nofollow\\",children:\\"Auto-instrumentation\\"}),\\", \\",(0,t.jsx)(e.a,{href:\\"https://www.elastic.co/blog/manual-instrumentation-of-java-applications-opentelemetry\\",rel:\\"nofollow\\",children:\\"Manual-instrumentation\\"})]}),`\\n`,(0,t.jsxs)(e.li,{children:[\\"Node.js: \\",(0,t.jsx)(e.a,{href:\\"https://www.elastic.co/blog/auto-instrument-nodejs-applications-opentelemetry\\",rel:\\"nofollow\\",children:\\"Auto-instrumentation\\"}),\\", \\",(0,t.jsx)(e.a,{href:\\"https://www.elastic.co/blog/manual-instrumentation-of-nodejs-applications-opentelemetry\\",rel:\\"nofollow\\",children:\\"Manual-instrumentation\\"})]}),`\\n`,(0,t.jsxs)(e.li,{children:[\\".NET: \\",(0,t.jsx)(e.a,{href:\\"https://www.elastic.co/blog/auto-instrumentation-of-net-applications-opentelemetry\\",rel:\\"nofollow\\",children:\\"Auto-instrumentation\\"}),\\", \\",(0,t.jsx)(e.a,{href:\\"https://www.elastic.co/blog/manual-instrumentation-of-net-applications-opentelemetry\\",rel:\\"nofollow\\",children:\\"Manual-instrumentation\\"})]}),`\\n`,(0,t.jsxs)(e.li,{children:[\\"Go: \\",(0,t.jsx)(e.a,{href:\\"https://elastic.co/blog/manual-instrumentation-of-go-applications-opentelemetry\\",rel:\\"nofollow\\",children:\\"Manual-instrumentation\\"})]}),`\\n`,(0,t.jsx)(e.li,{children:(0,t.jsx)(e.a,{href:\\"https://www.elastic.co/blog/best-practices-instrumenting-opentelemetry\\",rel:\\"nofollow\\",children:\\"Best practices for instrumenting OpenTelemetry\\"})}),`\\n`]}),`\\n`,(0,t.jsx)(e.p,{children:\\"General configuration and use case 
resources:\\"}),`\\n`,(0,t.jsxs)(e.ul,{children:[`\\n`,(0,t.jsx)(e.li,{children:(0,t.jsx)(e.a,{href:\\"https://www.elastic.co/blog/opentelemetry-observability\\",rel:\\"nofollow\\",children:\\"Independence with OpenTelemetry on Elastic\\"})}),`\\n`,(0,t.jsx)(e.li,{children:(0,t.jsx)(e.a,{href:\\"https://www.elastic.co/blog/implementing-kubernetes-observability-security-opentelemetry\\",rel:\\"nofollow\\",children:\\"Modern observability and security on Kubernetes with Elastic and OpenTelemetry\\"})}),`\\n`,(0,t.jsx)(e.li,{children:(0,t.jsx)(e.a,{href:\\"https://www.elastic.co/blog/3-models-logging-opentelemetry-elastic\\",rel:\\"nofollow\\",children:\\"3 models for logging with OpenTelemetry and Elastic\\"})}),`\\n`,(0,t.jsx)(e.li,{children:(0,t.jsx)(e.a,{href:\\"https://www.elastic.co/blog/adding-free-and-open-elastic-apm-as-part-of-your-elastic-observability-deployment\\",rel:\\"nofollow\\",children:\\"Adding free and open Elastic APM as part of your Elastic Observability deployment\\"})}),`\\n`,(0,t.jsx)(e.li,{children:(0,t.jsx)(e.a,{href:\\"https://www.elastic.co/blog/custom-metrics-app-code-java-agent-plugin\\",rel:\\"nofollow\\",children:\\"Capturing custom metrics through OpenTelemetry API in code with Elastic\\"})}),`\\n`,(0,t.jsx)(e.li,{children:(0,t.jsx)(e.a,{href:\\"https://www.elastic.co/virtual-events/future-proof-your-observability-platform-with-opentelemetry-and-elastic\\",rel:\\"nofollow\\",children:\\"Future-proof your observability platform with OpenTelemetry and Elastic\\"})}),`\\n`,(0,t.jsx)(e.li,{children:(0,t.jsx)(e.a,{href:\\"https://www.elastic.co/blog/kubernetes-k8s-observability-elasticsearch-cncf\\",rel:\\"nofollow\\",children:\\"Elastic Observability: Built for open technologies like Kubernetes, OpenTelemetry, Prometheus, Istio, and more\\"})}),`\\n`]}),`\\n`]}),`\\n`,(0,t.jsxs)(e.p,{children:[\\"Don\\\\u2019t have an Elastic Cloud account yet? \\",(0,t.jsx)(e.a,{href:\\"https://cloud.elastic.co/registration\\",rel:\\"nofollow\\",children:\\"Sign up for Elastic Cloud\\"}),\\" and try out the manual instrumentation capabilities that I discussed above. I would be interested in getting your feedback about your experience in gaining visibility into your application stack with Elastic.\\"]}),`\\n`,(0,t.jsx)(e.p,{children:(0,t.jsx)(e.em,{children:\\"The release and timing of any features or functionality described in this post remain at Elastic\'s sole discretion. 
Any features or functionality not currently available may not be delivered on time or at all.\\"})})]})}function d(n={}){let{wrapper:e}=n.components||{};return e?(0,t.jsx)(e,{...n,children:(0,t.jsx)(p,{...n})}):p(n)}return _(E);})();\\n;return Component;"},"_id":"articles/manual-instrumentation-of-python-applications-opentelemetry.mdx","_raw":{"sourceFilePath":"articles/manual-instrumentation-of-python-applications-opentelemetry.mdx","sourceFileName":"manual-instrumentation-of-python-applications-opentelemetry.mdx","sourceFileDir":"articles","contentType":"mdx","flattenedPath":"articles/manual-instrumentation-of-python-applications-opentelemetry"},"type":"Article","imageUrl":"/assets/images/manual-instrumentation-python-apps-opentelemetry/observability-launch-series-2-python-manual_(1).jpg","readingTime":"15 min read","url":"/manual-instrumentation-python-apps-opentelemetry","headings":[{"level":2,"title":"Application, prerequisites, and config","href":"#application-prerequisites-and-config"},{"level":2,"title":"Prerequisites","href":"#prerequisites"},{"level":2,"title":"View the example source code","href":"#view-the-example-source-code"},{"level":2,"title":"Step-by-step guide","href":"#step-by-step-guide"},{"level":3,"title":"Step 0. Log in to your Elastic Cloud account","href":"#step-0-log-in-to-your-elastic-cloud-account"},{"level":3,"title":"Step 1. Install and initialize OpenTelemetry","href":"#step-1-install-and-initialize-opentelemetry"},{"level":3,"title":"Step 2. Adding Custom Spans","href":"#step-2-adding-custom-spans"},{"level":3,"title":"Step 3. Running the Docker image with environment variables","href":"#step-3-running-the-docker-image-with-environment-variables"},{"level":3,"title":"Step 4. Explore traces, metrics, and logs in Elastic APM","href":"#step-4-explore-traces-metrics-and-logs-in-elastic-apm"},{"level":2,"title":"Is it worth it?","href":"#is-it-worth-it"},{"level":2,"title":"Conclusion","href":"#conclusion"}]},{"title":"Migrating 1 billion log lines from OpenSearch to Elasticsearch","slug":"migrating-billion-log-lines-opensearch-elasticsearch","date":"2023-10-11","description":"Learn how to migrate 1 billion log lines from OpenSearch to Elasticsearch for improved performance and reduced disk usage. 
Discover the migration strategies, data transfer methods, and optimization techniques used in this guide.","image":"elastic-blog-header-1-billion-log-lines.png","author":[{"slug":"ugo-sangiorgi","type":"Author","_raw":{}}],"tags":[{"slug":"log-analytics","type":"Tag","_raw":{}},{"slug":"opensearch","type":"Tag","_raw":{}}],"body":{"raw":"\\nWhat are the current options to migrate from OpenSearch to Elasticsearch\xae?\\n\\nOpenSearch is a fork of Elasticsearch 7.10 that has diverged quite a bit from Elasticsearch lately, resulting in a different set of features and also different performance, as [this benchmark](https://www.elastic.co/blog/elasticsearch-opensearch-performance-gap) shows (hint: it’s currently much slower than Elasticsearch).\\n\\nGiven the differences between the two solutions, restoring a snapshot from OpenSearch is not possible, nor is reindex-from-remote, so our only option is then using something in between that will read from OpenSearch and write to Elasticsearch.\\n\\nThis blog will show you how easy it is to migrate from OpenSearch to Elasticsearch for better performance and less disk usage!\\n\\n![1 - arrows](/assets/images/migrating-billion-log-lines-opensearch-elasticsearch/blog-elastic-348gb-disk-space-logs.jpg)\\n\\n## 1 billion log lines\\n\\nWe are going to use part of the data set we used for the benchmark, which takes about half a terabyte on disk, including replicas, and spans over a week (January 1–7, 2023).\\n\\nWe have in total 1,009,165,775 documents that take **453.5GB** of space in OpenSearch, including the replicas. That’s **241.2 bytes per document**. This is going to be important later when we enable a couple of optimizations in Elasticsearch that will bring this total size way down without sacrificing performance!\\n\\nThis billion log line data set is spread over nine indices that are part of a datastream we are calling logs-myapplication-prod. We have primary shards of about 25GB in size, according to the best practices for optimal shard sizing. A GET \\_cat/indices shows us the indices we are dealing with:\\n\\n```bash\\nindex docs.count pri rep pri.store.size store.size\\n.ds-logs-myapplication-prod-000049 102519334 1 1 22.1gb 44.2gb\\n.ds-logs-myapplication-prod-000048 114273539 1 1 26.1gb 52.3gb\\n.ds-logs-myapplication-prod-000044 111093596 1 1 25.4gb 50.8gb\\n.ds-logs-myapplication-prod-000043 113821016 1 1 25.7gb 51.5gb\\n.ds-logs-myapplication-prod-000042 113859174 1 1 24.8gb 49.7gb\\n.ds-logs-myapplication-prod-000041 112400019 1 1 25.7gb 51.4gb\\n.ds-logs-myapplication-prod-000040 113362823 1 1 25.9gb 51.9gb\\n.ds-logs-myapplication-prod-000038 110994116 1 1 25.3gb 50.7gb\\n.ds-logs-myapplication-prod-000037 116842158 1 1 25.4gb 50.8gb\\n```\\n\\nBoth OpenSearch and Elasticsearch clusters have the same configuration: 3 nodes with 64GB RAM and 12 CPU cores. Just like in the [benchmark](https://www.elastic.co/blog/elasticsearch-opensearch-performance-gap), the clusters are running in Kubernetes.\\n\\n## Moving data from A to B\\n\\nTypically, moving data from one Elasticsearch cluster to another is as easy as a [snapshot and restore](https://www.elastic.co/guide/en/elasticsearch/reference/current/snapshot-restore.html) if the clusters are compatible versions of each other or a [reindex from remote](https://www.elastic.co/guide/en/elasticsearch/reference/current/docs-reindex.html#reindex-from-remote) if you need real-time synchronization and minimized downtime. 
These methods do not apply when migrating data from OpenSearch to Elasticsearch because the projects have significantly diverged from the 7.10 fork. However, there is one method that will work: scrolling.\\n\\n### Scrolling\\n\\nScrolling involves using an external tool, such as Logstash\xae, to read data from the source cluster and write it to the destination cluster. This method provides a high degree of customization, allowing us to transform the data during the migration process if needed. Here are a couple of advantages of using Logstash:\\n\\n- **Easy parallelization:** It’s really easy to write concurrent jobs that can read from different “slices” of the indices, essentially maximizing our throughput.\\n- **Queuing:** Logstash automatically queues documents before sending.\\n- **Automatic retries:** In the event of a failure or an error during data transmission, Logstash will automatically attempt to resend the data; moreover, it will stop querying the source cluster as often, until the connection is re-established, all without manual intervention.\\n\\nScrolling allows us to do an initial search and to keep pulling batches of results from Elasticsearch until there are no more results left, similar to how a “cursor” works in relational databases.\\n\\nA [scrolled search](https://www.elastic.co/guide/en/elasticsearch/guide/master/scroll.html) takes a snapshot in time by freezing the segments that make up the index at the time the request is made, preventing those segments from merging. As a result, the scroll doesn’t see any changes that are made to the index after the initial search request has been made.\\n\\n### Migration strategies\\n\\nReading from A and writing to B can be slow without optimization because it involves paginating through the results, transferring each batch over the network to Logstash, which will assemble the documents in another batch and then transfer those batches over the network again to Elasticsearch, where the documents will be indexed. So when it comes to such large data sets, we must be very efficient and extract every bit of performance where we can.\\n\\nLet’s start with the facts — what do we know about the data we need to transfer? We have nine indices in the datastream, each with about 100 million documents. Let’s test with just one of the indices and measure the indexing rate to see how long it takes to migrate. The indexing rate can be seen by activating the [monitoring](https://www.elastic.co/guide/en/elasticsearch/reference/current/monitoring-overview.html) functionality in Elastic\xae and then navigating to the index you want to inspect.\\n\\n**Scrolling in the deep** \\nThe simplest approach for transferring the log lines over would be to make Elasticsearch scroll over the entire data set and check it later when it finishes. Here we will introduce our first two variables: PAGE_SIZE and BATCH_SIZE. The former is how many records we are going to bring from the source every time we query it, and the latter is how many documents are going to be assembled together by Logstash and written to the destination index.\\n\\n![Deep scrolling](/assets/images/migrating-billion-log-lines-opensearch-elasticsearch/elastic-blog-2-scrolling-in-the-deep.jpg)\\n\\nWith such a large data set, the scroll slows down as this deep pagination progresses. The indexing rate starts at 6,000 docs/second and steadily descends to 700 docs/second because the pagination gets very deep. Without any optimization, it would take us 19 days (!) to migrate the 1 billion documents. We can do better than that!\\n\\n![Indexing rate for a deep scroll](/assets/images/migrating-billion-log-lines-opensearch-elasticsearch/elastic-blog-3-index-rate.png)\\n\\n
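For reference, here is roughly what these calls look like at the API level — a sketch with placeholder values, since Logstash issues them for us. The slice object in the last request is the optimization we look at next:\\n\\n```bash\\n# Open a scroll over one backing index, keeping the search context alive for 5 minutes\\nGET .ds-logs-myapplication-prod-000037/_search?scroll=5m\\n{\\n  \\"size\\": 500,\\n  \\"query\\": { \\"match_all\\": {} }\\n}\\n\\n# Page through it by sending back the _scroll_id from each previous response\\nGET _search/scroll\\n{\\n  \\"scroll\\": \\"5m\\",\\n  \\"scroll_id\\": \\"<_scroll_id from the previous response>\\"\\n}\\n\\n# A sliced scroll is the same search split into independent consumers\\nGET .ds-logs-myapplication-prod-000037/_search?scroll=5m\\n{\\n  \\"slice\\": { \\"id\\": 0, \\"max\\": 10 },\\n  \\"size\\": 500,\\n  \\"query\\": { \\"match_all\\": {} }\\n}\\n```\\n\\n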
**Slice me nice** \\nWe can optimize scrolling by using an approach called [Sliced scroll](https://www.elastic.co/guide/en/elasticsearch/reference/current/paginate-search-results.html#slice-scroll), where we split the index in different slices to consume them independently.\\n\\nHere we will introduce our last two variables: SLICES and WORKERS. The number of slices cannot be too small, as performance decreases drastically over time, and it cannot be too big, as the overhead of maintaining the scrolls would counter the benefits of a smaller search.\\n\\n![Sliced scroll](/assets/images/migrating-billion-log-lines-opensearch-elasticsearch/elastic-blog-4-slice-me-nice.jpg)\\n\\nLet’s start by migrating a single index (out of the nine we have) with different parameters to see what combination gives us the highest throughput.\\n\\n| SLICES | PAGE_SIZE | WORKERS | BATCH_SIZE | Average Indexing Rate |\\n| ------ | --------- | ------- | ---------- | --------------------- |\\n| 3 | 500 | 3 | 500 | 13,319 docs/sec |\\n| 3 | 1,000 | 3 | 1,000 | 13,048 docs/sec |\\n| 4 | 250 | 4 | 250 | 10,199 docs/sec |\\n| 4 | 500 | 4 | 500 | 12,692 docs/sec |\\n| 4 | 1,000 | 4 | 1,000 | 10,900 docs/sec |\\n| 5 | 500 | 5 | 500 | 12,647 docs/sec |\\n| 5 | 1,000 | 5 | 1,000 | 10,334 docs/sec |\\n| 5 | 2,000 | 5 | 2,000 | 10,405 docs/sec |\\n| 10 | 250 | 10 | 250 | 14,083 docs/sec |\\n| 10 | 250 | 4 | 1,000 | 12,014 docs/sec |\\n| 10 | 500 | 4 | 1,000 | 10,956 docs/sec |\\n\\nIt looks like we have a good set of candidates for maximizing the throughput for a single index, between 12K and 14K documents per second. That doesn\'t mean we have reached our ceiling. Even though search operations are single threaded and every slice will trigger sequential search operations to read data, that does not prevent us from reading several indices in parallel.\\n\\nBy default, the maximum number of open scrolls is 500 — this limit can be updated with the search.max_open_scroll_context cluster setting, but the default value is enough for this particular migration.\\n\\n![5 - indexing rate](/assets/images/migrating-billion-log-lines-opensearch-elasticsearch/elastic-blog-5-index-rate-volatile.png)\\n\\n## Let’s migrate\\n\\n### Preparing our destination indices\\n\\nWe are going to create a datastream called logs-myapplication-reindex to write the data to, but before indexing any data, let’s ensure our [index template](https://www.elastic.co/guide/en/elasticsearch/reference/current/index-templates.html) and [index lifecycle management](https://www.elastic.co/guide/en/elasticsearch/reference/current/ilm-index-lifecycle.html) configurations are properly set up. An index template acts as a blueprint for creating new indices, allowing you to define various settings that should be applied consistently across your indices.\\n\\n**Index lifecycle management policy** \\nIndex lifecycle management (ILM) is equally vital, as it automates the management of indices throughout their lifecycle. With ILM, you can define policies that determine how long data should be retained, when it should be rolled over into new indices, and when old indices should be deleted or archived. 
Our policy is really straightforward:\\n\\n```bash\\nPUT _ilm/policy/logs-myapplication-lifecycle-policy\\n{\\n \\"policy\\": {\\n \\"phases\\": {\\n \\"hot\\": {\\n \\"actions\\": {\\n \\"rollover\\": {\\n \\"max_primary_shard_size\\": \\"25gb\\"\\n }\\n }\\n },\\n \\"warm\\": {\\n \\"min_age\\": \\"0d\\",\\n \\"actions\\": {\\n \\"forcemerge\\": {\\n \\"max_num_segments\\": 1\\n }\\n }\\n }\\n }\\n }\\n}\\n```\\n\\n**Index template (and saving 23% in disk space)** \\nWhile we are at it, we’re going to enable [Synthetic Source](https://www.elastic.co/guide/en/elasticsearch/reference/current/mapping-source-field.html#synthetic-source), a clever feature that allows us to store and discard the original JSON document while still reconstructing it when needed from the stored fields.\\n\\nFor our example, enabling Synthetic Source resulted in a remarkable **23.4% improvement in storage efficiency**, reducing the size required to store a single document from 241.2 bytes in OpenSearch to just **185 bytes** in Elasticsearch.\\n\\nOur full index template is therefore:\\n\\n```bash\\nPUT _index_template/logs-myapplication-reindex\\n{\\n \\"index_patterns\\": [\\n \\"logs-myapplication-reindex\\"\\n ],\\n \\"priority\\": 500,\\n \\"data_stream\\": {},\\n \\"template\\": {\\n \\"settings\\": {\\n \\"index\\": {\\n \\"lifecycle.name\\": \\"logs-myapplication-lifecycle-policy\\",\\n \\"codec\\": \\"best_compression\\",\\n \\"number_of_shards\\": \\"1\\",\\n \\"number_of_replicas\\": \\"1\\",\\n \\"query\\": {\\n \\"default_field\\": [\\n \\"message\\"\\n ]\\n }\\n }\\n },\\n \\"mappings\\": {\\n \\"_source\\": {\\n \\"mode\\": \\"synthetic\\"\\n },\\n \\"_data_stream_timestamp\\": {\\n \\"enabled\\": true\\n },\\n \\"date_detection\\": false,\\n \\"properties\\": {\\n \\"@timestamp\\": {\\n \\"type\\": \\"date\\"\\n },\\n \\"agent\\": {\\n \\"properties\\": {\\n \\"ephemeral_id\\": {\\n \\"type\\": \\"keyword\\",\\n \\"ignore_above\\": 1024\\n },\\n \\"id\\": {\\n \\"type\\": \\"keyword\\",\\n \\"ignore_above\\": 1024\\n },\\n \\"name\\": {\\n \\"type\\": \\"keyword\\",\\n \\"ignore_above\\": 1024\\n },\\n \\"type\\": {\\n \\"type\\": \\"keyword\\",\\n \\"ignore_above\\": 1024\\n },\\n \\"version\\": {\\n \\"type\\": \\"keyword\\",\\n \\"ignore_above\\": 1024\\n }\\n }\\n },\\n \\"aws\\": {\\n \\"properties\\": {\\n \\"cloudwatch\\": {\\n \\"properties\\": {\\n \\"ingestion_time\\": {\\n \\"type\\": \\"keyword\\",\\n \\"ignore_above\\": 1024\\n },\\n \\"log_group\\": {\\n \\"type\\": \\"keyword\\",\\n \\"ignore_above\\": 1024\\n },\\n \\"log_stream\\": {\\n \\"type\\": \\"keyword\\",\\n \\"ignore_above\\": 1024\\n }\\n }\\n }\\n }\\n },\\n \\"cloud\\": {\\n \\"properties\\": {\\n \\"region\\": {\\n \\"type\\": \\"keyword\\",\\n \\"ignore_above\\": 1024\\n }\\n }\\n },\\n \\"data_stream\\": {\\n \\"properties\\": {\\n \\"dataset\\": {\\n \\"type\\": \\"keyword\\",\\n \\"ignore_above\\": 1024\\n },\\n \\"namespace\\": {\\n \\"type\\": \\"keyword\\",\\n \\"ignore_above\\": 1024\\n },\\n \\"type\\": {\\n \\"type\\": \\"keyword\\",\\n \\"ignore_above\\": 1024\\n }\\n }\\n },\\n \\"ecs\\": {\\n \\"properties\\": {\\n \\"version\\": {\\n \\"type\\": \\"keyword\\",\\n \\"ignore_above\\": 1024\\n }\\n }\\n },\\n \\"event\\": {\\n \\"properties\\": {\\n \\"dataset\\": {\\n \\"type\\": \\"keyword\\",\\n \\"ignore_above\\": 1024\\n },\\n \\"id\\": {\\n \\"type\\": \\"keyword\\",\\n \\"ignore_above\\": 1024\\n },\\n \\"ingested\\": {\\n \\"type\\": \\"date\\"\\n }\\n }\\n },\\n \\"host\\": {\\n \\"type\\": 
\\"object\\"\\n },\\n \\"input\\": {\\n \\"properties\\": {\\n \\"type\\": {\\n \\"type\\": \\"keyword\\",\\n \\"ignore_above\\": 1024\\n }\\n }\\n },\\n \\"log\\": {\\n \\"properties\\": {\\n \\"file\\": {\\n \\"properties\\": {\\n \\"path\\": {\\n \\"type\\": \\"keyword\\",\\n \\"ignore_above\\": 1024\\n }\\n }\\n }\\n }\\n },\\n \\"message\\": {\\n \\"type\\": \\"match_only_text\\"\\n },\\n \\"meta\\": {\\n \\"properties\\": {\\n \\"file\\": {\\n \\"type\\": \\"keyword\\",\\n \\"ignore_above\\": 1024\\n }\\n }\\n },\\n \\"metrics\\": {\\n \\"properties\\": {\\n \\"size\\": {\\n \\"type\\": \\"long\\"\\n },\\n \\"tmin\\": {\\n \\"type\\": \\"long\\"\\n }\\n }\\n },\\n \\"process\\": {\\n \\"properties\\": {\\n \\"name\\": {\\n \\"type\\": \\"keyword\\",\\n \\"ignore_above\\": 1024\\n }\\n }\\n },\\n \\"tags\\": {\\n \\"type\\": \\"keyword\\",\\n \\"ignore_above\\": 1024\\n }\\n }\\n }\\n }\\n}\\n```\\n\\n### Building a custom Logstash image\\n\\nWe are going to use a containerized Logstash for this migration because both clusters are sitting on a Kubernetes infrastructure, so it\'s easier to just spin up a Pod that will communicate to both clusters.\\n\\nSince OpenSearch is not an official Logstash input, we must build a custom Logstash image that contains the logstash-input-opensearch plugin. Let’s use the base image from docker.elastic.co/logstash/logstash:8.16.1 and just install the plugin:\\n\\n```dockerfile\\nFROM docker.elastic.co/logstash/logstash:8.16.1\\n\\nUSER logstash\\nWORKDIR /usr/share/logstash\\nRUN bin/logstash-plugin install logstash-input-opensearch\\n```\\n\\n### Writing a Logstash pipeline\\n\\nNow we have our Logstash Docker image, and we need to write a pipeline that will read from OpenSearch and write to Elasticsearch.\\n\\n**The** **input**\\n\\n```ruby\\ninput {\\n opensearch {\\n hosts => [\\"os-cluster:9200\\"]\\n ssl => true\\n ca_file => \\"/etc/logstash/certificates/opensearch-ca.crt\\"\\n user => \\"${OPENSEARCH_USERNAME}\\"\\n password => \\"${OPENSEARCH_PASSWORD}\\"\\n index => \\"${SOURCE_INDEX_NAME}\\"\\n slices => \\"${SOURCE_SLICES}\\"\\n size => \\"${SOURCE_PAGE_SIZE}\\"\\n scroll => \\"5m\\"\\n docinfo => true\\n docinfo_target => \\"[@metadata][doc]\\"\\n }\\n}\\n```\\n\\nLet’s break down the most important input parameters. The values are all represented as environment variables here:\\n\\n- **hosts:** Specifies the host and port of the OpenSearch cluster. In this case, it’s connecting to “os-cluster” on port 9200.\\n- **index:** Specifies the index in the OpenSearch cluster from which to retrieve logs. In this case, it’s “logs-myapplication-prod” which is a datastream that contains the actual indices (e.g., .ds-logs-myapplication-prod-000049).\\n- **size:** Specifies the maximum number of logs to retrieve in each request.\\n- **scroll:** Defines how long a search context will be kept open on the OpenSearch server. In this case, it’s set to “5m,” which means each request must be answered and a new “page” asked within five minutes.\\n- **docinfo** and **docinfo_target:** These settings control whether document metadata should be included in the Logstash output and where it should be stored. In this case, document metadata is being stored in the [@metadata][doc] field — this is important because the document’s \\\\_id will be used as the destination id as well.\\n\\nThe ssl and ca_file are highly recommended if you are migrating from clusters that are in a different infrastructure (separate cloud providers). 
You don’t need to specify a ca_file if your TLS certificates are signed by a public authority, which is likely the case if you are using a SaaS and your endpoint is reachable over the internet. In this case, only `ssl => true` would suffice. In our case, all our TLS certificates are self-signed, so we must also provide the Certificate Authority (CA) certificate.\\n\\n**The (optional)** **filter** \\nWe could use this to drop or alter the documents to be written to Elasticsearch if we wanted, but we are not going to, as we want to migrate the documents as is. We are only removing extra metadata fields that Logstash includes in all documents, such as \\"@version\\" and \\"host\\". We are also removing the original \\"data_stream\\" as it contains the source data stream name, which might not be the same in the destination.\\n\\n```ruby\\nfilter {\\n mutate {\\n remove_field => [\\"@version\\", \\"host\\", \\"data_stream\\"]\\n }\\n}\\n```\\n\\n**The** **output** \\nThe output is really simple — we are going to name our datastream logs-myapplication-reindex and we are using the document id of the original documents in document_id, to ensure there are no duplicate documents. In Elasticsearch, datastream names follow the convention `<type>-<dataset>-<namespace>`, so our logs-myapplication-reindex datastream has “logs” as type, “myapplication” as dataset, and “reindex” as namespace.\\n\\n```ruby\\nelasticsearch {\\n hosts => \\"${ELASTICSEARCH_HOST}\\"\\n\\n user => \\"${ELASTICSEARCH_USERNAME}\\"\\n password => \\"${ELASTICSEARCH_PASSWORD}\\"\\n\\n document_id => \\"%{[@metadata][doc][_id]}\\"\\n\\n data_stream => \\"true\\"\\n data_stream_type => \\"logs\\"\\n data_stream_dataset => \\"myapplication\\"\\n data_stream_namespace => \\"reindex\\"\\n}\\n```\\n\\n### Deploying Logstash\\n\\nWe have a few options to deploy Logstash: it can be deployed [locally from the command line](https://www.elastic.co/guide/en/logstash/current/running-logstash-command-line.html), as a [systemd service](https://www.elastic.co/guide/en/logstash/current/running-logstash.html), via [docker](https://www.elastic.co/guide/en/logstash/current/docker.html), or on [Kubernetes](https://www.elastic.co/guide/en/cloud-on-k8s/current/k8s-logstash.html).\\n\\nSince both of our clusters are deployed in a Kubernetes environment, we are going to deploy Logstash as a **Pod** referencing our Docker image created earlier. 
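If you build the custom image from the Dockerfile above yourself, it needs to be tagged and pushed to a registry the cluster nodes can reach — the registry name below is a placeholder:\\n\\n```bash\\ndocker build -t my-registry.example.com/logstash-opensearch-input:8.16.1 .\\ndocker push my-registry.example.com/logstash-opensearch-input:8.16.1\\n```\\n\\n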
Let’s put our pipeline inside a **ConfigMap** along with some configuration files (pipelines.yml and logstash.yml).\\n\\nIn the below configuration, we have SOURCE_INDEX_NAME, SOURCE_SLICES, SOURCE_PAGE_SIZE, LOGSTASH_WORKERS, and LOGSTASH_BATCH_SIZE conveniently exposed as environment variables, so you just need to fill them out.\\n\\n```yaml\\napiVersion: v1\\nkind: Pod\\nmetadata:\\n  name: logstash-1\\nspec:\\n  containers:\\n    - name: logstash\\n      image: ugosan/logstash-opensearch-input:8.10.0\\n      imagePullPolicy: Always\\n      env:\\n        - name: SOURCE_INDEX_NAME\\n          value: \\".ds-logs-myapplication-prod-000037\\"\\n        - name: SOURCE_SLICES\\n          value: \\"10\\"\\n        - name: SOURCE_PAGE_SIZE\\n          value: \\"500\\"\\n        - name: LOGSTASH_WORKERS\\n          value: \\"4\\"\\n        - name: LOGSTASH_BATCH_SIZE\\n          value: \\"1000\\"\\n        - name: OPENSEARCH_USERNAME\\n          valueFrom:\\n            secretKeyRef:\\n              name: os-cluster-admin-password\\n              key: username\\n        - name: OPENSEARCH_PASSWORD\\n          valueFrom:\\n            secretKeyRef:\\n              name: os-cluster-admin-password\\n              key: password\\n        - name: ELASTICSEARCH_USERNAME\\n          value: \\"elastic\\"\\n        - name: ELASTICSEARCH_PASSWORD\\n          valueFrom:\\n            secretKeyRef:\\n              name: es-cluster-es-elastic-user\\n              key: elastic\\n      resources:\\n        limits:\\n          memory: \\"4Gi\\"\\n          cpu: \\"2500m\\"\\n        requests:\\n          memory: \\"1Gi\\"\\n          cpu: \\"300m\\"\\n      volumeMounts:\\n        - name: config-volume\\n          mountPath: /usr/share/logstash/config\\n        - name: etc\\n          mountPath: /etc/logstash\\n          readOnly: true\\n  volumes:\\n    - name: config-volume\\n      projected:\\n        sources:\\n          - configMap:\\n              name: logstash-configmap\\n              items:\\n                - key: pipelines.yml\\n                  path: pipelines.yml\\n                - key: logstash.yml\\n                  path: logstash.yml\\n    - name: etc\\n      projected:\\n        sources:\\n          - configMap:\\n              name: logstash-configmap\\n              items:\\n                - key: pipeline.conf\\n                  path: pipelines/pipeline.conf\\n          - secret:\\n              name: os-cluster-http-cert\\n              items:\\n                - key: ca.crt\\n                  path: certificates/opensearch-ca.crt\\n          - secret:\\n              name: es-cluster-es-http-ca-internal\\n              items:\\n                - key: tls.crt\\n                  path: certificates/elasticsearch-ca.crt\\n---\\napiVersion: v1\\nkind: ConfigMap\\nmetadata:\\n  name: logstash-configmap\\ndata:\\n  pipelines.yml: |\\n    - pipeline.id: reindex-os-es\\n      path.config: \\"/etc/logstash/pipelines/pipeline.conf\\"\\n      pipeline.batch.size: ${LOGSTASH_BATCH_SIZE}\\n      pipeline.workers: ${LOGSTASH_WORKERS}\\n  logstash.yml: |\\n    log.level: info\\n    pipeline.unsafe_shutdown: true\\n    pipeline.ordered: false\\n  pipeline.conf: |\\n    input {\\n      opensearch {\\n        hosts => [\\"os-cluster:9200\\"]\\n        ssl => true\\n        ca_file => \\"/etc/logstash/certificates/opensearch-ca.crt\\"\\n        user => \\"${OPENSEARCH_USERNAME}\\"\\n        password => \\"${OPENSEARCH_PASSWORD}\\"\\n        index => \\"${SOURCE_INDEX_NAME}\\"\\n        slices => \\"${SOURCE_SLICES}\\"\\n        size => \\"${SOURCE_PAGE_SIZE}\\"\\n        scroll => \\"5m\\"\\n        docinfo => true\\n        docinfo_target => \\"[@metadata][doc]\\"\\n      }\\n    }\\n\\n    filter {\\n      mutate {\\n        remove_field => [\\"@version\\", \\"host\\", \\"data_stream\\"]\\n      }\\n    }\\n\\n    output {\\n      elasticsearch {\\n        hosts => \\"https://es-cluster-es-http:9200\\"\\n        ssl => true\\n        ssl_certificate_authorities => [\\"/etc/logstash/certificates/elasticsearch-ca.crt\\"]\\n        ssl_verification_mode => \\"full\\"\\n\\n        user => \\"${ELASTICSEARCH_USERNAME}\\"\\n        password => \\"${ELASTICSEARCH_PASSWORD}\\"\\n\\n        document_id => \\"%{[@metadata][doc][_id]}\\"\\n\\n        data_stream => \\"true\\"\\n        data_stream_type => \\"logs\\"\\n        data_stream_dataset => \\"myapplication\\"\\n        data_stream_namespace => \\"reindex\\"\\n      }\\n    }\\n```\\n\\n
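Once the Pod is up, a quick way to sanity-check progress is to watch the document count grow on the destination data stream (a console call you can repeat as the migration runs):\\n\\n```bash\\nGET logs-myapplication-reindex/_count\\n```\\n\\n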
## That’s it.\\n\\nAfter a couple of hours, we successfully migrated 1 billion documents from OpenSearch to Elasticsearch and even saved more than 23% in disk storage! Now that we have the logs in Elasticsearch, how about extracting actual business value from them? Logs contain so much valuable information — we can not only do all sorts of interesting things with AIOps, like [automatically categorizing](https://www.elastic.co/guide/en/observability/current/categorize-logs.html#analyze-log-categories) those logs, but also extract [business metrics](https://www.youtube.com/watch?v=0E7isxR_FzY&list=PLzPXmNbs8vqUc2bROb1E2gNyj2GynRB5b&index=3&t=1122s) and [detect anomalies](https://www.youtube.com/watch?v=0E7isxR_FzY&list=PLzPXmNbs8vqUc2bROb1E2gNyj2GynRB5b&index=3&t=1906s) on them. Give it a try!\\n\\n| OpenSearch index | docs | size (bytes) | Elasticsearch index | docs | size (bytes) | Diff. |\\n| --- | --- | --- | --- | --- | --- | --- |\\n| .ds-logs-myapplication-prod-000037 | 116842158 | 27285520870 | logs-myapplication-reindex-000037 | 116842158 | 21998435329 | 21.46% |\\n| .ds-logs-myapplication-prod-000038 | 110994116 | 27263291740 | logs-myapplication-reindex-000038 | 110994116 | 21540011082 | 23.45% |\\n| .ds-logs-myapplication-prod-000040 | 113362823 | 27872438186 | logs-myapplication-reindex-000040 | 113362823 | 22234641932 | 22.50% |\\n| .ds-logs-myapplication-prod-000041 | 112400019 | 27618801653 | logs-myapplication-reindex-000041 | 112400019 | 22059453868 | 22.38% |\\n| .ds-logs-myapplication-prod-000042 | 113859174 | 26686723701 | logs-myapplication-reindex-000042 | 113859174 | 21093766108 | 23.41% |\\n| .ds-logs-myapplication-prod-000043 | 113821016 | 27657006598 | logs-myapplication-reindex-000043 | 113821016 | 22059454752 | 22.52% |\\n| .ds-logs-myapplication-prod-000044 | 111093596 | 27281936915 | logs-myapplication-reindex-000044 | 111093596 | 21559513422 | 23.43% |\\n| .ds-logs-myapplication-prod-000048 | 114273539 | 28111420495 | logs-myapplication-reindex-000048 | 114273539 | 22264398939 | 23.21% |\\n| .ds-logs-myapplication-prod-000049 | 102519334 | 23731274338 | logs-myapplication-reindex-000049 | 102519334 | 19307250001 | 20.56% |\\n\\nInterested in trying Elasticsearch? [Start our 14-day free trial](https://cloud.elastic.co/registration?elektra=en-cloud-page).\\n\\n_The release and timing of any features or functionality described in this post remain at Elastic\'s sole discretion. 
Any features or functionality not currently available may not be delivered on time or at all._\\n","code":"var Component=(()=>{var p=Object.create;var a=Object.defineProperty;var g=Object.getOwnPropertyDescriptor;var m=Object.getOwnPropertyNames;var u=Object.getPrototypeOf,f=Object.prototype.hasOwnProperty;var w=(t,e)=>()=>(e||t((e={exports:{}}).exports,e),e.exports),y=(t,e)=>{for(var i in e)a(t,i,{get:e[i],enumerable:!0})},o=(t,e,i,s)=>{if(e&&typeof e==\\"object\\"||typeof e==\\"function\\")for(let r of m(e))!f.call(t,r)&&r!==i&&a(t,r,{get:()=>e[r],enumerable:!(s=g(e,r))||s.enumerable});return t};var b=(t,e,i)=>(i=t!=null?p(u(t)):{},o(e||!t||!t.__esModule?a(i,\\"default\\",{value:t,enumerable:!0}):i,t)),v=t=>o(a({},\\"__esModule\\",{value:!0}),t);var c=w((x,l)=>{l.exports=_jsx_runtime});var S={};y(S,{default:()=>h,frontmatter:()=>_});var n=b(c()),_={title:\\"Migrating 1 billion log lines from OpenSearch to Elasticsearch\\",slug:\\"migrating-billion-log-lines-opensearch-elasticsearch\\",date:\\"2023-10-11\\",description:\\"Learn how to migrate 1 billion log lines from OpenSearch to Elasticsearch for improved performance and reduced disk usage. Discover the migration strategies, data transfer methods, and optimization techniques used in this guide.\\",author:[{slug:\\"ugo-sangiorgi\\"}],image:\\"elastic-blog-header-1-billion-log-lines.png\\",tags:[{slug:\\"log-analytics\\"},{slug:\\"opensearch\\"}]};function d(t){let e={a:\\"a\\",br:\\"br\\",code:\\"code\\",div:\\"div\\",em:\\"em\\",h2:\\"h2\\",h3:\\"h3\\",img:\\"img\\",li:\\"li\\",p:\\"p\\",pre:\\"pre\\",strong:\\"strong\\",table:\\"table\\",tbody:\\"tbody\\",td:\\"td\\",th:\\"th\\",thead:\\"thead\\",tr:\\"tr\\",ul:\\"ul\\",...t.components};return(0,n.jsxs)(n.Fragment,{children:[(0,n.jsxs)(e.p,{children:[\\"What are the current options to migrate from OpenSearch to Elasticsearch\\",(0,n.jsx)(\\"sup\\",{children:\\"\\\\xAE\\"}),\\"?\\"]}),`\\n`,(0,n.jsxs)(e.p,{children:[\\"OpenSearch is a fork of Elasticsearch 7.10 that has diverged quite a bit from Elasticsearch lately, resulting in a different set of features and also different performance, as \\",(0,n.jsx)(e.a,{href:\\"https://www.elastic.co/blog/elasticsearch-opensearch-performance-gap\\",rel:\\"nofollow\\",children:\\"this benchmark\\"}),\\" shows (hint: it\\\\u2019s currently much slower than Elasticsearch).\\"]}),`\\n`,(0,n.jsx)(e.p,{children:\\"Given the differences between the two solutions, restoring a snapshot from OpenSearch is not possible, nor is reindex-from-remote, so our only option is then using something in between that will read from OpenSearch and write to Elasticsearch.\\"}),`\\n`,(0,n.jsx)(e.p,{children:\\"This blog will show you how easy it is to migrate from OpenSearch to Elasticsearch for better performance and less disk usage!\\"}),`\\n`,(0,n.jsx)(e.p,{children:(0,n.jsx)(e.img,{src:\\"/assets/images/migrating-billion-log-lines-opensearch-elasticsearch/blog-elastic-348gb-disk-space-logs.jpg\\",alt:\\"1 - arrows\\",width:\\"1999\\",height:\\"938\\"})}),`\\n`,(0,n.jsx)(e.h2,{id:\\"1-billion-log-lines\\",children:\\"1 billion log lines\\"}),`\\n`,(0,n.jsx)(e.p,{children:\\"We are going to use part of the data set we used for the benchmark, which takes about half a terabyte on disk, including replicas, and spans over a week (January 1\\\\u20137, 2023).\\"}),`\\n`,(0,n.jsxs)(e.p,{children:[\\"We have in total 1,009,165,775 documents that take \\",(0,n.jsx)(e.strong,{children:\\"453.5GB\\"}),\\" of space in OpenSearch, including the replicas. 
That\\\\u2019s \\",(0,n.jsx)(e.strong,{children:\\"241.2KB per document\\"}),\\". This is going to be important later when we enable a couple optimizations in Elasticsearch that will bring this total size way down without sacrificing performance!\\"]}),`\\n`,(0,n.jsx)(e.p,{children:\\"This billion log line data set is spread over nine indices that are part of a datastream we are calling logs-myapplication-prod. We have primary shards of about 25GB in size, according to the best practices for optimal shard sizing. A GET _cat/indices show us the indices we are dealing with:\\"}),`\\n`,(0,n.jsx)(e.pre,{children:(0,n.jsx)(e.code,{className:\\"language-bash\\",children:`index docs.count pri rep pri.store.size store.size\\n.ds-logs-myapplication-prod-000049 102519334 1 1 22.1gb 44.2gb\\n.ds-logs-myapplication-prod-000048 114273539 1 1 26.1gb 52.3gb\\n.ds-logs-myapplication-prod-000044 111093596 1 1 25.4gb 50.8gb\\n.ds-logs-myapplication-prod-000043 113821016 1 1 25.7gb 51.5gb\\n.ds-logs-myapplication-prod-000042 113859174 1 1 24.8gb 49.7gb\\n.ds-logs-myapplication-prod-000041 112400019 1 1 25.7gb 51.4gb\\n.ds-logs-myapplication-prod-000040 113362823 1 1 25.9gb 51.9gb\\n.ds-logs-myapplication-prod-000038 110994116 1 1 25.3gb 50.7gb\\n.ds-logs-myapplication-prod-000037 116842158 1 1 25.4gb 50.8gb\\n`})}),`\\n`,(0,n.jsxs)(e.p,{children:[\\"Both OpenSearch and Elasticsearch clusters have the same configuration: 3 nodes with 64GB RAM and 12 CPU cores. Just like in the \\",(0,n.jsx)(e.a,{href:\\"https://www.elastic.co/blog/elasticsearch-opensearch-performance-gap\\",rel:\\"nofollow\\",children:\\"benchmark\\"}),\\", the clusters are running in Kubernetes.\\"]}),`\\n`,(0,n.jsx)(e.h2,{id:\\"moving-data-from-a-to-b\\",children:\\"Moving data from A to B\\"}),`\\n`,(0,n.jsxs)(e.p,{children:[\\"Typically, moving data from one Elasticsearch cluster to another is easy as a \\",(0,n.jsx)(e.a,{href:\\"https://www.elastic.co/guide/en/elasticsearch/reference/current/snapshot-restore.html\\",rel:\\"nofollow\\",children:\\"snapshot and restore\\"}),\\" if the clusters are compatible versions of each other or a \\",(0,n.jsx)(e.a,{href:\\"https://www.elastic.co/guide/en/elasticsearch/reference/current/docs-reindex.html#reindex-from-remote\\",rel:\\"nofollow\\",children:\\"reindex from remote\\"}),\\" if you need real-time synchronization and minimized downtime. These methods do not apply when migrating data from OpenSearch to Elasticsearch because the projects have significantly diverged from the 7.10 fork. However, there is one method that will work: scrolling.\\"]}),`\\n`,(0,n.jsx)(e.h3,{id:\\"scrolling\\",children:\\"Scrolling\\"}),`\\n`,(0,n.jsxs)(e.p,{children:[\\"Scrolling involves using an external tool, such as Logstash\\",(0,n.jsx)(\\"sup\\",{children:\\"\\\\xAE\\"}),\\", to read data from the source cluster and write it to the destination cluster. This method provides a high degree of customization, allowing us to transform the data during the migration process if needed. 
Here are a couple of advantages of using Logstash:\\"]}),`\\n`,(0,n.jsxs)(e.ul,{children:[`\\n`,(0,n.jsxs)(e.li,{children:[(0,n.jsx)(e.strong,{children:\\"Easy parallelization:\\"}),\\" It\\\\u2019s really easy to write concurrent jobs that can read from different \\\\u201Cslices\\\\u201D of the indices, essentially maximizing our throughput.\\"]}),`\\n`,(0,n.jsxs)(e.li,{children:[(0,n.jsx)(e.strong,{children:\\"Queuing:\\"}),\\" Logstash automatically queues documents before sending.\\"]}),`\\n`,(0,n.jsxs)(e.li,{children:[(0,n.jsx)(e.strong,{children:\\"Automatic retries:\\"}),\\" In the event of a failure or an error during data transmission, Logstash will automatically attempt to resend the data; moreover, it will back off and query the source cluster less often until the connection is re-established, all without manual intervention.\\"]}),`\\n`]}),`\\n`,(0,n.jsx)(e.p,{children:\\"Scrolling allows us to do an initial search and to keep pulling batches of results from Elasticsearch until there are no more results left, similar to how a \\\\u201Ccursor\\\\u201D works in relational databases.\\"}),`\\n`,(0,n.jsxs)(e.p,{children:[\\"A \\",(0,n.jsx)(e.a,{href:\\"https://www.elastic.co/guide/en/elasticsearch/guide/master/scroll.html\\",rel:\\"nofollow\\",children:\\"scrolled search\\"}),\\" takes a snapshot in time by freezing the segments that make up the index at the time the request is made, preventing those segments from merging. As a result, the scroll doesn\\\\u2019t see any changes that are made to the index after the initial search request has been made.\\"]}),`\\n`,(0,n.jsx)(e.h3,{id:\\"migration-strategies\\",children:\\"Migration strategies\\"}),`\\n`,(0,n.jsx)(e.p,{children:\\"Reading from A and writing to B can be slow without optimization because it involves paginating through the results, transferring each batch over the network to Logstash, which will assemble the documents into new batches and then transfer those batches over the network again to Elasticsearch, where the documents will be indexed. So when it comes to such large data sets, we must be very efficient and extract every bit of performance where we can.\\"}),`\\n`,(0,n.jsxs)(e.p,{children:[\\"Let\\\\u2019s start with the facts \\\\u2014 what do we know about the data we need to transfer? We have nine indices in the datastream, each with about 100 million documents. Let\\\\u2019s test with just one of the indices and measure the indexing rate to see how long it takes to migrate. The indexing rate can be seen by activating the \\",(0,n.jsx)(e.a,{href:\\"https://www.elastic.co/guide/en/elasticsearch/reference/current/monitoring-overview.html\\",rel:\\"nofollow\\",children:\\"monitoring\\"}),\\" functionality in Elastic\\",(0,n.jsx)(\\"sup\\",{children:\\"\\\\xAE\\"}),\\" and then navigating to the index you want to inspect.\\"]}),`\\n`,(0,n.jsxs)(e.p,{children:[(0,n.jsx)(e.strong,{children:\\"Scrolling in the deep\\"}),(0,n.jsx)(e.br,{}),`\\n`,\\"The simplest approach for transferring the log lines over would be to make Elasticsearch scroll over the entire data set and check it later when it finishes. Here we will introduce our first two variables: PAGE_SIZE and BATCH_SIZE. 
The former is how many records we are going to bring from the source every time we query it, and the latter is how many documents are going to be assembled together by Logstash and written to the destination index.\\"]}),`\\n`,(0,n.jsx)(e.p,{children:(0,n.jsx)(e.img,{src:\\"/assets/images/migrating-billion-log-lines-opensearch-elasticsearch/elastic-blog-2-scrolling-in-the-deep.jpg\\",alt:\\"Deep scrolling\\",width:\\"1635\\",height:\\"777\\"})}),`\\n`,(0,n.jsx)(e.p,{children:\\"With such a large data set, the scroll slows down as this deep pagination progresses. The indexing rate starts at 6,000 docs/second and steadily drops to 700 docs/second as the pagination gets deeper. Without any optimization, it would take us 19 days (!) to migrate the 1 billion documents. We can do better than that!\\"}),`\\n`,(0,n.jsx)(e.p,{children:(0,n.jsx)(e.img,{src:\\"/assets/images/migrating-billion-log-lines-opensearch-elasticsearch/elastic-blog-3-index-rate.png\\",alt:\\"Indexing rate for a deep scroll\\",width:\\"1730\\",height:\\"604\\"})}),`\\n`,(0,n.jsxs)(e.p,{children:[(0,n.jsx)(e.strong,{children:\\"Slice me nice\\"}),(0,n.jsx)(e.br,{}),`\\n`,\\"We can optimize scrolling by using an approach called \\",(0,n.jsx)(e.a,{href:\\"https://www.elastic.co/guide/en/elasticsearch/reference/current/paginate-search-results.html#slice-scroll\\",rel:\\"nofollow\\",children:\\"Sliced scroll\\"}),\\", where we split the index into different slices to consume them independently.\\"]}),`\\n`,(0,n.jsx)(e.p,{children:\\"Here we will introduce our last two variables: SLICES and WORKERS. The number of slices cannot be too small, as performance decreases drastically over time, and it can\\\\u2019t be too big, as the overhead of maintaining the scrolls would counter the benefits of a smaller search.\\"}),`\\n`,(0,n.jsx)(e.p,{children:(0,n.jsx)(e.img,{src:\\"/assets/images/migrating-billion-log-lines-opensearch-elasticsearch/elastic-blog-4-slice-me-nice.jpg\\",alt:\\"Sliced scroll\\",width:\\"1590\\",height:\\"777\\"})}),`\\n`,(0,n.jsx)(e.p,{children:\\"Let\\\\u2019s start by migrating a single index (out of the nine we have) with different parameters to see what combination gives us the highest throughput.\\"}),`\\n`,(0,n.jsx)(e.div,{className:\\"table-container\\",children:(0,n.jsxs)(e.table,{children:[(0,n.jsx)(e.thead,{children:(0,n.jsxs)(e.tr,{children:[(0,n.jsx)(e.th,{children:\\"SLICES\\"}),(0,n.jsx)(e.th,{children:\\"PAGE_SIZE\\"}),(0,n.jsx)(e.th,{children:\\"WORKERS\\"}),(0,n.jsx)(e.th,{children:\\"BATCH_SIZE\\"}),(0,n.jsx)(e.th,{children:\\"Average Indexing Rate\\"})]})}),(0,n.jsxs)(e.tbody,{children:[(0,n.jsxs)(e.tr,{children:[(0,n.jsx)(e.td,{children:\\"3\\"}),(0,n.jsx)(e.td,{children:\\"500\\"}),(0,n.jsx)(e.td,{children:\\"3\\"}),(0,n.jsx)(e.td,{children:\\"500\\"}),(0,n.jsx)(e.td,{children:\\"13,319 docs/sec\\"})]}),(0,n.jsxs)(e.tr,{children:[(0,n.jsx)(e.td,{children:\\"3\\"}),(0,n.jsx)(e.td,{children:\\"1,000\\"}),(0,n.jsx)(e.td,{children:\\"3\\"}),(0,n.jsx)(e.td,{children:\\"1,000\\"}),(0,n.jsx)(e.td,{children:\\"13,048 docs/sec\\"})]}),(0,n.jsxs)(e.tr,{children:[(0,n.jsx)(e.td,{children:\\"4\\"}),(0,n.jsx)(e.td,{children:\\"250\\"}),(0,n.jsx)(e.td,{children:\\"4\\"}),(0,n.jsx)(e.td,{children:\\"250\\"}),(0,n.jsx)(e.td,{children:\\"10,199 
docs/sec\\"})]}),(0,n.jsxs)(e.tr,{children:[(0,n.jsx)(e.td,{children:\\"4\\"}),(0,n.jsx)(e.td,{children:\\"500\\"}),(0,n.jsx)(e.td,{children:\\"4\\"}),(0,n.jsx)(e.td,{children:\\"500\\"}),(0,n.jsx)(e.td,{children:\\"12,692 docs/sec\\"})]}),(0,n.jsxs)(e.tr,{children:[(0,n.jsx)(e.td,{children:\\"4\\"}),(0,n.jsx)(e.td,{children:\\"1,000\\"}),(0,n.jsx)(e.td,{children:\\"4\\"}),(0,n.jsx)(e.td,{children:\\"1,000\\"}),(0,n.jsx)(e.td,{children:\\"10,900 docs/sec\\"})]}),(0,n.jsxs)(e.tr,{children:[(0,n.jsx)(e.td,{children:\\"5\\"}),(0,n.jsx)(e.td,{children:\\"500\\"}),(0,n.jsx)(e.td,{children:\\"5\\"}),(0,n.jsx)(e.td,{children:\\"500\\"}),(0,n.jsx)(e.td,{children:\\"12,647 docs/sec\\"})]}),(0,n.jsxs)(e.tr,{children:[(0,n.jsx)(e.td,{children:\\"5\\"}),(0,n.jsx)(e.td,{children:\\"1,000\\"}),(0,n.jsx)(e.td,{children:\\"5\\"}),(0,n.jsx)(e.td,{children:\\"1,000\\"}),(0,n.jsx)(e.td,{children:\\"10,334 docs/sec\\"})]}),(0,n.jsxs)(e.tr,{children:[(0,n.jsx)(e.td,{children:\\"5\\"}),(0,n.jsx)(e.td,{children:\\"2,000\\"}),(0,n.jsx)(e.td,{children:\\"5\\"}),(0,n.jsx)(e.td,{children:\\"2,000\\"}),(0,n.jsx)(e.td,{children:\\"10,405 docs/sec\\"})]}),(0,n.jsxs)(e.tr,{children:[(0,n.jsx)(e.td,{children:\\"10\\"}),(0,n.jsx)(e.td,{children:\\"250\\"}),(0,n.jsx)(e.td,{children:\\"10\\"}),(0,n.jsx)(e.td,{children:\\"250\\"}),(0,n.jsx)(e.td,{children:\\"14,083 docs/sec\\"})]}),(0,n.jsxs)(e.tr,{children:[(0,n.jsx)(e.td,{children:\\"10\\"}),(0,n.jsx)(e.td,{children:\\"250\\"}),(0,n.jsx)(e.td,{children:\\"4\\"}),(0,n.jsx)(e.td,{children:\\"1,000\\"}),(0,n.jsx)(e.td,{children:\\"12,014 docs/sec\\"})]}),(0,n.jsxs)(e.tr,{children:[(0,n.jsx)(e.td,{children:\\"10\\"}),(0,n.jsx)(e.td,{children:\\"500\\"}),(0,n.jsx)(e.td,{children:\\"4\\"}),(0,n.jsx)(e.td,{children:\\"1,000\\"}),(0,n.jsx)(e.td,{children:\\"10,956 docs/sec\\"})]})]})]})}),`\\n`,(0,n.jsx)(e.p,{children:\\"It looks like we have a good set of candidates for maximizing the throughput for a single index, in between 12K and 14K documents per second. That doesn\'t mean we have reached our ceiling. Even though search operations are single threaded and every slice will trigger sequential search operations to read data, that does not prevent us from reading several indices in parallel.\\"}),`\\n`,(0,n.jsx)(e.p,{children:\\"By default, the maximum number of open scrolls is 500 \\\\u2014 this limit can be updated with the search.max_open_scroll_context cluster setting, but the default value is enough for this particular migration.\\"}),`\\n`,(0,n.jsx)(e.p,{children:(0,n.jsx)(e.img,{src:\\"/assets/images/migrating-billion-log-lines-opensearch-elasticsearch/elastic-blog-5-index-rate-volatile.png\\",alt:\\"5 - indexing rate\\",width:\\"1554\\",height:\\"498\\"})}),`\\n`,(0,n.jsx)(e.h2,{id:\\"lets-migrate\\",children:\\"Let\\\\u2019s migrate\\"}),`\\n`,(0,n.jsx)(e.h3,{id:\\"preparing-our-destination-indices\\",children:\\"Preparing our destination indices\\"}),`\\n`,(0,n.jsxs)(e.p,{children:[\\"We are going to create a datastream called logs-myapplication-reindex to write the data to, but before indexing any data, let\\\\u2019s ensure our \\",(0,n.jsx)(e.a,{href:\\"https://www.elastic.co/guide/en/elasticsearch/reference/current/index-templates.html\\",rel:\\"nofollow\\",children:\\"index template\\"}),\\" and \\",(0,n.jsx)(e.a,{href:\\"https://www.elastic.co/guide/en/elasticsearch/reference/current/ilm-index-lifecycle.html\\",rel:\\"nofollow\\",children:\\"index lifecycle management\\"}),\\" configurations are properly set up. 
An index template acts as a blueprint for creating new indices, allowing you to define various settings that should be applied consistently across your indices.\\"]}),`\\n`,(0,n.jsxs)(e.p,{children:[(0,n.jsx)(e.strong,{children:\\"Index lifecycle management policy\\"}),(0,n.jsx)(e.br,{}),`\\n`,\\"Index lifecycle management (ILM) is equally vital, as it automates the management of indices throughout their lifecycle. With ILM, you can define policies that determine how long data should be retained, when it should be rolled over into new indices, and when old indices should be deleted or archived. Our policy is really straightforward:\\"]}),`\\n`,(0,n.jsx)(e.pre,{children:(0,n.jsx)(e.code,{className:\\"language-bash\\",children:`PUT _ilm/policy/logs-myapplication-lifecycle-policy\\n{\\n \\"policy\\": {\\n \\"phases\\": {\\n \\"hot\\": {\\n \\"actions\\": {\\n \\"rollover\\": {\\n \\"max_primary_shard_size\\": \\"25gb\\"\\n }\\n }\\n },\\n \\"warm\\": {\\n \\"min_age\\": \\"0d\\",\\n \\"actions\\": {\\n \\"forcemerge\\": {\\n \\"max_num_segments\\": 1\\n }\\n }\\n }\\n }\\n }\\n}\\n`})}),`\\n`,(0,n.jsxs)(e.p,{children:[(0,n.jsx)(e.strong,{children:\\"Index template (and saving 23% in disk space)\\"}),(0,n.jsx)(e.br,{}),`\\n`,\\"Since we are here, we\\\\u2019re going to go ahead and enable \\",(0,n.jsx)(e.a,{href:\\"https://www.elastic.co/guide/en/elasticsearch/reference/current/mapping-source-field.html#synthetic-source\\",rel:\\"nofollow\\",children:\\"Synthetic Source\\"}),\\", a clever feature that allows us to avoid storing the original JSON document altogether while still reconstructing it when needed from the stored fields.\\"]}),`\\n`,(0,n.jsxs)(e.p,{children:[\\"For our example, enabling Synthetic Source resulted in a remarkable \\",(0,n.jsx)(e.strong,{children:\\"23.4% improvement in storage efficiency\\"}),\\", reducing the size required to store a single document from 241.2 bytes in OpenSearch to just \\",(0,n.jsx)(e.strong,{children:\\"185 bytes\\"}),\\" in Elasticsearch.\\"]}),`\\n`,(0,n.jsx)(e.p,{children:\\"Our full index template is therefore:\\"}),`\\n`,(0,n.jsx)(e.pre,{children:(0,n.jsx)(e.code,{className:\\"language-bash\\",children:`PUT _index_template/logs-myapplication-reindex\\n{\\n \\"index_patterns\\": [\\n \\"logs-myapplication-reindex\\"\\n ],\\n \\"priority\\": 500,\\n \\"data_stream\\": {},\\n \\"template\\": {\\n \\"settings\\": {\\n \\"index\\": {\\n \\"lifecycle.name\\": \\"logs-myapplication-lifecycle-policy\\",\\n \\"codec\\": \\"best_compression\\",\\n \\"number_of_shards\\": \\"1\\",\\n \\"number_of_replicas\\": \\"1\\",\\n \\"query\\": {\\n \\"default_field\\": [\\n \\"message\\"\\n ]\\n }\\n }\\n },\\n \\"mappings\\": {\\n \\"_source\\": {\\n \\"mode\\": \\"synthetic\\"\\n },\\n \\"_data_stream_timestamp\\": {\\n \\"enabled\\": true\\n },\\n \\"date_detection\\": false,\\n \\"properties\\": {\\n \\"@timestamp\\": {\\n \\"type\\": \\"date\\"\\n },\\n \\"agent\\": {\\n \\"properties\\": {\\n \\"ephemeral_id\\": {\\n \\"type\\": \\"keyword\\",\\n \\"ignore_above\\": 1024\\n },\\n \\"id\\": {\\n \\"type\\": \\"keyword\\",\\n \\"ignore_above\\": 1024\\n },\\n \\"name\\": {\\n \\"type\\": \\"keyword\\",\\n \\"ignore_above\\": 1024\\n },\\n \\"type\\": {\\n \\"type\\": \\"keyword\\",\\n \\"ignore_above\\": 1024\\n },\\n \\"version\\": {\\n \\"type\\": \\"keyword\\",\\n \\"ignore_above\\": 1024\\n }\\n }\\n },\\n \\"aws\\": {\\n \\"properties\\": {\\n \\"cloudwatch\\": {\\n \\"properties\\": {\\n \\"ingestion_time\\": {\\n \\"type\\": \\"keyword\\",\\n \\"ignore_above\\": 
1024\\n },\\n \\"log_group\\": {\\n \\"type\\": \\"keyword\\",\\n \\"ignore_above\\": 1024\\n },\\n \\"log_stream\\": {\\n \\"type\\": \\"keyword\\",\\n \\"ignore_above\\": 1024\\n }\\n }\\n }\\n }\\n },\\n \\"cloud\\": {\\n \\"properties\\": {\\n \\"region\\": {\\n \\"type\\": \\"keyword\\",\\n \\"ignore_above\\": 1024\\n }\\n }\\n },\\n \\"data_stream\\": {\\n \\"properties\\": {\\n \\"dataset\\": {\\n \\"type\\": \\"keyword\\",\\n \\"ignore_above\\": 1024\\n },\\n \\"namespace\\": {\\n \\"type\\": \\"keyword\\",\\n \\"ignore_above\\": 1024\\n },\\n \\"type\\": {\\n \\"type\\": \\"keyword\\",\\n \\"ignore_above\\": 1024\\n }\\n }\\n },\\n \\"ecs\\": {\\n \\"properties\\": {\\n \\"version\\": {\\n \\"type\\": \\"keyword\\",\\n \\"ignore_above\\": 1024\\n }\\n }\\n },\\n \\"event\\": {\\n \\"properties\\": {\\n \\"dataset\\": {\\n \\"type\\": \\"keyword\\",\\n \\"ignore_above\\": 1024\\n },\\n \\"id\\": {\\n \\"type\\": \\"keyword\\",\\n \\"ignore_above\\": 1024\\n },\\n \\"ingested\\": {\\n \\"type\\": \\"date\\"\\n }\\n }\\n },\\n \\"host\\": {\\n \\"type\\": \\"object\\"\\n },\\n \\"input\\": {\\n \\"properties\\": {\\n \\"type\\": {\\n \\"type\\": \\"keyword\\",\\n \\"ignore_above\\": 1024\\n }\\n }\\n },\\n \\"log\\": {\\n \\"properties\\": {\\n \\"file\\": {\\n \\"properties\\": {\\n \\"path\\": {\\n \\"type\\": \\"keyword\\",\\n \\"ignore_above\\": 1024\\n }\\n }\\n }\\n }\\n },\\n \\"message\\": {\\n \\"type\\": \\"match_only_text\\"\\n },\\n \\"meta\\": {\\n \\"properties\\": {\\n \\"file\\": {\\n \\"type\\": \\"keyword\\",\\n \\"ignore_above\\": 1024\\n }\\n }\\n },\\n \\"metrics\\": {\\n \\"properties\\": {\\n \\"size\\": {\\n \\"type\\": \\"long\\"\\n },\\n \\"tmin\\": {\\n \\"type\\": \\"long\\"\\n }\\n }\\n },\\n \\"process\\": {\\n \\"properties\\": {\\n \\"name\\": {\\n \\"type\\": \\"keyword\\",\\n \\"ignore_above\\": 1024\\n }\\n }\\n },\\n \\"tags\\": {\\n \\"type\\": \\"keyword\\",\\n \\"ignore_above\\": 1024\\n }\\n }\\n }\\n }\\n}\\n`})}),`\\n`,(0,n.jsx)(e.h3,{id:\\"building-a-custom-logstash-image\\",children:\\"Building a custom Logstash image\\"}),`\\n`,(0,n.jsx)(e.p,{children:\\"We are going to use a containerized Logstash for this migration because both clusters are sitting on a Kubernetes infrastructure, so it\'s easier to just spin up a Pod that will communicate to both clusters.\\"}),`\\n`,(0,n.jsx)(e.p,{children:\\"Since OpenSearch is not an official Logstash input, we must build a custom Logstash image that contains the logstash-input-opensearch plugin. 
Let\\\\u2019s use the base image from docker.elastic.co/logstash/logstash:8.16.1 and just install the plugin:\\"}),`\\n`,(0,n.jsx)(e.pre,{children:(0,n.jsx)(e.code,{className:\\"language-dockerfile\\",children:`FROM docker.elastic.co/logstash/logstash:8.16.1\\n\\nUSER logstash\\nWORKDIR /usr/share/logstash\\nRUN bin/logstash-plugin install logstash-input-opensearch\\n`})}),`\\n`,(0,n.jsx)(e.h3,{id:\\"writing-a-logstash-pipeline\\",children:\\"Writing a Logstash pipeline\\"}),`\\n`,(0,n.jsx)(e.p,{children:\\"Now we have our Logstash Docker image, and we need to write a pipeline that will read from OpenSearch and write to Elasticsearch.\\"}),`\\n`,(0,n.jsxs)(e.p,{children:[(0,n.jsx)(e.strong,{children:\\"The\\"}),\\" \\",(0,n.jsx)(e.strong,{children:\\"input\\"})]}),`\\n`,(0,n.jsx)(e.pre,{children:(0,n.jsx)(e.code,{className:\\"language-ruby\\",children:`input {\\n opensearch {\\n hosts => [\\"os-cluster:9200\\"]\\n ssl => true\\n ca_file => \\"/etc/logstash/certificates/opensearch-ca.crt\\"\\n user => \\"\\\\${OPENSEARCH_USERNAME}\\"\\n password => \\"\\\\${OPENSEARCH_PASSWORD}\\"\\n index => \\"\\\\${SOURCE_INDEX_NAME}\\"\\n slices => \\"\\\\${SOURCE_SLICES}\\"\\n size => \\"\\\\${SOURCE_PAGE_SIZE}\\"\\n scroll => \\"5m\\"\\n docinfo => true\\n docinfo_target => \\"[@metadata][doc]\\"\\n }\\n}\\n`})}),`\\n`,(0,n.jsx)(e.p,{children:\\"Let\\\\u2019s break down the most important input parameters. The values are all represented as environment variables here:\\"}),`\\n`,(0,n.jsxs)(e.ul,{children:[`\\n`,(0,n.jsxs)(e.li,{children:[(0,n.jsx)(e.strong,{children:\\"hosts:\\"}),\\" Specifies the host and port of the OpenSearch cluster. In this case, it\\\\u2019s connecting to \\\\u201Cos-cluster\\\\u201D on port 9200.\\"]}),`\\n`,(0,n.jsxs)(e.li,{children:[(0,n.jsx)(e.strong,{children:\\"index:\\"}),\\" Specifies the index in the OpenSearch cluster from which to retrieve logs. In this case, it\\\\u2019s \\\\u201Clogs-myapplication-prod\\\\u201D which is a datastream that contains the actual indices (e.g., .ds-logs-myapplication-prod-000049).\\"]}),`\\n`,(0,n.jsxs)(e.li,{children:[(0,n.jsx)(e.strong,{children:\\"size:\\"}),\\" Specifies the maximum number of logs to retrieve in each request.\\"]}),`\\n`,(0,n.jsxs)(e.li,{children:[(0,n.jsx)(e.strong,{children:\\"scroll:\\"}),\\" Defines how long a search context will be kept open on the OpenSearch server. In this case, it\\\\u2019s set to \\\\u201C5m,\\\\u201D which means each request must be answered and a new \\\\u201Cpage\\\\u201D asked within five minutes.\\"]}),`\\n`,(0,n.jsxs)(e.li,{children:[(0,n.jsx)(e.strong,{children:\\"docinfo\\"}),\\" and \\",(0,n.jsx)(e.strong,{children:\\"docinfo_target:\\"}),\\" These settings control whether document metadata should be included in the Logstash output and where it should be stored. In this case, document metadata is being stored in the [@metadata][doc] field \\\\u2014 this is important because the document\\\\u2019s _id will be used as the destination id as well.\\"]}),`\\n`]}),`\\n`,(0,n.jsx)(e.p,{children:\\"The ssl and ca_file are highly recommended if you are migrating from clusters that are in a different infrastructure (separate cloud providers). You don\\\\u2019t need to specify a ca_file if your TLS certificates are signed by a public authority, which is likely the case if you are using a SaaS and your endpoint is reachable over the internet. In this case, only ssl => true would suffice. 
In our case, all our TLS certificates are self-signed, so we must also provide the Certificate Authority (CA) certificate.\\"}),`\\n`,(0,n.jsxs)(e.p,{children:[(0,n.jsx)(e.strong,{children:\\"The (optional)\\"}),\\" \\",(0,n.jsx)(e.strong,{children:\\"filter\\"}),(0,n.jsx)(e.br,{}),`\\n`,\'We could use this to drop or alter the documents to be written to Elasticsearch if we wanted, but we are not going to, as we want to migrate the documents as is. We are only removing extra metadata fields that Logstash includes in all documents, such as \\"@version\\" and \\"host\\". We are also removing the original \\"data_stream\\" field, as it contains the source data stream name, which might not be the same in the destination.\']}),`\\n`,(0,n.jsx)(e.pre,{children:(0,n.jsx)(e.code,{className:\\"language-ruby\\",children:`filter {\\n mutate {\\n remove_field => [\\"@version\\", \\"host\\", \\"data_stream\\"]\\n }\\n}\\n`})}),`\\n`,(0,n.jsxs)(e.p,{children:[(0,n.jsx)(e.strong,{children:\\"The\\"}),\\" \\",(0,n.jsx)(e.strong,{children:\\"output\\"}),(0,n.jsx)(e.br,{}),`\\n`,\\"The output is really simple \\\\u2014 we are going to name our datastream logs-myapplication-reindex, and we are using the document id of the original documents in document_id to ensure there are no duplicate documents. In Elasticsearch, datastream names follow the convention type-dataset-namespace, so our logs-myapplication-reindex datastream has \\\\u201Cmyapplication\\\\u201D as the dataset and \\\\u201Creindex\\\\u201D as the namespace.\\"]}),`\\n`,(0,n.jsx)(e.pre,{children:(0,n.jsx)(e.code,{className:\\"language-ruby\\",children:`elasticsearch {\\n hosts => \\"\\\\${ELASTICSEARCH_HOST}\\"\\n\\n user => \\"\\\\${ELASTICSEARCH_USERNAME}\\"\\n password => \\"\\\\${ELASTICSEARCH_PASSWORD}\\"\\n\\n document_id => \\"%{[@metadata][doc][_id]}\\"\\n\\n data_stream => \\"true\\"\\n data_stream_type => \\"logs\\"\\n data_stream_dataset => \\"myapplication\\"\\n data_stream_namespace => \\"reindex\\"\\n}\\n`})}),`\\n`,(0,n.jsx)(e.h3,{id:\\"deploying-logstash\\",children:\\"Deploying Logstash\\"}),`\\n`,(0,n.jsxs)(e.p,{children:[\\"We have a few options to deploy Logstash: it can be deployed \\",(0,n.jsx)(e.a,{href:\\"https://www.elastic.co/guide/en/logstash/current/running-logstash-command-line.html\\",rel:\\"nofollow\\",children:\\"locally from the command line\\"}),\\", as a \\",(0,n.jsx)(e.a,{href:\\"https://www.elastic.co/guide/en/logstash/current/running-logstash.html\\",rel:\\"nofollow\\",children:\\"systemd service\\"}),\\", via \\",(0,n.jsx)(e.a,{href:\\"https://www.elastic.co/guide/en/logstash/current/docker.html\\",rel:\\"nofollow\\",children:\\"docker\\"}),\\", or on \\",(0,n.jsx)(e.a,{href:\\"https://www.elastic.co/guide/en/cloud-on-k8s/current/k8s-logstash.html\\",rel:\\"nofollow\\",children:\\"Kubernetes\\"}),\\".\\"]}),`\\n`,(0,n.jsxs)(e.p,{children:[\\"Since both of our clusters are deployed in a Kubernetes environment, we are going to deploy Logstash as a \\",(0,n.jsx)(e.strong,{children:\\"Pod\\"}),\\" referencing our Docker image created earlier. 
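\\"]}),`\\n`,(0,n.jsx)(e.p,{children:\\"If you are following along, a minimal sketch for building and publishing that image from the Dockerfile above would look like the commands below (the registry name and tag are illustrative, so point them at your own registry):\\"}),`\\n`,(0,n.jsx)(e.pre,{children:(0,n.jsx)(e.code,{className:\\"language-bash\\",children:`docker build -t my-registry/logstash-opensearch-input:8.16.1 .\\ndocker push my-registry/logstash-opensearch-input:8.16.1\\n`})}),`\\n`,(0,n.jsxs)(e.p,{children:[\\"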
Let\\\\u2019s put our pipeline inside a \\",(0,n.jsx)(e.strong,{children:\\"ConfigMap\\"}),\\" along with some configuration files (pipelines.yml and config.yml).\\"]}),`\\n`,(0,n.jsx)(e.p,{children:\\"In the below configuration, we have SOURCE_INDEX_NAME, SOURCE_SLICES, SOURCE_PAGE_SIZE, LOGSTASH_WORKERS, and LOGSTASH_BATCH_SIZE conveniently exposed as environment variables so you just need to fill them out.\\"}),`\\n`,(0,n.jsx)(e.pre,{children:(0,n.jsx)(e.code,{className:\\"language-yaml\\",children:`apiVersion: v1\\nkind: Pod\\nmetadata:\\n name: logstash-1\\nspec:\\n containers:\\n - name: logstash\\n image: ugosan/logstash-opensearch-input:8.10.0\\n imagePullPolicy: Always\\n env:\\n - name: SOURCE_INDEX_NAME\\n value: \\".ds-logs-benchmark-dev-000037\\"\\n - name: SOURCE_SLICES\\n value: \\"10\\"\\n - name: SOURCE_PAGE_SIZE\\n value: \\"500\\"\\n - name: LOGSTASH_WORKERS\\n value: \\"4\\"\\n - name: LOGSTASH_BATCH_SIZE\\n value: \\"1000\\"\\n - name: OPENSEARCH_USERNAME\\n valueFrom:\\n secretKeyRef:\\n name: os-cluster-admin-password\\n key: username\\n - name: OPENSEARCH_PASSWORD\\n valueFrom:\\n secretKeyRef:\\n name: os-cluster-admin-password\\n key: password\\n - name: ELASTICSEARCH_USERNAME\\n value: \\"elastic\\"\\n - name: ELASTICSEARCH_PASSWORD\\n valueFrom:\\n secretKeyRef:\\n name: es-cluster-es-elastic-user\\n key: elastic\\n resources:\\n limits:\\n memory: \\"4Gi\\"\\n cpu: \\"2500m\\"\\n requests:\\n memory: \\"1Gi\\"\\n cpu: \\"300m\\"\\n volumeMounts:\\n - name: config-volume\\n mountPath: /usr/share/logstash/config\\n - name: etc\\n mountPath: /etc/logstash\\n readOnly: true\\n volumes:\\n - name: config-volume\\n projected:\\n sources:\\n - configMap:\\n name: logstash-configmap\\n items:\\n - key: pipelines.yml\\n path: pipelines.yml\\n - key: logstash.yml\\n path: logstash.yml\\n - name: etc\\n projected:\\n sources:\\n - configMap:\\n name: logstash-configmap\\n items:\\n - key: pipeline.conf\\n path: pipelines/pipeline.conf\\n - secret:\\n name: os-cluster-http-cert\\n items:\\n - key: ca.crt\\n path: certificates/opensearch-ca.crt\\n - secret:\\n name: es-cluster-es-http-ca-internal\\n items:\\n - key: tls.crt\\n path: certificates/elasticsearch-ca.crt\\n---\\napiVersion: v1\\nkind: ConfigMap\\nmetadata:\\n name: logstash-configmap\\ndata:\\n pipelines.yml: |\\n - pipeline.id: reindex-os-es\\n path.config: \\"/etc/logstash/pipelines/pipeline.conf\\"\\n pipeline.batch.size: \\\\${LOGSTASH_BATCH_SIZE}\\n pipeline.workers: \\\\${LOGSTASH_WORKERS}\\n logstash.yml: |\\n log.level: info\\n pipeline.unsafe_shutdown: true\\n pipeline.ordered: false\\n pipeline.conf: |\\n input {\\n opensearch {\\n hosts => [\\"os-cluster:9200\\"]\\n ssl => true\\n ca_file => \\"/etc/logstash/certificates/opensearch-ca.crt\\"\\n user => \\"\\\\${OPENSEARCH_USERNAME}\\"\\n password => \\"\\\\${OPENSEARCH_PASSWORD}\\"\\n index => \\"\\\\${SOURCE_INDEX_NAME}\\"\\n slices => \\"\\\\${SOURCE_SLICES}\\"\\n size => \\"\\\\${SOURCE_PAGE_SIZE}\\"\\n scroll => \\"5m\\"\\n docinfo => true\\n docinfo_target => \\"[@metadata][doc]\\"\\n }\\n }\\n\\n filter {\\n mutate {\\n remove_field => [\\"@version\\", \\"host\\", \\"data_stream\\"]\\n }\\n }\\n\\n output {\\n elasticsearch {\\n hosts => \\"https://es-cluster-es-http:9200\\"\\n ssl => true\\n ssl_certificate_authorities => [\\"/etc/logstash/certificates/elasticsearch-ca.crt\\"]\\n ssl_verification_mode => \\"full\\"\\n\\n user => \\"\\\\${ELASTICSEARCH_USERNAME}\\"\\n password => \\"\\\\${ELASTICSEARCH_PASSWORD}\\"\\n\\n document_id => 
\\"%{[@metadata][doc][_id]}\\"\\n\\n data_stream => \\"true\\"\\n data_stream_type => \\"logs\\"\\n data_stream_dataset => \\"myapplication\\"\\n data_stream_namespace => \\"reindex\\"\\n }\\n }\\n`})}),`\\n`,(0,n.jsx)(e.h2,{id:\\"thats-it\\",children:\\"That\\\\u2019s it.\\"}),`\\n`,(0,n.jsxs)(e.p,{children:[\\"After a couple hours, we successfully migrated 1 billion documents from OpenSearch to Elasticsearch and even saved 23% plus on disk storage! Now that we have the logs in Elasticsearch how about extracting actual business value from them? Logs contain so much valuable information - we can not only do all sorts of interesting things with AIOPS, like \\",(0,n.jsx)(e.a,{href:\\"https://www.elastic.co/guide/en/observability/current/categorize-logs.html#analyze-log-categories\\",rel:\\"nofollow\\",children:\\"Automatically Categorize\\"}),\\" those logs, but also extract \\",(0,n.jsx)(e.a,{href:\\"https://www.youtube.com/watch?v=0E7isxR_FzY&list=PLzPXmNbs8vqUc2bROb1E2gNyj2GynRB5b&index=3&t=1122s\\",rel:\\"nofollow\\",children:\\"business metrics\\"}),\\" and \\",(0,n.jsx)(e.a,{href:\\"https://www.youtube.com/watch?v=0E7isxR_FzY&list=PLzPXmNbs8vqUc2bROb1E2gNyj2GynRB5b&index=3&t=1906s\\",rel:\\"nofollow\\",children:\\"detect anomalies\\"}),\\" on them, give it a try.\\"]}),`\\n`,(0,n.jsx)(e.div,{className:\\"table-container\\",children:(0,n.jsxs)(e.table,{children:[(0,n.jsx)(e.thead,{children:(0,n.jsxs)(e.tr,{children:[(0,n.jsx)(e.th,{}),(0,n.jsx)(e.th,{}),(0,n.jsx)(e.th,{}),(0,n.jsx)(e.th,{}),(0,n.jsx)(e.th,{}),(0,n.jsx)(e.th,{}),(0,n.jsx)(e.th,{})]})}),(0,n.jsxs)(e.tbody,{children:[(0,n.jsxs)(e.tr,{children:[(0,n.jsx)(e.td,{children:\\"OpenSearch\\"}),(0,n.jsx)(e.td,{}),(0,n.jsx)(e.td,{}),(0,n.jsx)(e.td,{children:\\"Elasticsearch\\"}),(0,n.jsx)(e.td,{}),(0,n.jsx)(e.td,{}),(0,n.jsx)(e.td,{})]}),(0,n.jsxs)(e.tr,{children:[(0,n.jsx)(e.td,{children:\\"Index\\"}),(0,n.jsx)(e.td,{children:\\"docs\\"}),(0,n.jsx)(e.td,{children:\\"size\\"}),(0,n.jsx)(e.td,{children:\\"Index\\"}),(0,n.jsx)(e.td,{children:\\"docs\\"}),(0,n.jsx)(e.td,{children:\\"size\\"}),(0,n.jsx)(e.td,{children:\\"Diff.\\"})]}),(0,n.jsxs)(e.tr,{children:[(0,n.jsx)(e.td,{children:\\".ds-logs-myapplication-prod-000037\\"}),(0,n.jsx)(e.td,{children:\\"116842158\\"}),(0,n.jsx)(e.td,{children:\\"27285520870\\"}),(0,n.jsx)(e.td,{children:\\"logs-myapplication-reindex-000037\\"}),(0,n.jsx)(e.td,{children:\\"116842158\\"}),(0,n.jsx)(e.td,{children:\\"21998435329\\"}),(0,n.jsx)(e.td,{children:\\"21.46%\\"})]}),(0,n.jsxs)(e.tr,{children:[(0,n.jsx)(e.td,{children:\\".ds-logs-myapplication-prod-000038\\"}),(0,n.jsx)(e.td,{children:\\"110994116\\"}),(0,n.jsx)(e.td,{children:\\"27263291740\\"}),(0,n.jsx)(e.td,{children:\\"logs-myapplication-reindex-000038\\"}),(0,n.jsx)(e.td,{children:\\"110994116\\"}),(0,n.jsx)(e.td,{children:\\"21540011082\\"}),(0,n.jsx)(e.td,{children:\\"23.45%\\"})]}),(0,n.jsxs)(e.tr,{children:[(0,n.jsx)(e.td,{children:\\".ds-logs-myapplication-prod-000040\\"}),(0,n.jsx)(e.td,{children:\\"113362823\\"}),(0,n.jsx)(e.td,{children:\\"27872438186\\"}),(0,n.jsx)(e.td,{children:\\"logs-myapplication-reindex-000040\\"}),(0,n.jsx)(e.td,{children:\\"113362823\\"}),(0,n.jsx)(e.td,{children:\\"22234641932\\"}),(0,n.jsx)(e.td,{children:\\"22.50%\\"})]}),(0,n.jsxs)(e.tr,{children:[(0,n.jsx)(e.td,{children:\\".ds-logs-myapplication-prod-000041\\"}),(0,n.jsx)(e.td,{children:\\"112400019\\"}),(0,n.jsx)(e.td,{children:\\"27618801653\\"}),(0,n.jsx)(e.td,{children:\\"logs-myapplication-reindex-000041\\"}),(0,n.jsx)(e.td,{children:\\"11240001
9\\"}),(0,n.jsx)(e.td,{children:\\"22059453868\\"}),(0,n.jsx)(e.td,{children:\\"22.38%\\"})]}),(0,n.jsxs)(e.tr,{children:[(0,n.jsx)(e.td,{children:\\".ds-logs-myapplication-prod-000042\\"}),(0,n.jsx)(e.td,{children:\\"113859174\\"}),(0,n.jsx)(e.td,{children:\\"26686723701\\"}),(0,n.jsx)(e.td,{children:\\"logs-myapplication-reindex-000042\\"}),(0,n.jsx)(e.td,{children:\\"113859174\\"}),(0,n.jsx)(e.td,{children:\\"21093766108\\"}),(0,n.jsx)(e.td,{children:\\"23.41%\\"})]}),(0,n.jsxs)(e.tr,{children:[(0,n.jsx)(e.td,{children:\\".ds-logs-myapplication-prod-000043\\"}),(0,n.jsx)(e.td,{children:\\"113821016\\"}),(0,n.jsx)(e.td,{children:\\"27657006598\\"}),(0,n.jsx)(e.td,{children:\\"logs-myapplication-reindex-000043\\"}),(0,n.jsx)(e.td,{children:\\"113821016\\"}),(0,n.jsx)(e.td,{children:\\"22059454752\\"}),(0,n.jsx)(e.td,{children:\\"22.52%\\"})]}),(0,n.jsxs)(e.tr,{children:[(0,n.jsx)(e.td,{children:\\".ds-logs-myapplication-prod-000044\\"}),(0,n.jsx)(e.td,{children:\\"111093596\\"}),(0,n.jsx)(e.td,{children:\\"27281936915\\"}),(0,n.jsx)(e.td,{children:\\"logs-myapplication-reindex-000044\\"}),(0,n.jsx)(e.td,{children:\\"111093596\\"}),(0,n.jsx)(e.td,{children:\\"21559513422\\"}),(0,n.jsx)(e.td,{children:\\"23.43%\\"})]}),(0,n.jsxs)(e.tr,{children:[(0,n.jsx)(e.td,{children:\\".ds-logs-myapplication-prod-000048\\"}),(0,n.jsx)(e.td,{children:\\"114273539\\"}),(0,n.jsx)(e.td,{children:\\"28111420495\\"}),(0,n.jsx)(e.td,{children:\\"logs-myapplication-reindex-000048\\"}),(0,n.jsx)(e.td,{children:\\"114273539\\"}),(0,n.jsx)(e.td,{children:\\"22264398939\\"}),(0,n.jsx)(e.td,{children:\\"23.21%\\"})]}),(0,n.jsxs)(e.tr,{children:[(0,n.jsx)(e.td,{children:\\".ds-logs-myapplication-prod-000049\\"}),(0,n.jsx)(e.td,{children:\\"102519334\\"}),(0,n.jsx)(e.td,{children:\\"23731274338\\"}),(0,n.jsx)(e.td,{children:\\"logs-myapplication-reindex-000049\\"}),(0,n.jsx)(e.td,{children:\\"102519334\\"}),(0,n.jsx)(e.td,{children:\\"19307250001\\"}),(0,n.jsx)(e.td,{children:\\"20.56%\\"})]})]})]})}),`\\n`,(0,n.jsxs)(e.p,{children:[\\"Interested in trying Elasticsearch? \\",(0,n.jsx)(e.a,{href:\\"https://cloud.elastic.co/registration?elektra=en-cloud-page\\",rel:\\"nofollow\\",children:\\"Start our 14-day free trial\\"}),\\".\\"]}),`\\n`,(0,n.jsx)(e.p,{children:(0,n.jsx)(e.em,{children:\\"The release and timing of any features or functionality described in this post remain at Elastic\'s sole discretion. 
Any features or functionality not currently available may not be delivered on time or at all.\\"})})]})}function h(t={}){let{wrapper:e}=t.components||{};return e?(0,n.jsx)(e,{...t,children:(0,n.jsx)(d,{...t})}):d(t)}return v(S);})();\\n;return Component;"},"_id":"articles/migrating-1-billion-log-lines-opensearch-elasticsearch.mdx","_raw":{"sourceFilePath":"articles/migrating-1-billion-log-lines-opensearch-elasticsearch.mdx","sourceFileName":"migrating-1-billion-log-lines-opensearch-elasticsearch.mdx","sourceFileDir":"articles","contentType":"mdx","flattenedPath":"articles/migrating-1-billion-log-lines-opensearch-elasticsearch"},"type":"Article","imageUrl":"/assets/images/migrating-billion-log-lines-opensearch-elasticsearch/elastic-blog-header-1-billion-log-lines.png","readingTime":"38 min read","url":"/migrating-billion-log-lines-opensearch-elasticsearch","headings":[{"level":2,"title":"1 billion log lines","href":"#1-billion-log-lines"},{"level":2,"title":"Moving data from A to B","href":"#moving-data-from-a-to-b"},{"level":3,"title":"Scrolling","href":"#scrolling"},{"level":3,"title":"Migration strategies","href":"#migration-strategies"},{"level":2,"title":"Let’s migrate","href":"#lets-migrate"},{"level":3,"title":"Preparing our destination indices","href":"#preparing-our-destination-indices"},{"level":3,"title":"Building a custom Logstash image","href":"#building-a-custom-logstash-image"},{"level":3,"title":"Writing a Logstash pipeline","href":"#writing-a-logstash-pipeline"},{"level":3,"title":"Deploying Logstash","href":"#deploying-logstash"},{"level":2,"title":"That’s it.","href":"#thats-it"}]},{"title":"Monitor dbt pipelines with Elastic Observability","slug":"monitor-dbt-pipelines-with-elastic-observability","date":"2024-07-26","description":"Learn how to set up a dbt monitoring system with Elastic that proactively alerts on data processing cost spikes, anomalies in rows per table, and data quality test failures","image":"monitoring-dbt-with-elastic.png","author":[{"slug":"almudena-sanz-olive","type":"Author","_raw":{}},{"slug":"tamara-dancheva","type":"Author","_raw":{}}],"tags":[{"slug":"log-analytics","type":"Tag","_raw":{}},{"slug":"aiops","type":"Tag","_raw":{}},{"slug":"ai-assistant","type":"Tag","_raw":{}},{"slug":"genai","type":"Tag","_raw":{}}],"body":{"raw":"\\nIn the Data Analytics team within the Observability organization in Elastic, we use [dbt (dbt™, data build tool)](https://www.getdbt.com/product/what-is-dbt) to execute our SQL data transformation pipelines. dbt is a SQL-first transformation workflow that lets teams quickly and collaboratively deploy analytics code. In particular, we use [dbt core](https://docs.getdbt.com/docs/core/installation-overview), the [open-source project](https://github.com/dbt-labs/dbt-core), where you can develop from the command line and run your dbt project.\\n\\nOur data transformation pipelines run daily and process the data that feed our internal dashboards, reports, analyses, and Machine Learning (ML) models.\\n\\nThere have been incidents in the past when the pipelines have failed, the source tables contained wrong data or we have introduced a change into our SQL code that has caused data quality issues, and we only realized once we saw it in a weekly report that was showing an anomalous number of records. 
That’s why we have built a monitoring system that proactively alerts us about these types of incidents as soon as they happen and helps us with visualizations and analyses to understand their root cause, saving us several hours or days of manual investigations.\\n\\nWe have leveraged our own Observability Solution to help solve this challenge, monitoring the entire lifecycle of our dbt implementation. This setup enables us to track the behavior of our models and conduct data quality testing on the final tables. We export dbt process logs from run jobs and tests into Elasticsearch and utilize Kibana to create dashboards, set up alerts, and configure Machine Learning jobs to monitor and assess issues.\\n\\nThe following diagram shows our complete architecture. In a follow-up article, we’ll also cover how we observe our python data processing and ML model processes using OTEL and Elastic - stay tuned.\\n\\n![1 - architecture](/assets/images/monitor-dbt-pipelines-with-elastic-observability/architecture.png)\\n\\n## Why monitor dbt pipelines with Elastic?\\n\\nWith every invocation, dbt generates and saves one or more JSON files called [artifacts](https://docs.getdbt.com/reference/artifacts/dbt-artifacts) containing log data on the invocation results. `dbt run` and `dbt test` invocation logs are [stored in the file `run_results.json`](https://docs.getdbt.com/reference/artifacts/run-results-json), as per the dbt documentation:\\n\\n> This file contains information about a completed invocation of dbt, including timing and status info for each node (model, test, etc) that was executed. In aggregate, many `run_results.json` can be combined to calculate average model runtime, test failure rates, the number of record changes captured by snapshots, etc.\\n\\nMonitoring `dbt run` invocation logs can help solve several issues, including tracking and alerting about table volumes, detecting excessive slot time from resource-intensive models, identifying cost spikes due to slot time or volume, and pinpointing slow execution times that may indicate scheduling issues. This system was crucial when we merged a PR with a change in our code that had an issue, producing a sudden drop in the number of daily rows in upstream Table A. By ingesting the `dbt run` logs into Elastic, our anomaly detection job quickly identified anomalies in the daily row counts for Table A and its downstream tables, B, C, and D. The Data Analytics team received an alert notification about the issue, allowing us to promptly troubleshoot, fix and backfill the tables before it affected the weekly dashboards and downstream ML models.\\n\\nMonitoring `dbt test` invocation logs can also address several issues, such as identifying duplicates in tables, detecting unnoticed alterations in allowed values for specific fields through validation of all enum fields, and resolving various other data processing and quality concerns. With dashboards and alerts on data quality tests, we proactively identify issues like duplicate keys, unexpected category values, and increased nulls, ensuring data integrity. In our team, we had an issue where a change in one of our raw lookup tables produced duplicated rows in our user table, doubling the number of users reported. By ingesting the `dbt test` logs into Elastic, our rules detected that some duplicate tests had failed. The team received an alert notification about the issue, allowing us to troubleshoot it right away by finding the upstream table that was the root cause. 
These duplicates meant that downstream tables had to process 2x the amount of data, creating a spike in the bytes processed and slot time. The anomaly detection and alerts on the `dbt run` logs also helped us spot these spikes for individual tables and allowed us to quantify the impact on our billing.\\n\\nProcessing our dbt logs with Elastic and Kibana allows us to obtain real-time insights, helps us quickly troubleshoot potential issues, and keeps our data transformation processes running smoothly. We set up anomaly detection jobs and alerts in Kibana to monitor the number of rows processed by dbt, the slot time, and the results of the tests. This lets us catch real-time incidents, and by promptly identifying and fixing these issues, Elastic makes our data pipeline more resilient and our models more cost-effective, helping us stay on top of cost spikes or data quality issues.\\n\\nWe can also correlate this information with other events ingested into Elastic. For example, using the [Elastic Github connector](https://www.elastic.co/guide/en/enterprise-search/current/connectors-github.html), we can correlate data quality test failures or other anomalies with code changes and trace them back to the commit or PR that caused the issue. By ingesting application logs into Elastic, we can also use APM to analyze whether these issues in our pipelines have affected downstream applications by increasing latency, throughput, or error rates. By ingesting billing, revenue, or web traffic data, we could also see the impact on business metrics.\\n\\n## How to export dbt invocation logs to Elasticsearch\\n\\nWe use the [Python Elasticsearch client](https://elasticsearch-py.readthedocs.io/en) to send the dbt invocation logs to Elastic after we run our `dbt run` and `dbt test` processes daily in production. The setup just requires you to install the [Elasticsearch Python client](https://elasticsearch-py.readthedocs.io/en/v8.14.0/quickstart.html#installation) and obtain your Elastic Cloud ID (go to https://cloud.elastic.co/deployments/, select your deployment and find the `Cloud ID`) and an Elastic Cloud API Key [(following this guide)](https://elasticsearch-py.readthedocs.io/en/v8.14.0/quickstart.html#connecting).\\n\\nThis Python helper function will index the results from your `run_results.json` file to the specified index. You just need to export the variables to the environment:\\n\\n- `RESULTS_FILE`: path to your `run_results.json` file\\n- `DBT_RUN_LOGS_INDEX`: the name you want to give to the dbt run logs index in Elastic, e.g. `dbt_run_logs`\\n- `DBT_TEST_LOGS_INDEX`: the name you want to give to the dbt test logs index in Elastic, e.g. 
`dbt_test_logs`\\n- `ES_CLUSTER_CLOUD_ID`\\n- `ES_CLUSTER_API_KEY`\\n\\nThen call the function `log_dbt_es` from your python code or save this code as a python script and run it after executing your `dbt run` or `dbt test` commands:\\n\\n```\\nfrom elasticsearch import Elasticsearch, helpers\\nimport os\\nimport sys\\nimport json\\n\\ndef log_dbt_es():\\n RESULTS_FILE = os.environ[\\"RESULTS_FILE\\"]\\n DBT_RUN_LOGS_INDEX = os.environ[\\"DBT_RUN_LOGS_INDEX\\"]\\n DBT_TEST_LOGS_INDEX = os.environ[\\"DBT_TEST_LOGS_INDEX\\"]\\n es_cluster_cloud_id = os.environ[\\"ES_CLUSTER_CLOUD_ID\\"]\\n es_cluster_api_key = os.environ[\\"ES_CLUSTER_API_KEY\\"]\\n\\n\\n es_client = Elasticsearch(\\n cloud_id=es_cluster_cloud_id,\\n api_key=es_cluster_api_key,\\n request_timeout=120,\\n )\\n\\n\\n if not os.path.exists(RESULTS_FILE):\\n print(f\\"ERROR: {RESULTS_FILE} No dbt run results found.\\")\\n sys.exit(1)\\n\\n\\n with open(RESULTS_FILE, \\"r\\") as json_file:\\n results = json.load(json_file)\\n timestamp = results[\\"metadata\\"][\\"generated_at\\"]\\n metadata = results[\\"metadata\\"]\\n elapsed_time = results[\\"elapsed_time\\"]\\n args = results[\\"args\\"]\\n docs = []\\n for result in results[\\"results\\"]:\\n if result[\\"unique_id\\"].split(\\".\\")[0] == \\"test\\":\\n result[\\"_index\\"] = DBT_TEST_LOGS_INDEX\\n else:\\n result[\\"_index\\"] = DBT_RUN_LOGS_INDEX\\n result[\\"@timestamp\\"] = timestamp\\n result[\\"metadata\\"] = metadata\\n result[\\"elapsed_time\\"] = elapsed_time\\n result[\\"args\\"] = args\\n docs.append(result)\\n _ = helpers.bulk(es_client, docs)\\n return \\"Done\\"\\n\\n# Call the function\\nlog_dbt_es()\\n```\\n\\nIf you want to add/remove any other fields from `run_results.json`, you can modify the above function to do it.\\n\\nOnce the results are indexed, you can use Kibana to create Data Views for both indexes and start exploring them in Discover.\\n\\nGo to Discover, click on the data view selector on the top left and “Create a data view”.\\n\\n![2 - discover create a data view](/assets/images/monitor-dbt-pipelines-with-elastic-observability/discover-create-dataview.png)\\n\\nNow you can create a data view with your preferred name. Do this for both dbt run (`DBT_RUN_LOGS_INDEX` in your code) and dbt test (`DBT_TEST_LOGS_INDEX` in your code) indices:\\n\\n![3 - create a data view](/assets/images/monitor-dbt-pipelines-with-elastic-observability/create-dataview.png)\\n\\nGoing back to Discover, you’ll be able to select the Data Views and explore the data.\\n\\n![4 - discover logs explorer](/assets/images/monitor-dbt-pipelines-with-elastic-observability/discover-logs-explorer.png)\\n\\n## dbt run alerts, dashboards and ML jobs\\n\\nThe invocation of [`dbt run`](https://docs.getdbt.com/reference/commands/run) executes compiled SQL model files against the current database. `dbt run` invocation logs contain the [following fields](https://docs.getdbt.com/reference/artifacts/run-results-json):\\n\\n- `unique_id`: Unique model identifier\\n- `execution_time`: Total time spent executing this model run\\n\\nThe logs also contain the following metrics about the job execution from the adapter:\\n\\n- `adapter_response.bytes_processed`\\n- `adapter_response.bytes_billed`\\n- `adapter_response.slot_ms`\\n- `adapter_response.rows_affected`\\n\\nWe have used Kibana to set up [Anomaly Detection jobs](https://www.elastic.co/guide/en/machine-learning/current/ml-ad-run-jobs.html) on the above-mentioned metrics. 
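As a quick sanity check that these metrics are landing in Elasticsearch as expected, you can also aggregate them directly. Here is a rough sketch (it assumes the `dbt_run_logs` index name from above and default dynamic mappings, hence the `.keyword` subfield) that sums the rows affected per model:\\n\\n```\\nGET dbt_run_logs/_search\\n{\\n  \\"size\\": 0,\\n  \\"aggs\\": {\\n    \\"per_model\\": {\\n      \\"terms\\": { \\"field\\": \\"unique_id.keyword\\" },\\n      \\"aggs\\": {\\n        \\"rows_affected\\": { \\"sum\\": { \\"field\\": \\"adapter_response.rows_affected\\" } }\\n      }\\n    }\\n  }\\n}\\n```\\n\\n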
You can configure a [multi-metric job](https://www.elastic.co/guide/en/machine-learning/current/ml-anomaly-detection-job-types.html#multi-metric-jobs) split by `unique_id` to be alerted when the sum of rows affected, slot time consumed, or bytes billed is anomalous per table. You can track one job per metric. If you have built a dashboard of the metrics per table, you can use [this shortcut](https://www.elastic.co/guide/en/machine-learning/8.14/ml-jobs-from-lens.html) to create the Anomaly Detection job directly from the visualization. After the jobs are created and are running on incoming data, you can [view the jobs](https://www.elastic.co/guide/en/machine-learning/current/ml-ad-view-results.html) and add them to a dashboard using the three dots button in the anomaly timeline:\\n\\n![5 - add ML job to dashboard](/assets/images/monitor-dbt-pipelines-with-elastic-observability/ml-job-add-to-dashboard.png)\\n\\nWe have used the [ML job to set up alerts](https://www.elastic.co/guide/en/machine-learning/current/ml-configuring-alerts.html) that send us emails/slack messages when anomalies are detected. Alerts can be created directly from the Jobs (Machine Learning > Anomaly Detection Jobs) page, by clicking on the three dots at the end of the ML job row:\\n\\n![6 - create alert from ML job](/assets/images/monitor-dbt-pipelines-with-elastic-observability/ml-job-create-alert.png)\\n\\nWe also use [Kibana dashboards](https://www.elastic.co/guide/en/kibana/current/dashboard.html) to visualize the anomaly detection job results and related metrics per table, to identify which tables consume most of our resources, to have visibility on their temporal evolution, and to measure aggregated metrics that can help us understand month over month changes.\\n\\n![7 - ML job in dashboard](/assets/images/monitor-dbt-pipelines-with-elastic-observability/ml-job-dashboard.png)\\n![8 - dashboard slot time chart](/assets/images/monitor-dbt-pipelines-with-elastic-observability/dashboard-slot-time.png)\\n![9 - dashboard aggregated metrics](/assets/images/monitor-dbt-pipelines-with-elastic-observability/dashboard-aggregated-metrics.png)\\n\\n## dbt test alerts and dashboards\\n\\nYou may already be familiar with [tests in dbt](https://docs.getdbt.com/docs/build/data-tests), but if you’re not, dbt data tests are assertions you make about your models. Using the command [`dbt test`](https://docs.getdbt.com/reference/commands/test), dbt will tell you if each test in your project passes or fails. [Here is an example of how to set them up](https://docs.getdbt.com/docs/build/data-tests#example). In our team, we use out-of-the-box dbt tests (`unique`, `not_null`, `accepted_values`, and `relationships`) and the packages [dbt_utils](https://hub.getdbt.com/dbt-labs/dbt_utils/latest/) and [dbt_expectations](https://hub.getdbt.com/calogica/dbt_expectations/latest/) for some extra tests. 
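For reference, here is a minimal sketch of how a few of these tests could be declared in a model’s YAML file (the model and column names are illustrative, not from our real project):\\n\\n```\\nversion: 2\\nmodels:\\n  - name: users\\n    columns:\\n      - name: user_id\\n        tests:\\n          - unique\\n          - not_null\\n      - name: plan\\n        tests:\\n          - accepted_values:\\n              values: ['free', 'trial', 'paid']\\n```\\n\\n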
When the command `dbt test` is run, it generates logs that are stored in `run_results.json`.\\n\\ndbt test logs contain the [following fields](https://docs.getdbt.com/reference/artifacts/run-results-json):\\n\\n- `unique_id`: Unique test identifier, tests contain the “test” prefix in their unique identifier\\n- `status`: result of the test, `pass` or `fail`\\n- `execution_time`: Total time spent executing this test\\n- `failures`: will be 0 if the test passes and 1 if the test fails\\n- `message`: If the test fails, reason why it failed\\n\\nThe logs also contain the metrics about the job execution from the adapter.\\n\\nWe have set up alerts on document count (see [guide](https://www.elastic.co/guide/en/observability/8.14/custom-threshold-alert.html)) that will send us an email / slack message when there are any failed tests. The rule for the alerts is set up on the dbt test Data View that we have created before, the query filtering on `status:fail` to obtain the logs for the tests that have failed, and the rule condition is document count bigger than 0.\\nWhenever there is a failure in any test in production, we get an alert with links to the alert details and dashboards to be able to troubleshoot them:\\n\\n![10 - alert](/assets/images/monitor-dbt-pipelines-with-elastic-observability/email-alert.png)\\n\\nWe have also built a dashboard to visualize the tests run, tests failed, and their execution time and slot time to have a historical view of the test run:\\n\\n![11 - dashboard dbt tests](/assets/images/monitor-dbt-pipelines-with-elastic-observability/dashboard-tests.png)\\n\\n## Finding Root Causes with the AI Assistant\\n\\nThe most effective way for us to analyze these multiple sources of information is using the AI Assistant to help us troubleshoot the incidents. In our case, we got an alert about a test failure, and we used the AI Assistant to give us context on what happened. Then we asked if there were any downstream consequences, and the AI Assistant interpreted the results of the Anomaly Detection job, which indicated a spike in slot time for one of our downstream tables and the increase of the slot time vs. the baseline. Then, we asked for the root cause, and the AI Assistant was able to find and provide us a link to a PR from our Github changelog that matched the start of the incident and was the most probable cause.\\n\\n![12 - ai assistant troubleshoot](/assets/images/monitor-dbt-pipelines-with-elastic-observability/ai-assistant.png)\\n\\n## Conclusion\\n\\nAs a Data Analytics team, we are responsible for guaranteeing that the tables, charts, models, reports, and dashboards we provide to stakeholders are accurate and contain the right sources of information. As teams grow, the number of models we own becomes larger and more interconnected, and it isn’t easy to guarantee that everything is running smoothly and providing accurate results. Having a monitoring system that proactively alerts us on cost spikes, anomalies in row counts, or data quality test failures is like having a trusted companion that will alert you in advance if something goes wrong and help you get to the root cause of the issue.\\n\\ndbt invocation logs are a crucial source of information about the status of our data pipelines, and Elastic is the perfect tool to extract the maximum potential out of them. 
Use this blog post as a starting point for utilizing your dbt logs to help your team achieve greater reliability and peace of mind, allowing them to focus on more strategic tasks rather than worrying about potential data issues.\\n","code":"var Component=(()=>{var u=Object.create;var s=Object.defineProperty;var m=Object.getOwnPropertyDescriptor;var p=Object.getOwnPropertyNames;var b=Object.getPrototypeOf,g=Object.prototype.hasOwnProperty;var f=(n,e)=>()=>(e||n((e={exports:{}}).exports,e),e.exports),w=(n,e)=>{for(var i in e)s(n,i,{get:e[i],enumerable:!0})},r=(n,e,i,a)=>{if(e&&typeof e==\\"object\\"||typeof e==\\"function\\")for(let o of p(e))!g.call(n,o)&&o!==i&&s(n,o,{get:()=>e[o],enumerable:!(a=m(e,o))||a.enumerable});return n};var y=(n,e,i)=>(i=n!=null?u(b(n)):{},r(e||!n||!n.__esModule?s(i,\\"default\\",{value:n,enumerable:!0}):i,n)),v=n=>r(s({},\\"__esModule\\",{value:!0}),n);var d=f((x,l)=>{l.exports=_jsx_runtime});var E={};w(E,{default:()=>h,frontmatter:()=>_});var t=y(d()),_={title:\\"Monitor dbt pipelines with Elastic Observability\\",slug:\\"monitor-dbt-pipelines-with-elastic-observability\\",date:\\"2024-07-26\\",description:\\"Learn how to set up a dbt monitoring system with Elastic that proactively alerts on data processing cost spikes, anomalies in rows per table, and data quality test failures\\",author:[{slug:\\"almudena-sanz-olive\\"},{slug:\\"tamara-dancheva\\"}],image:\\"monitoring-dbt-with-elastic.png\\",tags:[{slug:\\"log-analytics\\"},{slug:\\"aiops\\"},{slug:\\"ai-assistant\\"},{slug:\\"genai\\"}]};function c(n){let e={a:\\"a\\",blockquote:\\"blockquote\\",code:\\"code\\",h2:\\"h2\\",img:\\"img\\",li:\\"li\\",p:\\"p\\",pre:\\"pre\\",ul:\\"ul\\",...n.components};return(0,t.jsxs)(t.Fragment,{children:[(0,t.jsxs)(e.p,{children:[\\"In the Data Analytics team within the Observability organization in Elastic, we use \\",(0,t.jsx)(e.a,{href:\\"https://www.getdbt.com/product/what-is-dbt\\",rel:\\"nofollow\\",children:\\"dbt (dbt\\\\u2122, data build tool)\\"}),\\" to execute our SQL data transformation pipelines. dbt is a SQL-first transformation workflow that lets teams quickly and collaboratively deploy analytics code. In particular, we use \\",(0,t.jsx)(e.a,{href:\\"https://docs.getdbt.com/docs/core/installation-overview\\",rel:\\"nofollow\\",children:\\"dbt core\\"}),\\", the \\",(0,t.jsx)(e.a,{href:\\"https://github.com/dbt-labs/dbt-core\\",rel:\\"nofollow\\",children:\\"open-source project\\"}),\\", where you can develop from the command line and run your dbt project.\\"]}),`\\n`,(0,t.jsx)(e.p,{children:\\"Our data transformation pipelines run daily and process the data that feed our internal dashboards, reports, analyses, and Machine Learning (ML) models.\\"}),`\\n`,(0,t.jsx)(e.p,{children:\\"There have been incidents in the past when the pipelines have failed, the source tables contained wrong data or we have introduced a change into our SQL code that has caused data quality issues, and we only realized once we saw it in a weekly report that was showing an anomalous number of records. That\\\\u2019s why we have built a monitoring system that proactively alerts us about these types of incidents as soon as they happen and helps us with visualizations and analyses to understand their root cause, saving us several hours or days of manual investigations.\\"}),`\\n`,(0,t.jsx)(e.p,{children:\\"We have leveraged our own Observability Solution to help solve this challenge, monitoring the entire lifecycle of our dbt implementation. 
This setup enables us to track the behavior of our models and conduct data quality testing on the final tables. We export dbt process logs from run jobs and tests into Elasticsearch and utilize Kibana to create dashboards, set up alerts, and configure Machine Learning jobs to monitor and assess issues.\\"}),`\\n`,(0,t.jsx)(e.p,{children:\\"The following diagram shows our complete architecture. In a follow-up article, we\\\\u2019ll also cover how we observe our python data processing and ML model processes using OTEL and Elastic - stay tuned.\\"}),`\\n`,(0,t.jsx)(e.p,{children:(0,t.jsx)(e.img,{src:\\"/assets/images/monitor-dbt-pipelines-with-elastic-observability/architecture.png\\",alt:\\"1 - architecture\\",width:\\"1590\\",height:\\"888\\"})}),`\\n`,(0,t.jsx)(e.h2,{id:\\"why-monitor-dbt-pipelines-with-elastic\\",children:\\"Why monitor dbt pipelines with Elastic?\\"}),`\\n`,(0,t.jsxs)(e.p,{children:[\\"With every invocation, dbt generates and saves one or more JSON files called \\",(0,t.jsx)(e.a,{href:\\"https://docs.getdbt.com/reference/artifacts/dbt-artifacts\\",rel:\\"nofollow\\",children:\\"artifacts\\"}),\\" containing log data on the invocation results. \\",(0,t.jsx)(e.code,{children:\\"dbt run\\"}),\\" and \\",(0,t.jsx)(e.code,{children:\\"dbt test\\"}),\\" invocation logs are \\",(0,t.jsxs)(e.a,{href:\\"https://docs.getdbt.com/reference/artifacts/run-results-json\\",rel:\\"nofollow\\",children:[\\"stored in the file \\",(0,t.jsx)(e.code,{children:\\"run_results.json\\"})]}),\\", as per the dbt documentation:\\"]}),`\\n`,(0,t.jsxs)(e.blockquote,{children:[`\\n`,(0,t.jsxs)(e.p,{children:[\\"This file contains information about a completed invocation of dbt, including timing and status info for each node (model, test, etc) that was executed. In aggregate, many \\",(0,t.jsx)(e.code,{children:\\"run_results.json\\"}),\\" can be combined to calculate average model runtime, test failure rates, the number of record changes captured by snapshots, etc.\\"]}),`\\n`]}),`\\n`,(0,t.jsxs)(e.p,{children:[\\"Monitoring \\",(0,t.jsx)(e.code,{children:\\"dbt run\\"}),\\" invocation logs can help solve several issues, including tracking and alerting about table volumes, detecting excessive slot time from resource-intensive models, identifying cost spikes due to slot time or volume, and pinpointing slow execution times that may indicate scheduling issues. This system was crucial when we merged a PR with a change in our code that had an issue, producing a sudden drop in the number of daily rows in upstream Table A. By ingesting the \\",(0,t.jsx)(e.code,{children:\\"dbt run\\"}),\\" logs into Elastic, our anomaly detection job quickly identified anomalies in the daily row counts for Table A and its downstream tables, B, C, and D. The Data Analytics team received an alert notification about the issue, allowing us to promptly troubleshoot, fix and backfill the tables before it affected the weekly dashboards and downstream ML models.\\"]}),`\\n`,(0,t.jsxs)(e.p,{children:[\\"Monitoring \\",(0,t.jsx)(e.code,{children:\\"dbt test\\"}),\\" invocation logs can also address several issues, such as identifying duplicates in tables, detecting unnoticed alterations in allowed values for specific fields through validation of all enum fields, and resolving various other data processing and quality concerns. With dashboards and alerts on data quality tests, we proactively identify issues like duplicate keys, unexpected category values, and increased nulls, ensuring data integrity. 
In our team, we had an issue where a change in one of our raw lookup tables produced duplicated rows in our user table, doubling the number of users reported. By ingesting the \\",(0,t.jsx)(e.code,{children:\\"dbt test\\"}),\\" logs into Elastic, our rules detected that some duplicate tests had failed. The team received an alert notification about the issue, allowing us to troubleshoot it right away by finding the upstream table that was the root cause. These duplicates meant that downstream tables had to process 2x the amount of data, creating a spike in the bytes processed and slot time. The anomaly detection and alerts on the \\",(0,t.jsx)(e.code,{children:\\"dbt run\\"}),\\" logs also helped us spot these spikes for individual tables and allowed us to quantify the impact on our billing.\\"]}),`\\n`,(0,t.jsx)(e.p,{children:\\"Processing our dbt logs with Elastic and Kibana allows us to obtain real-time insights, helps us quickly troubleshoot potential issues, and keeps our data transformation processes running smoothly. We set up anomaly detection jobs and alerts in Kibana to monitor the number of rows processed by dbt, the slot time, and the results of the tests. This lets us catch real-time incidents, and by promptly identifying and fixing these issues, Elastic makes our data pipeline more resilient and our models more cost-effective, helping us stay on top of cost spikes or data quality issues.\\"}),`\\n`,(0,t.jsxs)(e.p,{children:[\\"We can also correlate this information with other events ingested into Elastic, for example using the \\",(0,t.jsx)(e.a,{href:\\"https://www.elastic.co/guide/en/enterprise-search/current/connectors-github.html\\",rel:\\"nofollow\\",children:\\"Elastic Github connector\\"}),\\", we can correlate data quality test failures or other anomalies with code changes to find the root cause of the commit or PR that caused the issues. By ingesting application logs into Elastic, we can also analyze if these issues in our pipelines have affected downstream applications, increasing latency, throughput or error rates using APM. Ingesting billing, revenue data or web traffic, we could also see the impact in business metrics.\\"]}),`\\n`,(0,t.jsx)(e.h2,{id:\\"how-to-export-dbt-invocation-logs-to-elasticsearch\\",children:\\"How to export dbt invocation logs to Elasticsearch\\"}),`\\n`,(0,t.jsxs)(e.p,{children:[\\"We use the \\",(0,t.jsx)(e.a,{href:\\"https://elasticsearch-py.readthedocs.io/en\\",rel:\\"nofollow\\",children:\\"Python Elasticsearch client\\"}),\\" to send the dbt invocation logs to Elastic after we run our \\",(0,t.jsx)(e.code,{children:\\"dbt run\\"}),\\" and \\",(0,t.jsx)(e.code,{children:\\"dbt test\\"}),\\" processes daily in production. 
The setup just requires you to install the \\",(0,t.jsx)(e.a,{href:\\"https://elasticsearch-py.readthedocs.io/en/v8.14.0/quickstart.html#installation\\",rel:\\"nofollow\\",children:\\"Elasticsearch Python client\\"}),\\" and obtain your Elastic Cloud ID (go to \\",(0,t.jsx)(e.a,{href:\\"https://cloud.elastic.co/deployments/\\",rel:\\"nofollow\\",children:\\"https://cloud.elastic.co/deployments/\\"}),\\", select your deployment and find the \\",(0,t.jsx)(e.code,{children:\\"Cloud ID\\"}),\\") and Elastic Cloud API Key \\",(0,t.jsx)(e.a,{href:\\"https://elasticsearch-py.readthedocs.io/en/v8.14.0/quickstart.html#connecting\\",rel:\\"nofollow\\",children:\\"(following this guide)\\"})]}),`\\n`,(0,t.jsxs)(e.p,{children:[\\"This Python helper function will index the results from your \\",(0,t.jsx)(e.code,{children:\\"run_results.json\\"}),\\" file to the specified index. You just need to export the variables to the environment:\\"]}),`\\n`,(0,t.jsxs)(e.ul,{children:[`\\n`,(0,t.jsxs)(e.li,{children:[(0,t.jsx)(e.code,{children:\\"RESULTS_FILE\\"}),\\": path to your \\",(0,t.jsx)(e.code,{children:\\"run_results.json\\"}),\\" file\\"]}),`\\n`,(0,t.jsxs)(e.li,{children:[(0,t.jsx)(e.code,{children:\\"DBT_RUN_LOGS_INDEX\\"}),\\": the name you want to give to the dbt run logs index in Elastic, e.g. \\",(0,t.jsx)(e.code,{children:\\"dbt_run_logs\\"})]}),`\\n`,(0,t.jsxs)(e.li,{children:[(0,t.jsx)(e.code,{children:\\"DBT_TEST_LOGS_INDEX\\"}),\\": the name you want to give to the dbt test logs index in Elastic, e.g. \\",(0,t.jsx)(e.code,{children:\\"dbt_test_logs\\"})]}),`\\n`,(0,t.jsx)(e.li,{children:(0,t.jsx)(e.code,{children:\\"ES_CLUSTER_CLOUD_ID\\"})}),`\\n`,(0,t.jsx)(e.li,{children:(0,t.jsx)(e.code,{children:\\"ES_CLUSTER_API_KEY\\"})}),`\\n`]}),`\\n`,(0,t.jsxs)(e.p,{children:[\\"Then call the function \\",(0,t.jsx)(e.code,{children:\\"log_dbt_es\\"}),\\" from your Python code or save this code as a Python script and run it after executing your \\",(0,t.jsx)(e.code,{children:\\"dbt run\\"}),\\" or \\",(0,t.jsx)(e.code,{children:\\"dbt test\\"}),\\" commands:\\"]}),`\\n`,(0,t.jsx)(e.pre,{children:(0,t.jsx)(e.code,{children:`from elasticsearch import Elasticsearch, helpers\\nimport os\\nimport sys\\nimport json\\n\\ndef log_dbt_es():\\n    RESULTS_FILE = os.environ[\\"RESULTS_FILE\\"]\\n    DBT_RUN_LOGS_INDEX = os.environ[\\"DBT_RUN_LOGS_INDEX\\"]\\n    DBT_TEST_LOGS_INDEX = os.environ[\\"DBT_TEST_LOGS_INDEX\\"]\\n    es_cluster_cloud_id = os.environ[\\"ES_CLUSTER_CLOUD_ID\\"]\\n    es_cluster_api_key = os.environ[\\"ES_CLUSTER_API_KEY\\"]\\n\\n    es_client = Elasticsearch(\\n        cloud_id=es_cluster_cloud_id,\\n        api_key=es_cluster_api_key,\\n        request_timeout=120,\\n    )\\n\\n    if not os.path.exists(RESULTS_FILE):\\n        print(f\\"ERROR: {RESULTS_FILE} No dbt run results found.\\")\\n        sys.exit(1)\\n\\n    with open(RESULTS_FILE, \\"r\\") as json_file:\\n        results = json.load(json_file)\\n        timestamp = results[\\"metadata\\"][\\"generated_at\\"]\\n        metadata = results[\\"metadata\\"]\\n        elapsed_time = results[\\"elapsed_time\\"]\\n        args = results[\\"args\\"]\\n        docs = []\\n        for result in results[\\"results\\"]:\\n            if result[\\"unique_id\\"].split(\\".\\")[0] == \\"test\\":\\n                result[\\"_index\\"] = DBT_TEST_LOGS_INDEX\\n            else:\\n                result[\\"_index\\"] = DBT_RUN_LOGS_INDEX\\n            result[\\"@timestamp\\"] = timestamp\\n            result[\\"metadata\\"] = metadata\\n            result[\\"elapsed_time\\"] = elapsed_time\\n            result[\\"args\\"] = args\\n            docs.append(result)\\n        _ = helpers.bulk(es_client, docs)\\n    return \\"Done\\"\\n\\n# Call the 
function\\nlog_dbt_es()\\n`})}),`\\n`,(0,t.jsxs)(e.p,{children:[\\"If you want to add/remove any other fields from \\",(0,t.jsx)(e.code,{children:\\"run_results.json\\"}),\\", you can modify the above function to do it.\\"]}),`\\n`,(0,t.jsx)(e.p,{children:\\"Once the results are indexed, you can use Kibana to create Data Views for both indexes and start exploring them in Discover.\\"}),`\\n`,(0,t.jsx)(e.p,{children:\\"Go to Discover, click on the data view selector on the top left and \\\\u201CCreate a data view\\\\u201D.\\"}),`\\n`,(0,t.jsx)(e.p,{children:(0,t.jsx)(e.img,{src:\\"/assets/images/monitor-dbt-pipelines-with-elastic-observability/discover-create-dataview.png\\",alt:\\"2 - discover create a data view\\",width:\\"502\\",height:\\"388\\"})}),`\\n`,(0,t.jsxs)(e.p,{children:[\\"Now you can create a data view with your preferred name. Do this for both dbt run (\\",(0,t.jsx)(e.code,{children:\\"DBT_RUN_LOGS_INDEX\\"}),\\" in your code) and dbt test (\\",(0,t.jsx)(e.code,{children:\\"DBT_TEST_LOGS_INDEX\\"}),\\" in your code) indices:\\"]}),`\\n`,(0,t.jsx)(e.p,{children:(0,t.jsx)(e.img,{src:\\"/assets/images/monitor-dbt-pipelines-with-elastic-observability/create-dataview.png\\",alt:\\"3 - create a data view\\",width:\\"2000\\",height:\\"1172\\"})}),`\\n`,(0,t.jsx)(e.p,{children:\\"Going back to Discover, you\\\\u2019ll be able to select the Data Views and explore the data.\\"}),`\\n`,(0,t.jsx)(e.p,{children:(0,t.jsx)(e.img,{src:\\"/assets/images/monitor-dbt-pipelines-with-elastic-observability/discover-logs-explorer.png\\",alt:\\"4 - discover logs explorer\\",width:\\"1632\\",height:\\"780\\"})}),`\\n`,(0,t.jsx)(e.h2,{id:\\"dbt-run-alerts-dashboards-and-ml-jobs\\",children:\\"dbt run alerts, dashboards and ML jobs\\"}),`\\n`,(0,t.jsxs)(e.p,{children:[\\"The invocation of \\",(0,t.jsx)(e.a,{href:\\"https://docs.getdbt.com/reference/commands/run\\",rel:\\"nofollow\\",children:(0,t.jsx)(e.code,{children:\\"dbt run\\"})}),\\" executes compiled SQL model files against the current database. \\",(0,t.jsx)(e.code,{children:\\"dbt run\\"}),\\" invocation logs contain the \\",(0,t.jsx)(e.a,{href:\\"https://docs.getdbt.com/reference/artifacts/run-results-json\\",rel:\\"nofollow\\",children:\\"following fields\\"}),\\":\\"]}),`\\n`,(0,t.jsxs)(e.ul,{children:[`\\n`,(0,t.jsxs)(e.li,{children:[(0,t.jsx)(e.code,{children:\\"unique_id\\"}),\\": Unique model identifier\\"]}),`\\n`,(0,t.jsxs)(e.li,{children:[(0,t.jsx)(e.code,{children:\\"execution_time\\"}),\\": Total time spent executing this model run\\"]}),`\\n`]}),`\\n`,(0,t.jsx)(e.p,{children:\\"The logs also contain the following metrics about the job execution from the adapter:\\"}),`\\n`,(0,t.jsxs)(e.ul,{children:[`\\n`,(0,t.jsx)(e.li,{children:(0,t.jsx)(e.code,{children:\\"adapter_response.bytes_processed\\"})}),`\\n`,(0,t.jsx)(e.li,{children:(0,t.jsx)(e.code,{children:\\"adapter_response.bytes_billed\\"})}),`\\n`,(0,t.jsx)(e.li,{children:(0,t.jsx)(e.code,{children:\\"adapter_response.slot_ms\\"})}),`\\n`,(0,t.jsx)(e.li,{children:(0,t.jsx)(e.code,{children:\\"adapter_response.rows_affected\\"})}),`\\n`]}),`\\n`,(0,t.jsxs)(e.p,{children:[\\"We have used Kibana to set up \\",(0,t.jsx)(e.a,{href:\\"https://www.elastic.co/guide/en/machine-learning/current/ml-ad-run-jobs.html\\",rel:\\"nofollow\\",children:\\"Anomaly Detection jobs\\"}),\\" on the above-mentioned metrics. 
You can configure a \\",(0,t.jsx)(e.a,{href:\\"https://www.elastic.co/guide/en/machine-learning/current/ml-anomaly-detection-job-types.html#multi-metric-jobs\\",rel:\\"nofollow\\",children:\\"multi-metric job\\"}),\\" split by \\",(0,t.jsx)(e.code,{children:\\"unique_id\\"}),\\" to be alerted when the sum of rows affected, slot time consumed, or bytes billed is anomalous per table. You can track one job per metric. If you have built a dashboard of the metrics per table, you can use \\",(0,t.jsx)(e.a,{href:\\"https://www.elastic.co/guide/en/machine-learning/8.14/ml-jobs-from-lens.html\\",rel:\\"nofollow\\",children:\\"this shortcut\\"}),\\" to create the Anomaly Detection job directly from the visualization. After the jobs are created and are running on incoming data, you can \\",(0,t.jsx)(e.a,{href:\\"https://www.elastic.co/guide/en/machine-learning/current/ml-ad-view-results.html\\",rel:\\"nofollow\\",children:\\"view the jobs\\"}),\\" and add them to a dashboard using the three dots button in the anomaly timeline:\\"]}),`\\n`,(0,t.jsx)(e.p,{children:(0,t.jsx)(e.img,{src:\\"/assets/images/monitor-dbt-pipelines-with-elastic-observability/ml-job-add-to-dashboard.png\\",alt:\\"5 - add ML job to dashboard\\",width:\\"2126\\",height:\\"930\\"})}),`\\n`,(0,t.jsxs)(e.p,{children:[\\"We have used the \\",(0,t.jsx)(e.a,{href:\\"https://www.elastic.co/guide/en/machine-learning/current/ml-configuring-alerts.html\\",rel:\\"nofollow\\",children:\\"ML job to set up alerts\\"}),\\" that send us emails/slack messages when anomalies are detected. Alerts can be created directly from the Jobs (Machine Learning > Anomaly Detection Jobs) page, by clicking on the three dots at the end of the ML job row:\\"]}),`\\n`,(0,t.jsx)(e.p,{children:(0,t.jsx)(e.img,{src:\\"/assets/images/monitor-dbt-pipelines-with-elastic-observability/ml-job-create-alert.png\\",alt:\\"6 - create alert from ML job\\",width:\\"2160\\",height:\\"590\\"})}),`\\n`,(0,t.jsxs)(e.p,{children:[\\"We also use \\",(0,t.jsx)(e.a,{href:\\"https://www.elastic.co/guide/en/kibana/current/dashboard.html\\",rel:\\"nofollow\\",children:\\"Kibana dashboards\\"}),\\" to visualize the anomaly detection job results and related metrics per table, to identify which tables consume most of our resources, to have visibility on their temporal evolution, and to measure aggregated metrics that can help us understand month over month changes.\\"]}),`\\n`,(0,t.jsxs)(e.p,{children:[(0,t.jsx)(e.img,{src:\\"/assets/images/monitor-dbt-pipelines-with-elastic-observability/ml-job-dashboard.png\\",alt:\\"7 - ML job in dashboard\\",width:\\"1622\\",height:\\"1084\\"}),`\\n`,(0,t.jsx)(e.img,{src:\\"/assets/images/monitor-dbt-pipelines-with-elastic-observability/dashboard-slot-time.png\\",alt:\\"8 - dashboard slot time chart\\",width:\\"1630\\",height:\\"516\\"}),`\\n`,(0,t.jsx)(e.img,{src:\\"/assets/images/monitor-dbt-pipelines-with-elastic-observability/dashboard-aggregated-metrics.png\\",alt:\\"9 - dashboard aggregated metrics\\",width:\\"1616\\",height:\\"316\\"})]}),`\\n`,(0,t.jsx)(e.h2,{id:\\"dbt-test-alerts-and-dashboards\\",children:\\"dbt test alerts and dashboards\\"}),`\\n`,(0,t.jsxs)(e.p,{children:[\\"You may already be familiar with \\",(0,t.jsx)(e.a,{href:\\"https://docs.getdbt.com/docs/build/data-tests\\",rel:\\"nofollow\\",children:\\"tests in dbt\\"}),\\", but if you\\\\u2019re not, dbt data tests are assertions you make about your models. 
Using the command \\",(0,t.jsx)(e.a,{href:\\"https://docs.getdbt.com/reference/commands/test\\",rel:\\"nofollow\\",children:(0,t.jsx)(e.code,{children:\\"dbt test\\"})}),\\", dbt will tell you if each test in your project passes or fails. \\",(0,t.jsx)(e.a,{href:\\"https://docs.getdbt.com/docs/build/data-tests#example\\",rel:\\"nofollow\\",children:\\"Here is an example of how to set them up\\"}),\\". In our team, we use out-of-the-box dbt tests (\\",(0,t.jsx)(e.code,{children:\\"unique\\"}),\\", \\",(0,t.jsx)(e.code,{children:\\"not_null\\"}),\\", \\",(0,t.jsx)(e.code,{children:\\"accepted_values\\"}),\\", and \\",(0,t.jsx)(e.code,{children:\\"relationships\\"}),\\") and the packages \\",(0,t.jsx)(e.a,{href:\\"https://hub.getdbt.com/dbt-labs/dbt_utils/latest/\\",rel:\\"nofollow\\",children:\\"dbt_utils\\"}),\\" and \\",(0,t.jsx)(e.a,{href:\\"https://hub.getdbt.com/calogica/dbt_expectations/latest/\\",rel:\\"nofollow\\",children:\\"dbt_expectations\\"}),\\" for some extra tests. When the command \\",(0,t.jsx)(e.code,{children:\\"dbt test\\"}),\\" is run, it generates logs that are stored in \\",(0,t.jsx)(e.code,{children:\\"run_results.json\\"}),\\".\\"]}),`\\n`,(0,t.jsxs)(e.p,{children:[\\"dbt test logs contain the \\",(0,t.jsx)(e.a,{href:\\"https://docs.getdbt.com/reference/artifacts/run-results-json\\",rel:\\"nofollow\\",children:\\"following fields\\"}),\\":\\"]}),`\\n`,(0,t.jsxs)(e.ul,{children:[`\\n`,(0,t.jsxs)(e.li,{children:[(0,t.jsx)(e.code,{children:\\"unique_id\\"}),\\": Unique test identifier, tests contain the \\\\u201Ctest\\\\u201D prefix in their unique identifier\\"]}),`\\n`,(0,t.jsxs)(e.li,{children:[(0,t.jsx)(e.code,{children:\\"status\\"}),\\": result of the test, \\",(0,t.jsx)(e.code,{children:\\"pass\\"}),\\" or \\",(0,t.jsx)(e.code,{children:\\"fail\\"})]}),`\\n`,(0,t.jsxs)(e.li,{children:[(0,t.jsx)(e.code,{children:\\"execution_time\\"}),\\": Total time spent executing this test\\"]}),`\\n`,(0,t.jsxs)(e.li,{children:[(0,t.jsx)(e.code,{children:\\"failures\\"}),\\": will be 0 if the test passes and 1 if the test fails\\"]}),`\\n`,(0,t.jsxs)(e.li,{children:[(0,t.jsx)(e.code,{children:\\"message\\"}),\\": If the test fails, reason why it failed\\"]}),`\\n`]}),`\\n`,(0,t.jsx)(e.p,{children:\\"The logs also contain the metrics about the job execution from the adapter.\\"}),`\\n`,(0,t.jsxs)(e.p,{children:[\\"We have set up alerts on document count (see \\",(0,t.jsx)(e.a,{href:\\"https://www.elastic.co/guide/en/observability/8.14/custom-threshold-alert.html\\",rel:\\"nofollow\\",children:\\"guide\\"}),\\") that will send us an email / slack message when there are any failed tests. 
The rule for the alerts is set up on the dbt test Data View that we have created before, the query filtering on \\",(0,t.jsx)(e.code,{children:\\"status:fail\\"}),` to obtain the logs for the tests that have failed, and the rule condition is document count bigger than 0.\\nWhenever there is a failure in any test in production, we get an alert with links to the alert details and dashboards to be able to troubleshoot them:`]}),`\\n`,(0,t.jsx)(e.p,{children:(0,t.jsx)(e.img,{src:\\"/assets/images/monitor-dbt-pipelines-with-elastic-observability/email-alert.png\\",alt:\\"10 - alert\\",width:\\"904\\",height:\\"640\\"})}),`\\n`,(0,t.jsx)(e.p,{children:\\"We have also built a dashboard to visualize the tests run, tests failed, and their execution time and slot time to have a historical view of the test run:\\"}),`\\n`,(0,t.jsx)(e.p,{children:(0,t.jsx)(e.img,{src:\\"/assets/images/monitor-dbt-pipelines-with-elastic-observability/dashboard-tests.png\\",alt:\\"11 - dashboard dbt tests\\",width:\\"1278\\",height:\\"1014\\"})}),`\\n`,(0,t.jsx)(e.h2,{id:\\"finding-root-causes-with-the-ai-assistant\\",children:\\"Finding Root Causes with the AI Assistant\\"}),`\\n`,(0,t.jsx)(e.p,{children:\\"The most effective way for us to analyze these multiple sources of information is using the AI Assistant to help us troubleshoot the incidents. In our case, we got an alert about a test failure, and we used the AI Assistant to give us context on what happened. Then we asked if there were any downstream consequences, and the AI Assistant interpreted the results of the Anomaly Detection job, which indicated a spike in slot time for one of our downstream tables and the increase of the slot time vs. the baseline. Then, we asked for the root cause, and the AI Assistant was able to find and provide us a link to a PR from our Github changelog that matched the start of the incident and was the most probable cause.\\"}),`\\n`,(0,t.jsx)(e.p,{children:(0,t.jsx)(e.img,{src:\\"/assets/images/monitor-dbt-pipelines-with-elastic-observability/ai-assistant.png\\",alt:\\"12 - ai assistant troubleshoot\\",width:\\"968\\",height:\\"1098\\"})}),`\\n`,(0,t.jsx)(e.h2,{id:\\"conclusion\\",children:\\"Conclusion\\"}),`\\n`,(0,t.jsx)(e.p,{children:\\"As a Data Analytics team, we are responsible for guaranteeing that the tables, charts, models, reports, and dashboards we provide to stakeholders are accurate and contain the right sources of information. As teams grow, the number of models we own becomes larger and more interconnected, and it isn\\\\u2019t easy to guarantee that everything is running smoothly and providing accurate results. Having a monitoring system that proactively alerts us on cost spikes, anomalies in row counts, or data quality test failures is like having a trusted companion that will alert you in advance if something goes wrong and help you get to the root cause of the issue.\\"}),`\\n`,(0,t.jsx)(e.p,{children:\\"dbt invocation logs are a crucial source of information about the status of our data pipelines, and Elastic is the perfect tool to extract the maximum potential out of them. 
Use this blog post as a starting point for utilizing your dbt logs to help your team achieve greater reliability and peace of mind, allowing them to focus on more strategic tasks rather than worrying about potential data issues.\\"})]})}function h(n={}){let{wrapper:e}=n.components||{};return e?(0,t.jsx)(e,{...n,children:(0,t.jsx)(c,{...n})}):c(n)}return v(E);})();\\n;return Component;"},"_id":"articles/monitor-dbt-pipelines-with-elastic-observability.mdx","_raw":{"sourceFilePath":"articles/monitor-dbt-pipelines-with-elastic-observability.mdx","sourceFileName":"monitor-dbt-pipelines-with-elastic-observability.mdx","sourceFileDir":"articles","contentType":"mdx","flattenedPath":"articles/monitor-dbt-pipelines-with-elastic-observability"},"type":"Article","imageUrl":"/assets/images/monitor-dbt-pipelines-with-elastic-observability/monitoring-dbt-with-elastic.png","readingTime":"13 min read","url":"/monitor-dbt-pipelines-with-elastic-observability","headings":[{"level":2,"title":"Why monitor dbt pipelines with Elastic?","href":"#why-monitor-dbt-pipelines-with-elastic"},{"level":2,"title":"How to export dbt invocation logs to Elasticsearch","href":"#how-to-export-dbt-invocation-logs-to-elasticsearch"},{"level":2,"title":"dbt run alerts, dashboards and ML jobs","href":"#dbt-run-alerts-dashboards-and-ml-jobs"},{"level":2,"title":"dbt test alerts and dashboards","href":"#dbt-test-alerts-and-dashboards"},{"level":2,"title":"Finding Root Causes with the AI Assistant","href":"#finding-root-causes-with-the-ai-assistant"},{"level":2,"title":"Conclusion","href":"#conclusion"}]},{"title":"Monitor OpenAI API and GPT models with OpenTelemetry and Elastic","slug":"monitor-openai-api-gpt-models-opentelemetry","date":"2023-04-04","description":"Get ready to be blown away by this game-changing approach to monitoring cutting-edge ChatGPT applications! As the ChatGPT phenomenon takes the world by storm, it\'s time to supercharge your monitoring game with OpenTelemetry and Elastic Observability.","image":"opentelemetry-graphic-ad-2-1920x1080.png","author":[{"slug":"david-hope","type":"Author","_raw":{}}],"tags":[{"slug":"openai","type":"Tag","_raw":{}},{"slug":"opentelemetry","type":"Tag","_raw":{}},{"slug":"apm","type":"Tag","_raw":{}},{"slug":"genai","type":"Tag","_raw":{}}],"body":{"raw":"\\nChatGPT is so hot right now, it broke the internet. As an avid user of ChatGPT and a developer of ChatGPT applications, I am incredibly excited by the possibilities of this technology. What I see happening is that there will be exponential growth of ChatGPT-based solutions, and people are going to need to monitor those solutions.\\n\\nSince this is a pretty new technology, we wouldn’t want to burden our shiny new code with proprietary technology, would we? No, we would not, and that is why we are going to use OpenTelemetry to monitor our ChatGPT code in this blog. This is particularly relevant for me as I recently created a service to generate meeting notes from Zoom calls. If I am to release this into the wild, how much is it going to cost me and how do I make sure it is available?\\n\\n## OpenAI APIs to the rescue\\n\\nThe OpenAI API is pretty awesome, there is no doubt. It also gives us the information shown below in each response to each API call, which can help us with understanding what we are being charged. By using the token counts, the model, and the pricing that OpenAI has put up on its website, we can calculate the cost. 
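\\n\\nAs a quick worked example, here is a sketch of that calculation using the davinci pricing that the cost function later in this post assumes ($0.02 per 1,000 tokens):\\n\\n```python\\n# Usage from the sample response below: 9 prompt + 20 completion tokens.\\ntotal_tokens = 29\\ncost = total_tokens * 0.02 / 1000  # text-davinci-003 is priced per 1K tokens\\nprint(f\\"${cost:.5f}\\")  # $0.00058\\n```\\n\\n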
The question is, how do we get this information into our monitoring tools?\\n\\n```json\\n{\\n  \\"choices\\": [\\n    {\\n      \\"finish_reason\\": \\"length\\",\\n      \\"index\\": 0,\\n      \\"logprobs\\": null,\\n      \\"text\\": \\"\\\\n\\\\nElastic is an amazing observability tool because it provides a comprehensive set of features for monitoring\\"\\n    }\\n  ],\\n  \\"created\\": 1680281710,\\n  \\"id\\": \\"cmpl-70CJq07gibupTcSM8xOWekOTV5FRF\\",\\n  \\"model\\": \\"text-davinci-003\\",\\n  \\"object\\": \\"text_completion\\",\\n  \\"usage\\": {\\n    \\"completion_tokens\\": 20,\\n    \\"prompt_tokens\\": 9,\\n    \\"total_tokens\\": 29\\n  }\\n}\\n```\\n\\n## OpenTelemetry to the rescue\\n\\n[OpenTelemetry](https://www.elastic.co/blog/opentelemetry-observability) is truly a fantastic piece of work. It has had so much adoption and work committed to it over the years, and it seems to really be getting to the point where we can call it the Linux of Observability. We can use it to record logs, metrics, and traces and get those in a vendor neutral way into our favorite observability tool — in this case, Elastic Observability.\\n\\nWith the latest and greatest OTel libraries in Python, we can auto-instrument external calls, and this will help us understand how OpenAI calls are performing. Let\'s take a sneak peek at our sample Python application, which implements Flask and the ChatGPT API and also has OpenTelemetry. If you want to try this yourself, take a look at the GitHub link at the end of this blog and follow these steps.\\n\\n### Set up Elastic Cloud account (if you already don’t have one)\\n\\n1. Sign up for a two-week free trial at [https://www.elastic.co/cloud/elasticsearch-service/signup](https://www.elastic.co/cloud/elasticsearch-service/signup).\\n2. Create a deployment.\\n\\nOnce you are logged in, click **Add integrations**.\\n\\n![elastic cloud deployment add integrations](/assets/images/monitor-openai-api-gpt-models-opentelemetry/blog-elastic-cloud-deployment-add-integrations.png)\\n\\nClick on **APM Integration**.\\n\\n![elastic apm integration](/assets/images/monitor-openai-api-gpt-models-opentelemetry/blog-elastic-apm-integration.png)\\n\\nThen scroll down to get the details you need for this blog:\\n\\n![elastic opentelemetry download](/assets/images/monitor-openai-api-gpt-models-opentelemetry/blog-elastic-opentelemetry-download.png)\\n\\nBe sure to set the following environment variables, replacing the values with the data you get from Elastic as above and from OpenAI [here](https://platform.openai.com/account/api-keys), and then run these export commands on the command line.\\n\\n```bash\\nexport OPEN_AI_KEY=sk-abcdefgh5ijk2l173mnop3qrstuvwxyzab2cde47fP2g9jij\\nexport OTEL_EXPORTER_OTLP_AUTH_HEADER=abc9ldeofghij3klmn\\nexport OTEL_EXPORTER_OTLP_ENDPOINT=https://123456abcdef.apm.us-west2.gcp.elastic-cloud.com:443\\n```\\n\\nAnd install the following Python libraries:\\n\\n```bash\\npip3 install opentelemetry-api\\npip3 install opentelemetry-sdk\\npip3 install opentelemetry-exporter-otlp\\npip3 install opentelemetry-instrumentation\\npip3 install opentelemetry-instrumentation-requests\\npip3 install openai\\npip3 install flask\\n```\\n\\nHere is a look at the code we are using for the example application. In the real world, this would be your own code. 
All this does is call OpenAI APIs with the following message: “Why is Elastic an amazing observability tool?”\\n\\n```python\\nimport openai\\nfrom flask import Flask\\nimport monitor  # Import the module\\nfrom opentelemetry.exporter.otlp.proto.grpc.trace_exporter import OTLPSpanExporter\\nimport urllib\\nimport os\\nfrom opentelemetry import trace\\nfrom opentelemetry.sdk.resources import SERVICE_NAME, Resource\\nfrom opentelemetry.sdk.trace import TracerProvider\\nfrom opentelemetry.sdk.trace.export import BatchSpanProcessor\\nfrom opentelemetry.instrumentation.requests import RequestsInstrumentor\\n\\n# OpenTelemetry setup code here, feel free to replace the “your-service-name” attribute here.\\nresource = Resource(attributes={\\n    SERVICE_NAME: \\"your-service-name\\"\\n})\\nprovider = TracerProvider(resource=resource)\\nprocessor = BatchSpanProcessor(\\n    OTLPSpanExporter(\\n        endpoint=os.getenv(\'OTEL_EXPORTER_OTLP_ENDPOINT\'),\\n        headers=\\"Authorization=Bearer%20\\" + os.getenv(\'OTEL_EXPORTER_OTLP_AUTH_HEADER\'),\\n    )\\n)\\nprovider.add_span_processor(processor)\\ntrace.set_tracer_provider(provider)\\ntracer = trace.get_tracer(__name__)\\nRequestsInstrumentor().instrument()\\n\\n# Initialize Flask app and instrument it\\napp = Flask(__name__)\\n# Set OpenAI API key\\nopenai.api_key = os.getenv(\'OPEN_AI_KEY\')\\n\\n@app.route(\\"/completion\\")\\n@tracer.start_as_current_span(\\"do_work\\")\\ndef completion():\\n    response = openai.Completion.create(\\n        model=\\"text-davinci-003\\",\\n        prompt=\\"Why is Elastic an amazing observability tool?\\",\\n        max_tokens=20,\\n        temperature=0\\n    )\\n    return response.choices[0].text.strip()\\n\\nif __name__ == \\"__main__\\":\\n    app.run()\\n```\\n\\nThis code should be fairly familiar to anyone who has implemented OpenTelemetry with Python — there is no specific magic. The magic happens inside the “monitor” code that you can use freely to instrument your own OpenAI applications.\\n\\n## Monkeying around\\n\\nInside the monitor.py code, you will see we do something called “Monkey Patching.” Monkey patching is a technique in Python where you dynamically modify the behavior of a class or module at runtime by modifying its attributes or methods. Monkey patching allows you to change the functionality of a class or module without having to modify its source code. It can be useful in situations where you need to modify the behavior of an existing class or module that you don\'t have control over or cannot modify directly.\\n\\nWhat we want to do here is modify the behavior of the “Completion” call so we can steal the response metrics and add them to our OpenTelemetry spans. 
You can see how we do that below:\\n\\n```python\\ndef count_completion_requests_and_tokens(func):\\n    @wraps(func)\\n    def wrapper(*args, **kwargs):\\n        counters[\'completion_count\'] += 1\\n        response = func(*args, **kwargs)\\n        token_count = response.usage.total_tokens\\n        prompt_tokens = response.usage.prompt_tokens\\n        completion_tokens = response.usage.completion_tokens\\n        cost = calculate_cost(response)\\n        strResponse = json.dumps(response)\\n        # Set OpenTelemetry attributes\\n        span = trace.get_current_span()\\n        if span:\\n            span.set_attribute(\\"completion_count\\", counters[\'completion_count\'])\\n            span.set_attribute(\\"token_count\\", token_count)\\n            span.set_attribute(\\"prompt_tokens\\", prompt_tokens)\\n            span.set_attribute(\\"completion_tokens\\", completion_tokens)\\n            span.set_attribute(\\"model\\", response.model)\\n            span.set_attribute(\\"cost\\", cost)\\n            span.set_attribute(\\"response\\", strResponse)\\n        return response\\n    return wrapper\\n\\n# Monkey-patch the openai.Completion.create function\\nopenai.Completion.create = count_completion_requests_and_tokens(openai.Completion.create)\\n```\\n\\nBy adding all this data to our Span, we can actually send it to our OpenTelemetry OTLP endpoint (in this case it will be Elastic). The benefit of doing this is that you can easily use the data for search or to build dashboards and visualizations. In the final step, we also want to calculate the cost. We do this by implementing the following function, which will calculate the cost of a single request to the OpenAI APIs.\\n\\n```python\\ndef calculate_cost(response):\\n    if response.model in [\'gpt-4\', \'gpt-4-0314\']:\\n        cost = (response.usage.prompt_tokens * 0.03 + response.usage.completion_tokens * 0.06) / 1000\\n    elif response.model in [\'gpt-4-32k\', \'gpt-4-32k-0314\']:\\n        cost = (response.usage.prompt_tokens * 0.06 + response.usage.completion_tokens * 0.12) / 1000\\n    elif \'gpt-3.5-turbo\' in response.model:\\n        cost = response.usage.total_tokens * 0.002 / 1000\\n    elif \'davinci\' in response.model:\\n        cost = response.usage.total_tokens * 0.02 / 1000\\n    elif \'curie\' in response.model:\\n        cost = response.usage.total_tokens * 0.002 / 1000\\n    elif \'babbage\' in response.model:\\n        cost = response.usage.total_tokens * 0.0005 / 1000\\n    elif \'ada\' in response.model:\\n        cost = response.usage.total_tokens * 0.0004 / 1000\\n    else:\\n        cost = 0\\n    return cost\\n```\\n\\n## Elastic to the rescue\\n\\nOnce we are capturing all this data, it’s time to have some fun with it in Elastic. In Discover, we can see all the data points we sent over using the OpenTelemetry library:\\n\\n![elastic discover apm](/assets/images/monitor-openai-api-gpt-models-opentelemetry/blog-elastic-discover-apm.png)\\n\\nWith these labels in place, it is very easy to build a dashboard. 
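\\n\\nBefore building anything, you can also pull these attributes back out with the Elasticsearch Python client to sanity-check what was indexed. Treat this as a rough sketch, not a recipe: the data stream pattern and field names below are assumptions (Elastic APM typically indexes string OTel attributes under `labels.*` and numeric ones under `numeric_labels.*`), and the `ELASTIC_CLOUD_ID`/`ELASTIC_API_KEY` variables are hypothetical, so verify the real field names in Discover first.\\n\\n```python\\nimport os\\nfrom elasticsearch import Elasticsearch\\n\\n# Hypothetical connection details for your Elastic deployment.\\nes = Elasticsearch(\\n    cloud_id=os.environ[\\"ELASTIC_CLOUD_ID\\"],\\n    api_key=os.environ[\\"ELASTIC_API_KEY\\"],\\n)\\n\\n# Sum the per-request cost attribute by model over the last 24 hours.\\nresp = es.search(\\n    index=\\"traces-apm*\\",  # assumed APM data stream pattern\\n    size=0,\\n    query={\\"range\\": {\\"@timestamp\\": {\\"gte\\": \\"now-24h\\"}}},\\n    aggs={\\n        \\"by_model\\": {\\n            \\"terms\\": {\\"field\\": \\"labels.model\\"},\\n            \\"aggs\\": {\\"total_cost\\": {\\"sum\\": {\\"field\\": \\"numeric_labels.cost\\"}}},\\n        }\\n    },\\n)\\nfor bucket in resp[\\"aggregations\\"][\\"by_model\\"][\\"buckets\\"]:\\n    print(bucket[\\"key\\"], round(bucket[\\"total_cost\\"][\\"value\\"], 5))\\n```\\n\\n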
Take a look at this one I built earlier ([which is also checked into my GitHub Repository](https://github.com/davidgeorgehope/ChatGPTMonitoringWithOtel/blob/main/chatGPTDashboard.ndjson)):\\n\\n![elastic labels dashboard](/assets/images/monitor-openai-api-gpt-models-opentelemetry/blog-elastic-labels-dashboard.png)\\n\\nWe can also see Transactions, Latency of the OpenAI service, and all the spans related to our ChatGPT service calls.\\n\\n![observability service name](/assets/images/monitor-openai-api-gpt-models-opentelemetry/blog-elastic-observability-service-name.png)\\n\\n![elastic your service name](/assets/images/monitor-openai-api-gpt-models-opentelemetry/blog-elastic-your-service-name.png)\\n\\n![elastic api openai](/assets/images/monitor-openai-api-gpt-models-opentelemetry/blog-elastic-api-openai.png)\\n\\nIn the transaction view, we can also see how long specific OpenAI calls have taken:\\n\\n![elastic latency distribution](/assets/images/monitor-openai-api-gpt-models-opentelemetry/blog-elastic-latency-distribution.png)\\n\\nSome requests to OpenAI here have taken over 3 seconds. ChatGPT can be very slow, so it’s important for us to understand how slow this is and if users are becoming frustrated.\\n\\n## Summary\\n\\nWe looked at monitoring ChatGPT with OpenTelemetry and Elastic. ChatGPT is a worldwide phenomenon and it’s no doubt going to grow and grow, and pretty soon everyone will be using it. Because it can be slow to get responses out, it is critical that people are able to understand the performance of any code that is using this service.\\n\\nThere is also the issue of cost, since it’s incredibly important to understand if this service is eating into your margins and if what you are asking for is profitable for your business. With the current economic environment, we have to keep an eye on profitability.\\n\\nTake a look at the code for this solution [here](https://github.com/davidgeorgehope/ChatGPTMonitoringWithOtel). And please feel free to use the “monitor” library to instrument your own OpenAI code.\\n\\nInterested in learning more about Elastic Observability? Check out the following resources:\\n\\n- [An Introduction to Elastic Observability](https://www.elastic.co/virtual-events/intro-to-elastic-observability)\\n- [Observability Fundamentals Training](https://www.elastic.co/training/observability-fundamentals)\\n- [Watch an Elastic Observability demo](https://www.elastic.co/observability/demo)\\n- [Observability Predictions and Trends for 2023](https://www.elastic.co/blog/observability-predictions-trends-2023)\\n\\nAnd sign up for our [Elastic Observability Trends Webinar](https://www.elastic.co/virtual-events/emerging-trends-in-observability) featuring AWS and Forrester, not to be missed!\\n\\n_In this blog post, we may have used third party generative AI tools, which are owned and operated by their respective owners. Elastic does not have any control over the third party tools and we have no responsibility or liability for their content, operation or use, nor for any loss or damage that may arise from your use of such tools. Please exercise caution when using AI tools with personal, sensitive or confidential information. Any data you submit may be used for AI training or other purposes. There is no guarantee that information you provide will be kept secure or confidential. 
You should familiarize yourself with the privacy practices and terms of use of any generative AI tools prior to use._\\n\\n_Elastic, Elasticsearch and associated marks are trademarks, logos or registered trademarks of Elasticsearch N.V. in the United States and other countries. All other company and product names are trademarks, logos or registered trademarks of their respective owners._\\n","code":"var Component=(()=>{var d=Object.create;var a=Object.defineProperty;var m=Object.getOwnPropertyDescriptor;var u=Object.getOwnPropertyNames;var g=Object.getPrototypeOf,y=Object.prototype.hasOwnProperty;var w=(n,e)=>()=>(e||n((e={exports:{}}).exports,e),e.exports),b=(n,e)=>{for(var o in e)a(n,o,{get:e[o],enumerable:!0})},r=(n,e,o,s)=>{if(e&&typeof e==\\"object\\"||typeof e==\\"function\\")for(let i of u(e))!y.call(n,i)&&i!==o&&a(n,i,{get:()=>e[i],enumerable:!(s=m(e,i))||s.enumerable});return n};var f=(n,e,o)=>(o=n!=null?d(g(n)):{},r(e||!n||!n.__esModule?a(o,\\"default\\",{value:n,enumerable:!0}):o,n)),_=n=>r(a({},\\"__esModule\\",{value:!0}),n);var c=w((O,l)=>{l.exports=_jsx_runtime});var k={};b(k,{default:()=>h,frontmatter:()=>v});var t=f(c()),v={title:\\"Monitor OpenAI API and GPT models with OpenTelemetry and Elastic\\",slug:\\"monitor-openai-api-gpt-models-opentelemetry\\",date:\\"2023-04-04\\",description:\\"Get ready to be blown away by this game-changing approach to monitoring cutting-edge ChatGPT applications! As the ChatGPT phenomenon takes the world by storm, it\'s time to supercharge your monitoring game with OpenTelemetry and Elastic Observability.\\",author:[{slug:\\"david-hope\\"}],image:\\"opentelemetry-graphic-ad-2-1920x1080.png\\",tags:[{slug:\\"openai\\"},{slug:\\"opentelemetry\\"},{slug:\\"apm\\"},{slug:\\"genai\\"}]};function p(n){let e={a:\\"a\\",code:\\"code\\",em:\\"em\\",h2:\\"h2\\",h3:\\"h3\\",img:\\"img\\",li:\\"li\\",ol:\\"ol\\",p:\\"p\\",pre:\\"pre\\",strong:\\"strong\\",ul:\\"ul\\",...n.components};return(0,t.jsxs)(t.Fragment,{children:[(0,t.jsx)(e.p,{children:\\"ChatGPT is so hot right now, it broke the internet. As an avid user of ChatGPT and a developer of ChatGPT applications, I am incredibly excited by the possibilities of this technology. What I see happening is that there will be exponential growth of ChatGPT-based solutions, and people are going to need to monitor those solutions.\\"}),`\\n`,(0,t.jsx)(e.p,{children:\\"Since this is a pretty new technology, we wouldn\\\\u2019t want to burden our shiny new code with proprietary technology, would we? No, we would not, and that is why we are going to use OpenTelemetry to monitor our ChatGPT code in this blog. This is particularly relevant for me as I recently created a service to generate meeting notes from Zoom calls. If I am to release this into the wild, how much is it going to cost me and how do I make sure it is available?\\"}),`\\n`,(0,t.jsx)(e.h2,{id:\\"openai-apis-to-the-rescue\\",children:\\"OpenAI APIs to the rescue\\"}),`\\n`,(0,t.jsx)(e.p,{children:\\"The OpenAI API is pretty awesome, there is no doubt. It also gives us the information shown below in each response to each API call, which can help us with understanding what we are being charged. By using the token counts, the model, and the pricing that OpenAI has put up on its website, we can calculate the cost. 
The question is, how do we get this information into our monitoring tools?\\"}),`\\n`,(0,t.jsx)(e.pre,{children:(0,t.jsx)(e.code,{className:\\"language-python\\",children:`{\\n \\"choices\\": [\\n {\\n \\"finish_reason\\": \\"length\\",\\n \\"index\\": 0,\\n \\"logprobs\\": null,\\n \\"text\\": \\"\\\\\\\\n\\\\\\\\nElastic is an amazing observability tool because it provides a comprehensive set of features for monitoring\\"\\n }\\n ],\\n \\"created\\": 1680281710,\\n \\"id\\": \\"cmpl-70CJq07gibupTcSM8xOWekOTV5FRF\\",\\n \\"model\\": \\"text-davinci-003\\",\\n \\"object\\": \\"text_completion\\",\\n \\"usage\\": {\\n \\"completion_tokens\\": 20,\\n \\"prompt_tokens\\": 9,\\n \\"total_tokens\\": 29\\n }\\n}\\n`})}),`\\n`,(0,t.jsx)(e.h2,{id:\\"opentelemetry-to-the-rescue\\",children:\\"OpenTelemetry to the rescue\\"}),`\\n`,(0,t.jsxs)(e.p,{children:[(0,t.jsx)(e.a,{href:\\"https://www.elastic.co/blog/opentelemetry-observability\\",rel:\\"nofollow\\",children:\\"OpenTelemetry\\"}),\\" is truly a fantastic piece of work. It has had so much adoption and work committed to it over the years, and it seems to really be getting to the point where we can call it the Linux of Observability. We can use it to record logs, metrics, and traces and get those in a vendor neutral way into our favorite observability tool \\\\u2014 in this case, Elastic Observability.\\"]}),`\\n`,(0,t.jsx)(e.p,{children:\\"With the latest and greatest otel libraries in Python, we can auto-instrument external calls, and this will help us understand how OpenAI calls are performing. Let\'s take a sneak peek at our sample Python application, which implements Flask and the ChatGPT API and also has OpenTelemetry. If you want to try this yourself, take a look at the GitHub link at the end of this blog and follow these steps.\\"}),`\\n`,(0,t.jsx)(e.h3,{id:\\"set-up-elastic-cloud-account-if-you-already-dont-have-one\\",children:\\"Set up Elastic Cloud account (if you already don\\\\u2019t have one)\\"}),`\\n`,(0,t.jsxs)(e.ol,{children:[`\\n`,(0,t.jsxs)(e.li,{children:[\\"Sign up for a two-week free trial at \\",(0,t.jsx)(e.a,{href:\\"https://www.elastic.co/cloud/elasticsearch-service/signup\\",rel:\\"nofollow\\",children:\\"https://www.elastic.co/cloud/elasticsearch-service/signup\\"}),\\".\\"]}),`\\n`,(0,t.jsx)(e.li,{children:\\"Create a deployment.\\"}),`\\n`]}),`\\n`,(0,t.jsxs)(e.p,{children:[\\"Once you are logged in, click \\",(0,t.jsx)(e.strong,{children:\\"Add integrations\\"}),\\".\\"]}),`\\n`,(0,t.jsx)(e.p,{children:(0,t.jsx)(e.img,{src:\\"/assets/images/monitor-openai-api-gpt-models-opentelemetry/blog-elastic-cloud-deployment-add-integrations.png\\",alt:\\"elastic cloud deployment add integrations\\",width:\\"1999\\",height:\\"1006\\"})}),`\\n`,(0,t.jsxs)(e.p,{children:[\\"Click on \\",(0,t.jsx)(e.strong,{children:\\"APM Integration\\"}),\\".\\"]}),`\\n`,(0,t.jsx)(e.p,{children:(0,t.jsx)(e.img,{src:\\"/assets/images/monitor-openai-api-gpt-models-opentelemetry/blog-elastic-apm-integration.png\\",alt:\\"elastic apm integration\\",width:\\"325\\",height:\\"178\\"})}),`\\n`,(0,t.jsx)(e.p,{children:\\"Then scroll down to get the details you need for this blog:\\"}),`\\n`,(0,t.jsx)(e.p,{children:(0,t.jsx)(e.img,{src:\\"/assets/images/monitor-openai-api-gpt-models-opentelemetry/blog-elastic-opentelemetry-download.png\\",alt:\\"elastic opentelemetry download\\",width:\\"1476\\",height:\\"1324\\"})}),`\\n`,(0,t.jsxs)(e.p,{children:[\\"Be sure to set the following Environment variables, replacing the variables with data you get from 
Elastic as above and OpenAI from \\",(0,t.jsx)(e.a,{href:\\"https://platform.openai.com/account/api-keys\\",rel:\\"nofollow\\",children:\\"here\\"}),\\", and then run these export commands on the command line.\\"]}),`\\n`,(0,t.jsx)(e.pre,{children:(0,t.jsx)(e.code,{className:\\"language-bash\\",children:`export OPEN_AI_KEY=sk-abcdefgh5ijk2l173mnop3qrstuvwxyzab2cde47fP2g9jij\\nexport OTEL_EXPORTER_OTLP_AUTH_HEADER=abc9ldeofghij3klmn\\nexport OTEL_EXPORTER_OTLP_ENDPOINT=https://123456abcdef.apm.us-west2.gcp.elastic-cloud.com:443\\n`})}),`\\n`,(0,t.jsx)(e.p,{children:\\"And install the following Python libraries:\\"}),`\\n`,(0,t.jsx)(e.pre,{children:(0,t.jsx)(e.code,{className:\\"language-python\\",children:`pip3 install opentelemetry-api\\npip3 install opentelemetry-sdk\\npip3 install opentelemetry-exporter-otlp\\npip3 install opentelemetry-instrumentation\\npip3 install opentelemetry-instrumentation-requests\\npip3 install openai\\npip3 install flask\\n`})}),`\\n`,(0,t.jsx)(e.p,{children:\\"Here is a look at the code we are using for the example application. In the real world, this would be your own code. All this does is call OpenAI APIs with the following message: \\\\u201CWhy is Elastic an amazing observability tool?\\\\u201D\\"}),`\\n`,(0,t.jsx)(e.pre,{children:(0,t.jsx)(e.code,{className:\\"language-python\\",children:`import openai\\nfrom flask import Flask\\nimport monitor # Import the module\\nfrom opentelemetry.exporter.otlp.proto.grpc.trace_exporter import OTLPSpanExporter\\nimport urllib\\nimport os\\nfrom opentelemetry import trace\\nfrom opentelemetry.sdk.resources import SERVICE_NAME, Resource\\nfrom opentelemetry.sdk.trace import TracerProvider\\nfrom opentelemetry.sdk.trace.export import BatchSpanProcessor\\nfrom opentelemetry.instrumentation.requests import RequestsInstrumentor\\n\\n# OpenTelemetry setup up code here, feel free to replace the \\\\u201Cyour-service-name\\\\u201D attribute here.\\nresource = Resource(attributes={\\n SERVICE_NAME: \\"your-service-name\\"\\n})\\nprovider = TracerProvider(resource=resource)\\nprocessor = BatchSpanProcessor(OTLPSpanExporter(endpoint=os.getenv(\'OTEL_EXPORTER_OTLP_ENDPOINT\'),\\n headers=\\"Authorization=Bearer%20\\"+os.getenv(\'OTEL_EXPORTER_OTLP_AUTH_HEADER\')))\\nprovider.add_span_processor(processor)\\ntrace.set_tracer_provider(provider)\\ntracer = trace.get_tracer(__name__)\\nRequestsInstrumentor().instrument()\\n\\n\\n\\n# Initialize Flask app and instrument it\\n\\napp = Flask(__name__)\\n# Set OpenAI API key\\nopenai.api_key = os.getenv(\'OPEN_AI_KEY\')\\n\\n\\n@app.route(\\"/completion\\")\\n@tracer.start_as_current_span(\\"do_work\\")\\ndef completion():\\n response = openai.Completion.create(\\n model=\\"text-davinci-003\\",\\n prompt=\\"Why is Elastic an amazing observability tool?\\",\\n max_tokens=20,\\n temperature=0\\n )\\n return response.choices[0].text.strip()\\n\\nif __name__ == \\"__main__\\":\\n app.run()\\n`})}),`\\n`,(0,t.jsx)(e.p,{children:\\"This code should be fairly familiar to anyone who has implemented OpenTelemetry with Python here \\\\u2014 there is no specific magic. 
The magic happens inside the \\\\u201Cmonitor\\\\u201D code that you can use freely to instrument your own OpenAI applications.\\"}),`\\n`,(0,t.jsx)(e.h2,{id:\\"monkeying-around\\",children:\\"Monkeying around\\"}),`\\n`,(0,t.jsx)(e.p,{children:\\"Inside the monitor.py code, you will see we do something called \\\\u201CMonkey Patching.\\\\u201D Monkey patching is a technique in Python where you dynamically modify the behavior of a class or module at runtime by modifying its attributes or methods. Monkey patching allows you to change the functionality of a class or module without having to modify its source code. It can be useful in situations where you need to modify the behavior of an existing class or module that you don\'t have control over or cannot modify directly.\\"}),`\\n`,(0,t.jsx)(e.p,{children:\\"What we want to do here is modify the behavior of the \\\\u201CCompletion\\\\u201D call so we can steal the response metrics and add them to our OpenTelemetry spans. You can see how we do that below:\\"}),`\\n`,(0,t.jsx)(e.pre,{children:(0,t.jsx)(e.code,{className:\\"language-python\\",children:`def count_completion_requests_and_tokens(func):\\n @wraps(func)\\n def wrapper(*args, **kwargs):\\n counters[\'completion_count\'] += 1\\n response = func(*args, **kwargs)\\n token_count = response.usage.total_tokens\\n prompt_tokens = response.usage.prompt_tokens\\n completion_tokens = response.usage.completion_tokens\\n cost = calculate_cost(response)\\n strResponse = json.dumps(response)\\n # Set OpenTelemetry attributes\\n span = trace.get_current_span()\\n if span:\\n span.set_attribute(\\"completion_count\\", counters[\'completion_count\'])\\n span.set_attribute(\\"token_count\\", token_count)\\n span.set_attribute(\\"prompt_tokens\\", prompt_tokens)\\n span.set_attribute(\\"completion_tokens\\", completion_tokens)\\n span.set_attribute(\\"model\\", response.model)\\n span.set_attribute(\\"cost\\", cost)\\n span.set_attribute(\\"response\\", strResponse)\\n return response\\n return wrapper\\n# Monkey-patch the openai.Completion.create function\\nopenai.Completion.create = count_completion_requests_and_tokens(openai.Completion.create)\\n`})}),`\\n`,(0,t.jsx)(e.p,{children:\\"By adding all this data to our Span, we can actually send it to our OpenTelemetry OTLP endpoint (in this case it will be Elastic). The benefit of doing this is that you can easily use the data for search or to build dashboards and visualizations. In the final step, we also want to calculate the cost. 
We do this by implementing the following function, which will calculate the cost of a single request to the OpenAI APIs.\\"}),`\\n`,(0,t.jsx)(e.pre,{children:(0,t.jsx)(e.code,{className:\\"language-python\\",children:`def calculate_cost(response):\\n if response.model in [\'gpt-4\', \'gpt-4-0314\']:\\n cost = (response.usage.prompt_tokens * 0.03 + response.usage.completion_tokens * 0.06) / 1000\\n elif response.model in [\'gpt-4-32k\', \'gpt-4-32k-0314\']:\\n cost = (response.usage.prompt_tokens * 0.06 + response.usage.completion_tokens * 0.12) / 1000\\n elif \'gpt-3.5-turbo\' in response.model:\\n cost = response.usage.total_tokens * 0.002 / 1000\\n elif \'davinci\' in response.model:\\n cost = response.usage.total_tokens * 0.02 / 1000\\n elif \'curie\' in response.model:\\n cost = response.usage.total_tokens * 0.002 / 1000\\n elif \'babbage\' in response.model:\\n cost = response.usage.total_tokens * 0.0005 / 1000\\n elif \'ada\' in response.model:\\n cost = response.usage.total_tokens * 0.0004 / 1000\\n else:\\n cost = 0\\n return cost\\n`})}),`\\n`,(0,t.jsx)(e.h2,{id:\\"elastic-to-the-rescue\\",children:\\"Elastic to the rescue\\"}),`\\n`,(0,t.jsx)(e.p,{children:\\"Once we are capturing all this data, it\\\\u2019s time to have some fun with it in Elastic. In Discover, we can see all the data points we sent over using the OpenTelemetry library:\\"}),`\\n`,(0,t.jsx)(e.p,{children:(0,t.jsx)(e.img,{src:\\"/assets/images/monitor-openai-api-gpt-models-opentelemetry/blog-elastic-discover-apm.png\\",alt:\\"elastic discover apm\\",width:\\"1507\\",height:\\"625\\"})}),`\\n`,(0,t.jsxs)(e.p,{children:[\\"With these labels in place, it is very easy to build a dashboard. Take a look at this one I built earlier (\\",(0,t.jsx)(e.a,{href:\\"https://github.com/davidgeorgehope/ChatGPTMonitoringWithOtel/blob/main/chatGPTDashboard.ndjson\\",rel:\\"nofollow\\",children:\\"which is also checked into my GitHub Repository\\"}),\\"):\\"]}),`\\n`,(0,t.jsx)(e.p,{children:(0,t.jsx)(e.img,{src:\\"/assets/images/monitor-openai-api-gpt-models-opentelemetry/blog-elastic-labels-dashboard.png\\",alt:\\"elastic labels dashboard\\",width:\\"1507\\",height:\\"845\\"})}),`\\n`,(0,t.jsx)(e.p,{children:\\"We can also see Transactions, Latency of the OpenAI service, and all the spans related to our ChatGPT service calls.\\"}),`\\n`,(0,t.jsx)(e.p,{children:(0,t.jsx)(e.img,{src:\\"/assets/images/monitor-openai-api-gpt-models-opentelemetry/blog-elastic-observability-service-name.png\\",alt:\\"observability service name\\",width:\\"1729\\",height:\\"697\\"})}),`\\n`,(0,t.jsx)(e.p,{children:(0,t.jsx)(e.img,{src:\\"/assets/images/monitor-openai-api-gpt-models-opentelemetry/blog-elastic-your-service-name.png\\",alt:\\"elastic your service name\\",width:\\"1473\\",height:\\"702\\"})}),`\\n`,(0,t.jsx)(e.p,{children:(0,t.jsx)(e.img,{src:\\"/assets/images/monitor-openai-api-gpt-models-opentelemetry/blog-elastic-api-openai.png\\",alt:\\"elastic api openai\\",width:\\"1481\\",height:\\"700\\"})}),`\\n`,(0,t.jsx)(e.p,{children:\\"In the transaction view, we can also see how long specific OpenAI calls have taken:\\"}),`\\n`,(0,t.jsx)(e.p,{children:(0,t.jsx)(e.img,{src:\\"/assets/images/monitor-openai-api-gpt-models-opentelemetry/blog-elastic-latency-distribution.png\\",alt:\\"elastic latency distribution\\",width:\\"1479\\",height:\\"748\\"})}),`\\n`,(0,t.jsx)(e.p,{children:\\"Some requests to OpenAI here have taken over 3 seconds. 
ChatGPT can be very slow, so it\\\\u2019s important for us to understand how slow this is and if users are becoming frustrated.\\"}),`\\n`,(0,t.jsx)(e.h2,{id:\\"summary\\",children:\\"Summary\\"}),`\\n`,(0,t.jsx)(e.p,{children:\\"We looked at monitoring ChatGPT with OpenTelemetry with Elastic. ChatGPT is a worldwide phenomenon and it\\\\u2019s going to no doubt grow and grow, and pretty soon everyone will be using it. Because it can be slow to get responses out, it is critical that people are able to understand the performance of any code that is using this service.\\"}),`\\n`,(0,t.jsx)(e.p,{children:\\"There is also the issue of cost, since it\\\\u2019s incredibly important to understand if this service is eating into your margins and if what you are asking for is profitable for your business. With the current economic environment, we have to keep an eye on profitability.\\"}),`\\n`,(0,t.jsxs)(e.p,{children:[\\"Take a look at the code for this solution \\",(0,t.jsx)(e.a,{href:\\"https://github.com/davidgeorgehope/ChatGPTMonitoringWithOtel\\",rel:\\"nofollow\\",children:\\"here\\"}),\\". And please feel free to use the \\\\u201Cmonitor\\\\u201D library to instrument your own OpenAI code.\\"]}),`\\n`,(0,t.jsx)(e.p,{children:\\"Interested in learning more about Elastic Observability? Check out the following resources:\\"}),`\\n`,(0,t.jsxs)(e.ul,{children:[`\\n`,(0,t.jsx)(e.li,{children:(0,t.jsx)(e.a,{href:\\"https://www.elastic.co/virtual-events/intro-to-elastic-observability\\",rel:\\"nofollow\\",children:\\"An Introduction to Elastic Observability\\"})}),`\\n`,(0,t.jsx)(e.li,{children:(0,t.jsx)(e.a,{href:\\"https://www.elastic.co/training/observability-fundamentals\\",rel:\\"nofollow\\",children:\\"Observability Fundamentals Training\\"})}),`\\n`,(0,t.jsx)(e.li,{children:(0,t.jsx)(e.a,{href:\\"https://www.elastic.co/observability/demo\\",rel:\\"nofollow\\",children:\\"Watch an Elastic Observability demo\\"})}),`\\n`,(0,t.jsx)(e.li,{children:(0,t.jsx)(e.a,{href:\\"https://www.elastic.co/blog/observability-predictions-trends-2023\\",rel:\\"nofollow\\",children:\\"Observability Predictions and Trends for 2023\\"})}),`\\n`]}),`\\n`,(0,t.jsxs)(e.p,{children:[\\"And sign up for our \\",(0,t.jsx)(e.a,{href:\\"https://www.elastic.co/virtual-events/emerging-trends-in-observability\\",rel:\\"nofollow\\",children:\\"Elastic Observability Trends Webinar\\"}),\\" featuring AWS and Forrester, not to be missed!\\"]}),`\\n`,(0,t.jsx)(e.p,{children:(0,t.jsx)(e.em,{children:\\"In this blog post, we may have used third party generative AI tools, which are owned and operated by their respective owners. Elastic does not have any control over the third party tools and we have no responsibility or liability for their content, operation or use, nor for any loss or damage that may arise from your use of such tools. Please exercise caution when using AI tools with personal, sensitive or confidential information. Any data you submit may be used for AI training or other purposes. There is no guarantee that information you provide will be kept secure or confidential. You should familiarize yourself with the privacy practices and terms of use of any generative AI tools prior to use.\\"})}),`\\n`,(0,t.jsx)(e.p,{children:(0,t.jsx)(e.em,{children:\\"Elastic, Elasticsearch and associated marks are trademarks, logos or registered trademarks of Elasticsearch N.V. in the United States and other countries. 
All other company and product names are trademarks, logos or registered trademarks of their respective owners.\\"})})]})}function h(n={}){let{wrapper:e}=n.components||{};return e?(0,t.jsx)(e,{...n,children:(0,t.jsx)(p,{...n})}):p(n)}return _(k);})();\\n;return Component;"},"_id":"articles/monitor-openai-api-gpt-models-opentelemetry-elastic.mdx","_raw":{"sourceFilePath":"articles/monitor-openai-api-gpt-models-opentelemetry-elastic.mdx","sourceFileName":"monitor-openai-api-gpt-models-opentelemetry-elastic.mdx","sourceFileDir":"articles","contentType":"mdx","flattenedPath":"articles/monitor-openai-api-gpt-models-opentelemetry-elastic"},"type":"Article","imageUrl":"/assets/images/monitor-openai-api-gpt-models-opentelemetry/opentelemetry-graphic-ad-2-1920x1080.png","readingTime":"11 min read","url":"/monitor-openai-api-gpt-models-opentelemetry","headings":[{"level":2,"title":"OpenAI APIs to the rescue","href":"#openai-apis-to-the-rescue"},{"level":2,"title":"OpenTelemetry to the rescue","href":"#opentelemetry-to-the-rescue"},{"level":3,"title":"Set up Elastic Cloud account (if you already don’t have one)","href":"#set-up-elastic-cloud-account-if-you-already-dont-have-one"},{"level":2,"title":"Monkeying around","href":"#monkeying-around"},{"level":2,"title":"Elastic to the rescue","href":"#elastic-to-the-rescue"},{"level":2,"title":"Summary","href":"#summary"}]},{"title":"Monitor your Python data pipelines with OTEL","slug":"monitor-your-python-data-pipelines-with-otel","date":"2024-08-08","description":"Learn how to configure OTEL for your data pipelines, detect any anomalies, analyze performance, and set up corresponding alerts with Elastic.","image":"main_image.jpg","author":[{"slug":"tamara-dancheva","type":"Author","_raw":{}},{"slug":"almudena-sanz-olive","type":"Author","_raw":{}}],"tags":[{"slug":"opentelemetry","type":"Tag","_raw":{}},{"slug":"log-analytics","type":"Tag","_raw":{}},{"slug":"apm","type":"Tag","_raw":{}},{"slug":"machine-learning","type":"Tag","_raw":{}},{"slug":"python","type":"Tag","_raw":{}}],"body":{"raw":"\\nThis article delves into how to implement observability practices, particularly using [OpenTelemetry (OTEL)](https://opentelemetry.io/) in Python, to enhance the monitoring and quality control of data pipelines using Elastic. While the primary focus of the examples presented in the article is ETL (Extract, Transform, Load) processes to ensure the accuracy and reliability of data pipelines that is crucial for Business Intelligence (BI), the strategies and tools discussed are equally applicable to Python processes used for Machine Learning (ML) models or other data processing tasks.\\n\\n## Introduction\\n\\nData pipelines, particularly ETL processes, form the backbone of modern data architectures. These pipelines are responsible for extracting raw data from various sources, transforming it into meaningful information, and loading it into data warehouses or data lakes for analysis and reporting.\\n\\nIn our organization, we have Python-based ETL scripts that play a pivotal role in exporting and processing data from Elasticsearch (ES) clusters and loading it into [Google BigQuery (BQ)](https://cloud.google.com/bigquery). This processed data then feeds into [DBT (Data Build Tool)](https://www.getdbt.com) models, which further refine the data and make it available for analytics and reporting. 
To see the full architecture and learn how we monitor our DBT pipelines with Elastic, see [Monitor your DBT pipelines with Elastic Observability](https://www.elastic.co/observability-labs/blog/monitor-dbt-pipelines-with-elastic-observability). In this article, we focus on the ETL scripts. Given the critical nature of these scripts, it is imperative to set up mechanisms to control and ensure the quality of the data they generate.\\n\\nThe strategies discussed here can be extended to any script or application that handles data processing or machine learning models, regardless of the programming language used, as long as there is a corresponding agent that supports OTEL instrumentation. \\n\\n## Motivation\\n\\nObservability in data pipelines involves monitoring the entire lifecycle of data processing to ensure that everything works as expected. It includes:\\n\\n1. Data Quality Control:\\n- Detecting anomalies in the data, such as unexpected drops in record counts.\\n- Verifying that data transformations are applied correctly and consistently.\\n- Ensuring the integrity and accuracy of the data loaded into the data warehouse.\\n\\n2. Performance Monitoring:\\n- Tracking the execution time of ETL scripts to identify bottlenecks and optimize performance.\\n- Monitoring resource usage, such as memory and CPU consumption, to ensure efficient use of infrastructure.\\n\\n3. Real-time Alerting:\\n- Setting up alerts for immediate notification of issues such as failed ETL jobs, data quality issues, or performance degradation.\\n- Identifying the root cause of such incidents.\\n- Proactively addressing incidents to minimize downtime and impact on business operations.\\n\\nIssues such as failed ETL jobs can even point to larger infrastructure problems or data quality issues at the source.\\n\\n## Steps for Instrumentation\\n\\nHere are the steps to automatically instrument your Python script for exporting OTEL traces, metrics, and logs.\\n\\n### Step 1: Import Required Libraries\\n\\nWe first need to install the following libraries.\\n\\n```sh\\npip install elastic-opentelemetry google-cloud-bigquery[opentelemetry]\\n```\\nYou can also add them to your project\'s `requirements.txt` file and install them with `pip install -r requirements.txt`.\\n\\n#### Explanation of Dependencies\\n\\n1. **elastic-opentelemetry**: This package is the Elastic Distribution for OpenTelemetry Python. Under the hood it will install the following packages: \\n\\n\\t- **opentelemetry-distro**: This package is a convenience distribution of OpenTelemetry, which includes the OpenTelemetry SDK, APIs, and various instrumentation packages. It simplifies the setup and configuration of OpenTelemetry in your application.\\n\\n\\t- **opentelemetry-exporter-otlp**: This package provides an exporter that sends telemetry data to the OpenTelemetry Collector or any other endpoint that supports the OpenTelemetry Protocol (OTLP). This includes traces, metrics, and logs.\\n\\n\\t- **opentelemetry-instrumentation-system-metrics**: This package provides instrumentation for collecting system metrics, such as CPU usage, memory usage, and other system-level metrics.\\n\\n2. 
**google-cloud-bigquery[opentelemetry]**: This package integrates Google Cloud BigQuery with OpenTelemetry, allowing you to trace and monitor BigQuery operations.\\n\\n\\n### Step 2: Export OTEL Variables\\n\\nSet the necessary OpenTelemetry (OTEL) variables by getting the configuration from APM OTEL from Elastic.\\n\\nGo to APM -> Services -> Add data (top left corner).\\n\\n![1 - Get OTEL variables step 1](/assets/images/monitor-your-python-data-pipelines-with-otel/otel-variables-1.png)\\n\\nIn this section, you will find the steps to configure the various APM agents. Navigate to OpenTelemetry to find the variables that you need to export. \\n\\n![2 - Get OTEL variables step 2](/assets/images/monitor-your-python-data-pipelines-with-otel/otel-variables-2.png)\\n\\n**Find OTLP Endpoint**:\\n- Look for the section related to OpenTelemetry or OTLP configuration.\\n- The `OTEL_EXPORTER_OTLP_ENDPOINT` is typically provided as part of the setup instructions for integrating OpenTelemetry with Elastic APM. It might look something like `https:///otlp`.\\n\\n**Obtain OTLP Headers**:\\n\\n- In the same section, you should find instructions or a field for OTLP headers. These headers are often used for authentication purposes.\\n- Copy the necessary headers provided by the interface. They might look like `Authorization: Bearer `.\\n\\nNote: You need to replace the whitespace between `Bearer` and your token with `%20` in the `OTEL_EXPORTER_OTLP_HEADERS` variable when using Python.\\n\\nAlternatively, you can use a different approach for authentication using API keys (see [instructions](https://github.com/elastic/elastic-otel-python?tab=readme-ov-file#authentication)). If you are using our [serverless offering](https://www.elastic.co/docs/current/serverless/general/what-is-serverless-elastic), you will need to use this approach instead. \\n\\n**Set up the variables**:\\n- Replace the placeholders in your script with the actual values obtained from the Elastic APM interface and execute it in your shell via the source command `source env.sh`.\\n\\nBelow is a script to set these variables:\\n\\n```sh\\n#!/bin/bash\\necho \\"--- :otel: Setting OTEL variables\\"\\nexport OTEL_EXPORTER_OTLP_ENDPOINT=\'https://your-apm-server/otlp:443\'\\nexport OTEL_EXPORTER_OTLP_HEADERS=\'Authorization=Bearer%20your-token\'\\nexport OTEL_PYTHON_LOGGING_AUTO_INSTRUMENTATION_ENABLED=true\\nexport OTEL_PYTHON_LOG_CORRELATION=true\\nexport ELASTIC_OTEL_SYSTEM_METRICS_ENABLED=true\\nexport OTEL_METRIC_EXPORT_INTERVAL=5000\\nexport OTEL_LOGS_EXPORTER=\\"otlp,console\\"\\n```\\n\\nWith these variables set, we are ready for auto-instrumentation without needing to add anything to the code.\\n\\n#### Explanation of Variables\\n\\n- **OTEL_EXPORTER_OTLP_ENDPOINT**: This variable specifies the endpoint to which OTLP data (traces, metrics, logs) will be sent. Replace `placeholder` with your actual OTLP endpoint.\\n \\n- **OTEL_EXPORTER_OTLP_HEADERS**: This variable specifies any headers required for authentication or other purposes when sending OTLP data. 
Replace `placeholder` with your actual OTLP headers.\\n \\n- **OTEL_PYTHON_LOGGING_AUTO_INSTRUMENTATION_ENABLED**: This variable enables auto-instrumentation for logging in Python, allowing logs to be automatically enriched with trace context.\\n\\n- **OTEL_PYTHON_LOG_CORRELATION**: This variable enables log correlation, which includes trace context in log entries to correlate logs with traces.\\n\\n- **OTEL_METRIC_EXPORT_INTERVAL**: This variable specifies the metric export interval in milliseconds, in this case 5s. \\n\\n- **OTEL_LOGS_EXPORTER**: This variable specifies the exporter to use for logs. Setting it to \\"otlp\\" means that logs will be exported using the OTLP protocol. Adding \\"console\\" specifies that logs should be exported to both the OTLP endpoint and the console. In our case, for better visibility on the infra side, we chose to export to the console as well.\\n\\n- **ELASTIC_OTEL_SYSTEM_METRICS_ENABLED**: This variable is needed when using the Elastic distribution, as it is set to false by default. \\n\\nNote: **OTEL_METRICS_EXPORTER** and **OTEL_TRACES_EXPORTER**: These variables specify the exporters to use for metrics and traces; they are set to \\"otlp\\" by default, which means that metrics and traces will be exported using the OTLP protocol.\\n\\n### Running Python ETLs\\n\\nWe run Python ETLs with the following command:\\n\\n```sh\\nOTEL_RESOURCE_ATTRIBUTES=\\"service.name=x-ETL,service.version=1.0,deployment.environment=production\\" opentelemetry-instrument python3 X_ETL.py\\n```\\n\\n#### Explanation of the Command\\n\\n- **OTEL_RESOURCE_ATTRIBUTES**: This variable specifies additional resource attributes, such as [service name](https://www.elastic.co/guide/en/observability/current/apm.html), service version and deployment environment, that will be included in all telemetry data. You can customize these values per your needs. You can use a different service name for each script.\\n\\n- **opentelemetry-instrument**: This command auto-instruments the specified Python script for OpenTelemetry. It sets up the necessary hooks to collect traces, metrics, and logs.\\n\\n- **python3 X_ETL.py**: This runs the specified Python script (`X_ETL.py`).\\n\\n### Tracing\\n\\nWe export the traces via the default OTLP protocol.\\n\\nTracing is a key aspect of monitoring and understanding the performance of applications. [Spans](https://www.elastic.co/guide/en/observability/current/apm-data-model-spans.html) form the building blocks of tracing. They encapsulate detailed information about the execution of specific code paths. They record the start and end times of activities and can have hierarchical relationships with other spans, forming a parent/child structure.\\n\\nSpans include essential attributes such as transaction IDs, parent IDs, start times, durations, names, types, subtypes, and actions. Additionally, spans may contain stack traces, which provide a detailed view of function calls, including attributes like function name, file path, and line number, which is especially useful for debugging. These attributes help us analyze the script\'s execution flow, identify performance issues, and enhance optimization efforts.\\n\\nWith the default instrumentation, the whole Python script would be a single span. In our case, we have decided to manually add specific spans for the different phases of the Python process, to be able to measure their latency, throughput, error rate, etc. individually. 
This is how we define spans manually: \\n\\n```python\\nfrom opentelemetry import trace\\n\\nif __name__ == \\"__main__\\":\\n\\n    tracer = trace.get_tracer(\\"main\\")\\n    with tracer.start_as_current_span(\\"initialization\\") as span:\\n        # Init code\\n        …\\n    with tracer.start_as_current_span(\\"search\\") as span:\\n        # Step 1 - Search code\\n        …\\n    with tracer.start_as_current_span(\\"transform\\") as span:\\n        # Step 2 - Transform code\\n        …\\n    with tracer.start_as_current_span(\\"load\\") as span:\\n        # Step 3 - Load code\\n        …\\n```\\nYou can explore traces in the APM interface as shown below. \\n\\n![3 - APM Traces view](/assets/images/monitor-your-python-data-pipelines-with-otel/Traces-APM-Observability-Elastic.png)\\n\\n### Metrics \\n\\nWe export metrics via the default OTLP protocol as well, such as CPU usage and memory. No extra code needs to be added in the script itself. \\n\\nNote: Remember to set `ELASTIC_OTEL_SYSTEM_METRICS_ENABLED` to true. \\n\\n![4 - APM Metrics view](/assets/images/monitor-your-python-data-pipelines-with-otel/otel-metrics-apm-view.png)\\n\\n### Logging\\n\\nWe export logs via the default OTLP protocol as well.\\n\\nFor logging, we modify the logging calls to add extra fields using a dictionary structure (bq_fields) as shown below:\\n\\n```python\\njob.result()  # Waits for table load to complete\\njob_details = client.get_job(job.job_id)  # Get job details\\n\\n# Extract job information\\nbq_fields = {\\n    # \\"slot_time_ms\\": job_details.slot_ms,\\n    \\"job_id\\": job_details.job_id,\\n    \\"job_type\\": job_details.job_type,\\n    \\"state\\": job_details.state,\\n    \\"path\\": job_details.path,\\n    \\"job_created\\": job_details.created.isoformat(),\\n    \\"job_ended\\": job_details.ended.isoformat(),\\n    \\"execution_time_ms\\": (\\n        job_details.ended - job_details.created\\n    ).total_seconds()\\n    * 1000,\\n    \\"bytes_processed\\": job_details.output_bytes,\\n    \\"rows_affected\\": job_details.output_rows,\\n    \\"destination_table\\": job_details.destination.table_id,\\n    \\"event\\": \\"BigQuery Load Job\\",  # Custom event type\\n    \\"status\\": \\"success\\",  # Status of the step (success/error)\\n    \\"category\\": category,  # ETL category tag\\n}\\n\\nlogging.info(\\"BigQuery load operation successful\\", extra=bq_fields)\\n```\\nThis code shows how to extract BQ job stats, among them execution time, bytes processed, rows affected, and destination table. You can also add other metadata, as we do with a custom event type, status, and category. \\n\\nAny calls to logging (of all levels above the set threshold, in this case INFO `logging.getLogger().setLevel(logging.INFO)`) will create a log that will be exported to Elastic. This means that in Python scripts that already use `logging` there is no need to make any changes to export logs to Elastic.\\n\\n![5 - APM Logs view](/assets/images/monitor-your-python-data-pipelines-with-otel/otel-logs-apm-view.png)\\n\\nFor each of the log messages, you can go into the details view (click on the `…` when you hover over the log line and go into `View details`) to examine the metadata attached to the log message. You can also explore the logs in [Discover](https://www.elastic.co/guide/en/kibana/8.14/discover.html).\\n\\n#### Explanation of Logging Modification\\n\\n- **logging.info**: This logs an informational message. The message \\"BigQuery load operation successful\\" is logged.\\n\\n- **extra=bq_fields**: This adds additional context to the log entry using the `bq_fields` dictionary. 
This context can include details making the log entries more informative and easier to analyze. This data will be later used to set up alerts and data anomaly detection jobs. \\n\\n## Monitoring in Elastic\'s APM\\n\\nAs shown, we can examine traces, metrics, and logs in the APM interface. To make the most out of this data, we make use of nearly the whole suite of features in Elastic Observability, alongside Elastic\'s ML analytics capabilities.\\n\\n### Rules and Alerts \\n\\nWe can set up rules and alerts to detect anomalies, errors, and performance issues in our scripts.\\n\\nThe [`error count threshold` rule](https://www.elastic.co/guide/en/kibana/current/apm-alerts.html#apm-create-error-alert) is used to trigger an alert when the number of errors in a service exceeds a defined threshold.\\n\\nTo create the rule, go to Alerts and Insights -> Rules -> Create Rule -> Error count threshold, set the error count threshold, the service or environment you want to monitor (you can also set an error grouping key across services), how often to run the check, and choose a connector.\\n\\n![6 - ETL Status Error Rule](/assets/images/monitor-your-python-data-pipelines-with-otel/error-count-threshold.png)\\n\\nNext, we create a rule of type `custom threshold` on a given ETL logs [data view](https://www.elastic.co/guide/en/kibana/current/data-views.html) (create one for your index) filtering on \\"labels.status: error\\" to get all the logs with status error from any of the steps of the ETL which have failed. The rule condition is set to document count > 0. In our case, in the last section of the rule config, we also set up Slack [alerts](https://www.elastic.co/guide/en/kibana/current/alerting-getting-started.html) every time the rule is activated. You can pick from a long list of [connectors](https://www.elastic.co/guide/en/kibana/current/action-types.html) Elastic supports. \\n\\n![7 - ETL Status Error Rule](/assets/images/monitor-your-python-data-pipelines-with-otel/etl-fail-status-rule.png)\\n\\nThen we can set up alerts for failures. We add status to the logs metadata as shown in the code sample below for each of the steps in the ETLs. It then becomes available in ES via `labels.status`.\\n\\n```python\\nlogging.info(\\n    \\"Elasticsearch search operation successful\\",\\n    extra={\\n        \\"event\\": \\"Elasticsearch Search\\",\\n        \\"status\\": \\"success\\",\\n        \\"category\\": category,\\n        \\"index\\": index,\\n    },\\n)\\n```\\n\\n### More Rules\\n\\nWe could also add rules to detect anomalies in the execution time of the different spans we define. This is done by selecting transaction/span -> Alerts and rules -> Custom threshold rule -> Latency. In the example below, we want to generate an alert whenever the search step takes more than 25s. \\n\\n![8 - APM Custom Threshold - Latency](/assets/images/monitor-your-python-data-pipelines-with-otel/apm_custom_threshold_latency.png)\\n\\n![9 - APM Custom Threshold - Config](/assets/images/monitor-your-python-data-pipelines-with-otel/apm_custom_threshold_latency_2.png)\\n\\nAlternatively, for finer-grained control, you can go with Alerts and rules -> Anomaly rule, set up an anomaly job, and pick a threshold severity level. \\n\\n![10 - APM Anomaly Rule - Config](/assets/images/monitor-your-python-data-pipelines-with-otel/apm_anomaly_rule_config.png)\\n\\n### Anomaly detection job\\n\\nIn this example, we set an anomaly detection job on the number of documents before transform. 
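If you prefer to script this rather than click through the ML UI, a minimal sketch with the official `elasticsearch` Python client might look like the following; the job ID, datafeed ID, index pattern, bucket span, and connection details are all hypothetical placeholders, and the equivalent wizard-based steps are described below.

```python
from elasticsearch import Elasticsearch

# Placeholder endpoint and credentials; assumes the ETL logs live in a
# hypothetical "logs-etl-*" index pattern with an @timestamp field.
es = Elasticsearch("https://your-deployment.es.example:443", api_key="your-api-key")

# Single-metric style job: model the count of incoming documents per bucket.
es.ml.put_job(
    job_id="etl-docs-before-transform",
    analysis_config={
        "bucket_span": "15m",
        "detectors": [{"function": "count", "detector_description": "Document count"}],
    },
    data_description={"time_field": "@timestamp"},
)

# Feed the job from the index, then open it and start analyzing.
es.ml.put_datafeed(
    datafeed_id="datafeed-etl-docs",
    job_id="etl-docs-before-transform",
    indices=["logs-etl-*"],
)
es.ml.open_job(job_id="etl-docs-before-transform")
es.ml.start_datafeed(datafeed_id="datafeed-etl-docs")
```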
\\n\\nWe set up an [Anomaly Detection job](https://www.elastic.co/guide/en/machine-learning/current/ml-ad-run-jobs.html) on the number of documents before the transform using the [Single metric job](https://www.elastic.co/guide/en/machine-learning/current/ml-anomaly-detection-job-types.html#multi-metric-jobs) to detect any anomalies with the incoming data source.\\n\\n![11 - Single Metrics](/assets/images/monitor-your-python-data-pipelines-with-otel/single-metrics.png)\\n\\nIn the last step, you can create alerting similar to what we did before, to receive alerts whenever an anomaly is detected, by setting up a severity level threshold. Every anomaly is assigned an anomaly score, and based on that score it is characterized by a severity level. \\n\\n![12 - Anomaly detection Alerting - Severity](/assets/images/monitor-your-python-data-pipelines-with-otel/anomaly-detection-alerting-1.png)\\n\\nSimilarly to the previous example, we set up a Slack connector to receive alerts whenever an anomaly is detected.\\n\\n![13 - Anomaly detection Alerting - Connectors](/assets/images/monitor-your-python-data-pipelines-with-otel/anomaly-detection-alerting-connectors.png)\\n\\nYou can add the results to your custom dashboard by going to Add Panel -> ML -> Anomaly Swim Lane -> Pick your job. \\n\\nSimilarly, we add jobs for the number of documents after the transform, and a Multi-Metric one on `execution_time_ms`, `bytes_processed`, and `rows_affected`, as was done in [Monitor your DBT pipelines with Elastic Observability](https://www.elastic.co/observability-labs/blog/monitor-dbt-pipelines-with-elastic-observability).\\n\\n## Custom Dashboard \\n\\nNow that your logs, metrics, and traces are in Elastic, you can use the full potential of our Kibana dashboards to extract the most from them. We can create a custom dashboard like the following one: a pie chart based on `labels.event` (category field for every type of step in the ETLs), a chart for every type of step broken down by status, a timeline of steps broken down by status, BQ stats for the ETL, and anomaly detection swim lane panels for the various anomaly jobs. \\n\\n![14 - Custom Dashboard](/assets/images/monitor-your-python-data-pipelines-with-otel/custom_dashboard.png)\\n\\n## Conclusion\\n\\nElastic’s APM, in combination with other Observability and ML features, provides a unified view of our data pipelines, allowing us to bring a lot of value with minimal code changes:\\n\\n- Exporting existing logs (no need to add custom logging) alongside their execution context\\n- Monitor the runtime behavior of our models\\n- Track data quality issues\\n- Identify and troubleshoot real-time incidents\\n- Optimize performance bottlenecks and resource usage\\n- Identify dependencies on other services and their latency\\n- Optimize data transformation processes\\n- Set up alerts on latency, data quality issues, error rates of transactions, or CPU usage\\n\\nWith these capabilities, we can ensure the resilience and reliability of our data pipelines, leading to more robust and accurate BI systems and reporting.\\n\\nIn conclusion, setting up OpenTelemetry (OTEL) in Python for data pipeline observability has significantly improved our ability to monitor, detect, and resolve issues proactively. 
This has led to more reliable data transformations, better resource management, and enhanced overall performance of our data transformation, BI and Machine Learning systems.\\n","code":"var Component=(()=>{var p=Object.create;var r=Object.defineProperty;var u=Object.getOwnPropertyDescriptor;var m=Object.getOwnPropertyNames;var g=Object.getPrototypeOf,y=Object.prototype.hasOwnProperty;var f=(n,e)=>()=>(e||n((e={exports:{}}).exports,e),e.exports),w=(n,e)=>{for(var i in e)r(n,i,{get:e[i],enumerable:!0})},s=(n,e,i,a)=>{if(e&&typeof e==\\"object\\"||typeof e==\\"function\\")for(let o of m(e))!y.call(n,o)&&o!==i&&r(n,o,{get:()=>e[o],enumerable:!(a=u(e,o))||a.enumerable});return n};var b=(n,e,i)=>(i=n!=null?p(g(n)):{},s(e||!n||!n.__esModule?r(i,\\"default\\",{value:n,enumerable:!0}):i,n)),T=n=>s(r({},\\"__esModule\\",{value:!0}),n);var c=f((L,l)=>{l.exports=_jsx_runtime});var v={};w(v,{default:()=>d,frontmatter:()=>E});var t=b(c()),E={title:\\"Monitor your Python data pipelines with OTEL\\",slug:\\"monitor-your-python-data-pipelines-with-otel\\",date:\\"2024-08-08\\",description:\\"Learn how to configure OTEL for your data pipelines, detect any anomalies, analyze performance, and set up corresponding alerts with Elastic.\\",author:[{slug:\\"tamara-dancheva\\"},{slug:\\"almudena-sanz-olive\\"}],image:\\"main_image.jpg\\",tags:[{slug:\\"opentelemetry\\"},{slug:\\"log-analytics\\"},{slug:\\"apm\\"},{slug:\\"machine-learning\\"},{slug:\\"python\\"}]};function h(n){let e={a:\\"a\\",code:\\"code\\",h2:\\"h2\\",h3:\\"h3\\",h4:\\"h4\\",img:\\"img\\",li:\\"li\\",ol:\\"ol\\",p:\\"p\\",pre:\\"pre\\",strong:\\"strong\\",ul:\\"ul\\",...n.components};return(0,t.jsxs)(t.Fragment,{children:[(0,t.jsxs)(e.p,{children:[\\"This article delves into how to implement observability practices, particularly using \\",(0,t.jsx)(e.a,{href:\\"https://opentelemetry.io/\\",rel:\\"nofollow\\",children:\\"OpenTelemetry (OTEL)\\"}),\\" in Python, to enhance the monitoring and quality control of data pipelines using Elastic. While the primary focus of the examples presented in the article is ETL (Extract, Transform, Load) processes to ensure the accuracy and reliability of data pipelines that is crucial for Business Intelligence (BI), the strategies and tools discussed are equally applicable to Python processes used for Machine Learning (ML) models or other data processing tasks.\\"]}),`\\n`,(0,t.jsx)(e.h2,{id:\\"introduction\\",children:\\"Introduction\\"}),`\\n`,(0,t.jsx)(e.p,{children:\\"Data pipelines, particularly ETL processes, form the backbone of modern data architectures. These pipelines are responsible for extracting raw data from various sources, transforming it into meaningful information, and loading it into data warehouses or data lakes for analysis and reporting.\\"}),`\\n`,(0,t.jsxs)(e.p,{children:[\\"In our organization, we have Python-based ETL scripts that play a pivotal role in exporting and processing data from Elasticsearch (ES) clusters and loading it into \\",(0,t.jsx)(e.a,{href:\\"https://cloud.google.com/bigquery\\",rel:\\"nofollow\\",children:\\"Google BigQuery (BQ)\\"}),\\". This processed data then feeds into \\",(0,t.jsx)(e.a,{href:\\"https://www.getdbt.com\\",rel:\\"nofollow\\",children:\\"DBT (Data Build Tool)\\"}),\\" models, which further refine the data and make it available for analytics and reporting. 
To see the full architecture and learn how we monitor our DBT pipelines with Elastic, see \\",(0,t.jsx)(e.a,{href:\\"https://www.elastic.co/observability-labs/blog/monitor-dbt-pipelines-with-elastic-observability\\",rel:\\"nofollow\\",children:\\"Monitor your DBT pipelines with Elastic Observability\\"}),\\". In this article, we focus on the ETL scripts. Given the critical nature of these scripts, it is imperative to set up mechanisms to control and ensure the quality of the data they generate.\\"]}),`\\n`,(0,t.jsx)(e.p,{children:\\"The strategies discussed here can be extended to any script or application that handles data processing or machine learning models, regardless of the programming language used, as long as there is a corresponding agent that supports OTEL instrumentation.\\"}),`\\n`,(0,t.jsx)(e.h2,{id:\\"motivation\\",children:\\"Motivation\\"}),`\\n`,(0,t.jsx)(e.p,{children:\\"Observability in data pipelines involves monitoring the entire lifecycle of data processing to ensure that everything works as expected. It includes:\\"}),`\\n`,(0,t.jsxs)(e.ol,{children:[`\\n`,(0,t.jsx)(e.li,{children:\\"Data Quality Control:\\"}),`\\n`]}),`\\n`,(0,t.jsxs)(e.ul,{children:[`\\n`,(0,t.jsx)(e.li,{children:\\"Detecting anomalies in the data, such as unexpected drops in record counts.\\"}),`\\n`,(0,t.jsx)(e.li,{children:\\"Verifying that data transformations are applied correctly and consistently.\\"}),`\\n`,(0,t.jsx)(e.li,{children:\\"Ensuring the integrity and accuracy of the data loaded into the data warehouse.\\"}),`\\n`]}),`\\n`,(0,t.jsxs)(e.ol,{start:\\"2\\",children:[`\\n`,(0,t.jsx)(e.li,{children:\\"Performance Monitoring:\\"}),`\\n`]}),`\\n`,(0,t.jsxs)(e.ul,{children:[`\\n`,(0,t.jsx)(e.li,{children:\\"Tracking the execution time of ETL scripts to identify bottlenecks and optimize performance.\\"}),`\\n`,(0,t.jsx)(e.li,{children:\\"Monitoring resource usage, such as memory and CPU consumption, to ensure efficient use of infrastructure.\\"}),`\\n`]}),`\\n`,(0,t.jsxs)(e.ol,{start:\\"3\\",children:[`\\n`,(0,t.jsx)(e.li,{children:\\"Real-time Alerting:\\"}),`\\n`]}),`\\n`,(0,t.jsxs)(e.ul,{children:[`\\n`,(0,t.jsx)(e.li,{children:\\"Setting up alerts for immediate notification of issues such as failed ETL jobs, data quality issues, or performance degradation.\\"}),`\\n`,(0,t.jsx)(e.li,{children:\\"Identifying the root cause of such incidents.\\"}),`\\n`,(0,t.jsx)(e.li,{children:\\"Proactively addressing incidents to minimize downtime and impact on business operations.\\"}),`\\n`]}),`\\n`,(0,t.jsx)(e.p,{children:\\"Issues such as failed ETL jobs can even point to larger infrastructure problems or data quality issues at the source.\\"}),`\\n`,(0,t.jsx)(e.h2,{id:\\"steps-for-instrumentation\\",children:\\"Steps for Instrumentation\\"}),`\\n`,(0,t.jsx)(e.p,{children:\\"Here are the steps to automatically instrument your Python script for exporting OTEL traces, metrics, and logs.\\"}),`\\n`,(0,t.jsx)(e.h3,{id:\\"step-1-import-required-libraries\\",children:\\"Step 1: Import Required Libraries\\"}),`\\n`,(0,t.jsx)(e.p,{children:\\"We first need to install the following libraries.\\"}),`\\n`,(0,t.jsx)(e.pre,{children:(0,t.jsx)(e.code,{className:\\"language-sh\\",children:`pip install elastic-opentelemetry google-cloud-bigquery[opentelemetry]\\n`})}),`\\n`,(0,t.jsxs)(e.p,{children:[\\"You can also add them to your project\'s \\",(0,t.jsx)(e.code,{children:\\"requirements.txt\\"}),\\" file and install them with \\",(0,t.jsx)(e.code,{children:\\"pip install -r 
requirements.txt\\"}),\\".\\"]}),`\\n`,(0,t.jsx)(e.h4,{id:\\"explanation-of-dependencies\\",children:\\"Explanation of Dependencies\\"}),`\\n`,(0,t.jsxs)(e.ol,{children:[`\\n`,(0,t.jsxs)(e.li,{children:[`\\n`,(0,t.jsxs)(e.p,{children:[(0,t.jsx)(e.strong,{children:\\"elastic-opentelemetry\\"}),\\": This package is the Elastic Distribution for OpenTelemetry Python. Under the hood it will install the following packages:\\"]}),`\\n`,(0,t.jsxs)(e.ul,{children:[`\\n`,(0,t.jsxs)(e.li,{children:[`\\n`,(0,t.jsxs)(e.p,{children:[(0,t.jsx)(e.strong,{children:\\"opentelemetry-distro\\"}),\\": This package is a convenience distribution of OpenTelemetry, which includes the OpenTelemetry SDK, APIs, and various instrumentation packages. It simplifies the setup and configuration of OpenTelemetry in your application.\\"]}),`\\n`]}),`\\n`,(0,t.jsxs)(e.li,{children:[`\\n`,(0,t.jsxs)(e.p,{children:[(0,t.jsx)(e.strong,{children:\\"opentelemetry-exporter-otlp\\"}),\\": This package provides an exporter that sends telemetry data to the OpenTelemetry Collector or any other endpoint that supports the OpenTelemetry Protocol (OTLP). This includes traces, metrics, and logs.\\"]}),`\\n`]}),`\\n`,(0,t.jsxs)(e.li,{children:[`\\n`,(0,t.jsxs)(e.p,{children:[(0,t.jsx)(e.strong,{children:\\"opentelemetry-instrumentation-system-metrics\\"}),\\": This package provides instrumentation for collecting system metrics, such as CPU usage, memory usage, and other system-level metrics.\\"]}),`\\n`]}),`\\n`]}),`\\n`]}),`\\n`,(0,t.jsxs)(e.li,{children:[`\\n`,(0,t.jsxs)(e.p,{children:[(0,t.jsx)(e.strong,{children:\\"google-cloud-bigquery[opentelemetry]\\"}),\\": This package integrates Google Cloud BigQuery with OpenTelemetry, allowing you to trace and monitor BigQuery operations.\\"]}),`\\n`]}),`\\n`]}),`\\n`,(0,t.jsx)(e.h3,{id:\\"step-2-export-otel-variables\\",children:\\"Step 2: Export OTEL Variables\\"}),`\\n`,(0,t.jsx)(e.p,{children:\\"Set the necessary OpenTelemetry (OTEL) variables by getting the configuration from APM OTEL from Elastic.\\"}),`\\n`,(0,t.jsx)(e.p,{children:\\"Go to APM -> Services -> Add data (top left corner).\\"}),`\\n`,(0,t.jsx)(e.p,{children:(0,t.jsx)(e.img,{src:\\"/assets/images/monitor-your-python-data-pipelines-with-otel/otel-variables-1.png\\",alt:\\"1 - Get OTEL variables step 1\\",width:\\"3024\\",height:\\"560\\"})}),`\\n`,(0,t.jsx)(e.p,{children:\\"In this section you will find the steps how to configure various APM agents. Navigate to OpenTelemetry to find the variables that you need to export.\\"}),`\\n`,(0,t.jsx)(e.p,{children:(0,t.jsx)(e.img,{src:\\"/assets/images/monitor-your-python-data-pipelines-with-otel/otel-variables-2.png\\",alt:\\"2 - Get OTEL variables step 2\\",width:\\"2930\\",height:\\"1295\\"})}),`\\n`,(0,t.jsxs)(e.p,{children:[(0,t.jsx)(e.strong,{children:\\"Find OTLP Endpoint\\"}),\\":\\"]}),`\\n`,(0,t.jsxs)(e.ul,{children:[`\\n`,(0,t.jsx)(e.li,{children:\\"Look for the section related to OpenTelemetry or OTLP configuration.\\"}),`\\n`,(0,t.jsxs)(e.li,{children:[\\"The \\",(0,t.jsx)(e.code,{children:\\"OTEL_EXPORTER_OTLP_ENDPOINT\\"}),\\" is typically provided as part of the setup instructions for integrating OpenTelemetry with Elastic APM. It might look something like \\",(0,t.jsx)(e.code,{children:\\"https:///otlp\\"}),\\".\\"]}),`\\n`]}),`\\n`,(0,t.jsxs)(e.p,{children:[(0,t.jsx)(e.strong,{children:\\"Obtain OTLP Headers\\"}),\\":\\"]}),`\\n`,(0,t.jsxs)(e.ul,{children:[`\\n`,(0,t.jsx)(e.li,{children:\\"In the same section, you should find instructions or a field for OTLP headers. 
These headers are often used for authentication purposes.\\"}),`\\n`,(0,t.jsxs)(e.li,{children:[\\"Copy the necessary headers provided by the interface. They might look like \\",(0,t.jsx)(e.code,{children:\\"Authorization: Bearer \\"}),\\".\\"]}),`\\n`]}),`\\n`,(0,t.jsxs)(e.p,{children:[\\"Note: Notice you need to replace the whitespace between \\",(0,t.jsx)(e.code,{children:\\"Bearer\\"}),\\" and your token with \\",(0,t.jsx)(e.code,{children:\\"%20\\"}),\\" in the \\",(0,t.jsx)(e.code,{children:\\"OTEL_EXPORTER_OTLP_HEADERS\\"}),\\" variable when using Python.\\"]}),`\\n`,(0,t.jsxs)(e.p,{children:[\\"Alternatively you can use a different approach for authentication using API keys (see \\",(0,t.jsx)(e.a,{href:\\"https://github.com/elastic/elastic-otel-python?tab=readme-ov-file#authentication\\",rel:\\"nofollow\\",children:\\"instructions\\"}),\\"). If you are using our \\",(0,t.jsx)(e.a,{href:\\"https://www.elastic.co/docs/current/serverless/general/what-is-serverless-elastic\\",rel:\\"nofollow\\",children:\\"serverless offering\\"}),\\" you will need to use this approach instead.\\"]}),`\\n`,(0,t.jsxs)(e.p,{children:[(0,t.jsx)(e.strong,{children:\\"Set up the variables\\"}),\\":\\"]}),`\\n`,(0,t.jsxs)(e.ul,{children:[`\\n`,(0,t.jsxs)(e.li,{children:[\\"Replace the placeholders in your script with the actual values obtained from the Elastic APM interface and execute it in your shell via the source command \\",(0,t.jsx)(e.code,{children:\\"source env.sh\\"}),\\".\\"]}),`\\n`]}),`\\n`,(0,t.jsx)(e.p,{children:\\"Below is a script to set these variables:\\"}),`\\n`,(0,t.jsx)(e.pre,{children:(0,t.jsx)(e.code,{className:\\"language-sh\\",children:`#!/bin/bash\\necho \\"--- :otel: Setting OTEL variables\\"\\nexport OTEL_EXPORTER_OTLP_ENDPOINT=\'https://your-apm-server/otlp:443\'\\nexport OTEL_EXPORTER_OTLP_HEADERS=\'Authorization=Bearer%20your-token\'\\nexport OTEL_PYTHON_LOGGING_AUTO_INSTRUMENTATION_ENABLED=true\\nexport OTEL_PYTHON_LOG_CORRELATION=true\\nexport ELASTIC_OTEL_SYSTEM_METRICS_ENABLED=true\\nexport OTEL_METRIC_EXPORT_INTERVAL=5000\\nexport OTEL_LOGS_EXPORTER=\\"otlp,console\\"\\n`})}),`\\n`,(0,t.jsx)(e.p,{children:\\"With these variables set, we are ready for auto-instrumentation without needing to add anything to the code.\\"}),`\\n`,(0,t.jsx)(e.h4,{id:\\"explanation-of-variables\\",children:\\"Explanation of Variables\\"}),`\\n`,(0,t.jsxs)(e.ul,{children:[`\\n`,(0,t.jsxs)(e.li,{children:[`\\n`,(0,t.jsxs)(e.p,{children:[(0,t.jsx)(e.strong,{children:\\"OTEL_EXPORTER_OTLP_ENDPOINT\\"}),\\": This variable specifies the endpoint to which OTLP data (traces, metrics, logs) will be sent. Replace \\",(0,t.jsx)(e.code,{children:\\"placeholder\\"}),\\" with your actual OTLP endpoint.\\"]}),`\\n`]}),`\\n`,(0,t.jsxs)(e.li,{children:[`\\n`,(0,t.jsxs)(e.p,{children:[(0,t.jsx)(e.strong,{children:\\"OTEL_EXPORTER_OTLP_HEADERS\\"}),\\": This variable specifies any headers required for authentication or other purposes when sending OTLP data. 
Replace \\",(0,t.jsx)(e.code,{children:\\"placeholder\\"}),\\" with your actual OTLP headers.\\"]}),`\\n`]}),`\\n`,(0,t.jsxs)(e.li,{children:[`\\n`,(0,t.jsxs)(e.p,{children:[(0,t.jsx)(e.strong,{children:\\"OTEL_PYTHON_LOGGING_AUTO_INSTRUMENTATION_ENABLED\\"}),\\": This variable enables auto-instrumentation for logging in Python, allowing logs to be automatically enriched with trace context.\\"]}),`\\n`]}),`\\n`,(0,t.jsxs)(e.li,{children:[`\\n`,(0,t.jsxs)(e.p,{children:[(0,t.jsx)(e.strong,{children:\\"OTEL_PYTHON_LOG_CORRELATION\\"}),\\": This variable enables log correlation, which includes trace context in log entries to correlate logs with traces.\\"]}),`\\n`]}),`\\n`,(0,t.jsxs)(e.li,{children:[`\\n`,(0,t.jsxs)(e.p,{children:[(0,t.jsx)(e.strong,{children:\\"OTEL_METRIC_EXPORT_INTERVAL\\"}),\\": This variable specifies the metric export interval in milliseconds, in this case 5s.\\"]}),`\\n`]}),`\\n`,(0,t.jsxs)(e.li,{children:[`\\n`,(0,t.jsxs)(e.p,{children:[(0,t.jsx)(e.strong,{children:\\"OTEL_LOGS_EXPORTER\\"}),\': This variable specifies the exporter to use for logs. Setting it to \\"otlp\\" means that logs will be exported using the OTLP protocol. Adding \\"console\\" specifies that logs should be exported to both the OTLP endpoint and the console. In our case for better visibility on the infa side, we choose to export to console as well.\']}),`\\n`]}),`\\n`,(0,t.jsxs)(e.li,{children:[`\\n`,(0,t.jsxs)(e.p,{children:[(0,t.jsx)(e.strong,{children:\\"ELASTIC_OTEL_SYSTEM_METRICS_ENABLED\\"}),\\": It is needed to use this variable when using the Elastic distribution as by default it is set to false.\\"]}),`\\n`]}),`\\n`]}),`\\n`,(0,t.jsxs)(e.p,{children:[\\"Note: \\",(0,t.jsx)(e.strong,{children:\\"OTEL_METRICS_EXPORTER\\"}),\\" and \\",(0,t.jsx)(e.strong,{children:\\"OTEL_TRACES_EXPORTER\\"}),\': This variables specify the exporter to use for metrics/traces, and are set to \\"otlp\\" by default, which means that metrics and traces will be exported using the OTLP protocol.\']}),`\\n`,(0,t.jsx)(e.h3,{id:\\"running-python-etls\\",children:\\"Running Python ETLs\\"}),`\\n`,(0,t.jsx)(e.p,{children:\\"We run Python ETLs with the following command:\\"}),`\\n`,(0,t.jsx)(e.pre,{children:(0,t.jsx)(e.code,{className:\\"language-sh\\",children:`OTEL_RESOURCE_ATTRIBUTES=\\"service.name=x-ETL,service.version=1.0,deployment.environment=production\\" && opentelemetry-instrument python3 X_ETL.py \\n`})}),`\\n`,(0,t.jsx)(e.h4,{id:\\"explanation-of-the-command\\",children:\\"Explanation of the Command\\"}),`\\n`,(0,t.jsxs)(e.ul,{children:[`\\n`,(0,t.jsxs)(e.li,{children:[`\\n`,(0,t.jsxs)(e.p,{children:[(0,t.jsx)(e.strong,{children:\\"OTEL_RESOURCE_ATTRIBUTES\\"}),\\": This variable specifies additional resource attributes, such as \\",(0,t.jsx)(e.a,{href:\\"https://www.elastic.co/guide/en/observability/current/apm.html\\",rel:\\"nofollow\\",children:\\"service name\\"}),\\", service version and deployment environment, that will be included in all telemetry data, you can customize these values per your needs. You can use a different service name for each script.\\"]}),`\\n`]}),`\\n`,(0,t.jsxs)(e.li,{children:[`\\n`,(0,t.jsxs)(e.p,{children:[(0,t.jsx)(e.strong,{children:\\"opentelemetry-instrument\\"}),\\": This command auto-instruments the specified Python script for OpenTelemetry. 
It sets up the necessary hooks to collect traces, metrics, and logs.\\"]}),`\\n`]}),`\\n`,(0,t.jsxs)(e.li,{children:[`\\n`,(0,t.jsxs)(e.p,{children:[(0,t.jsx)(e.strong,{children:\\"python3 X_ETL.py\\"}),\\": This runs the specified Python script (\\",(0,t.jsx)(e.code,{children:\\"X_ETL.py\\"}),\\").\\"]}),`\\n`]}),`\\n`]}),`\\n`,(0,t.jsx)(e.h3,{id:\\"tracing\\",children:\\"Tracing\\"}),`\\n`,(0,t.jsx)(e.p,{children:\\"We export the traces via the default OTLP protocol.\\"}),`\\n`,(0,t.jsxs)(e.p,{children:[\\"Tracing is a key aspect of monitoring and understanding the performance of applications. \\",(0,t.jsx)(e.a,{href:\\"https://www.elastic.co/guide/en/observability/current/apm-data-model-spans.html\\",rel:\\"nofollow\\",children:\\"Spans\\"}),\\" form the building blocks of tracing. They encapsulate detailed information about the execution of specific code paths. They record the start and end times of activities and can have hierarchical relationships with other spans, forming a parent/child structure.\\"]}),`\\n`,(0,t.jsx)(e.p,{children:\\"Spans include essential attributes such as transaction IDs, parent IDs, start times, durations, names, types, subtypes, and actions. Additionally, spans may contain stack traces, which provide a detailed view of function calls, including attributes like function name, file path, and line number, which is especially useful for debugging. These attributes help us analyze the script\'s execution flow, identify performance issues, and enhance optimization efforts.\\"}),`\\n`,(0,t.jsx)(e.p,{children:\\"With the default instrumentation, the whole Python script would be a single span. In our case, we have decided to manually add specific spans for the different phases of the Python process, to be able to measure their latency, throughput, error rate, etc. individually. This is how we define spans manually:\\"}),`\\n`,(0,t.jsx)(e.pre,{children:(0,t.jsx)(e.code,{className:\\"language-python\\",children:`from opentelemetry import trace\\n\\nif __name__ == \\"__main__\\":\\n\\n    tracer = trace.get_tracer(\\"main\\")\\n    with tracer.start_as_current_span(\\"initialization\\") as span:\\n        # Init code\\n        \\\\u2026\\n    with tracer.start_as_current_span(\\"search\\") as span:\\n        # Step 1 - Search code\\n        \\\\u2026\\n    with tracer.start_as_current_span(\\"transform\\") as span:\\n        # Step 2 - Transform code\\n        \\\\u2026\\n    with tracer.start_as_current_span(\\"load\\") as span:\\n        # Step 3 - Load code\\n        \\\\u2026\\n`})}),`\\n`,(0,t.jsx)(e.p,{children:\\"You can explore traces in the APM interface as shown below.\\"}),`\\n`,(0,t.jsx)(e.p,{children:(0,t.jsx)(e.img,{src:\\"/assets/images/monitor-your-python-data-pipelines-with-otel/Traces-APM-Observability-Elastic.png\\",alt:\\"3 - APM Traces view\\",width:\\"3010\\",height:\\"2723\\"})}),`\\n`,(0,t.jsx)(e.h3,{id:\\"metrics\\",children:\\"Metrics\\"}),`\\n`,(0,t.jsx)(e.p,{children:\\"We export metrics via the default OTLP protocol as well, such as CPU usage and memory. 
No extra code needs to be added in the script itself.\\"}),`\\n`,(0,t.jsxs)(e.p,{children:[\\"Note: Remember to set \\",(0,t.jsx)(e.code,{children:\\"ELASTIC_OTEL_SYSTEM_METRICS_ENABLED\\"}),\\" to true.\\"]}),`\\n`,(0,t.jsx)(e.p,{children:(0,t.jsx)(e.img,{src:\\"/assets/images/monitor-your-python-data-pipelines-with-otel/otel-metrics-apm-view.png\\",alt:\\"4 - APM Metrics view\\",width:\\"2980\\",height:\\"1262\\"})}),`\\n`,(0,t.jsx)(e.h3,{id:\\"logging\\",children:\\"Logging\\"}),`\\n`,(0,t.jsx)(e.p,{children:\\"We export logs via the default OTLP protocol as well.\\"}),`\\n`,(0,t.jsx)(e.p,{children:\\"For logging, we modify the logging calls to add extra fields using a dictionary structure (bq_fields) as shown below:\\"}),`\\n`,(0,t.jsx)(e.pre,{children:(0,t.jsx)(e.code,{className:\\"language-python\\",children:`job.result()  # Waits for table load to complete\\njob_details = client.get_job(job.job_id)  # Get job details\\n\\n# Extract job information\\nbq_fields = {\\n    # \\"slot_time_ms\\": job_details.slot_ms,\\n    \\"job_id\\": job_details.job_id,\\n    \\"job_type\\": job_details.job_type,\\n    \\"state\\": job_details.state,\\n    \\"path\\": job_details.path,\\n    \\"job_created\\": job_details.created.isoformat(),\\n    \\"job_ended\\": job_details.ended.isoformat(),\\n    \\"execution_time_ms\\": (\\n        job_details.ended - job_details.created\\n    ).total_seconds()\\n    * 1000,\\n    \\"bytes_processed\\": job_details.output_bytes,\\n    \\"rows_affected\\": job_details.output_rows,\\n    \\"destination_table\\": job_details.destination.table_id,\\n    \\"event\\": \\"BigQuery Load Job\\",  # Custom event type\\n    \\"status\\": \\"success\\",  # Status of the step (success/error)\\n    \\"category\\": category,  # ETL category tag\\n}\\n\\nlogging.info(\\"BigQuery load operation successful\\", extra=bq_fields)\\n`})}),`\\n`,(0,t.jsx)(e.p,{children:\\"This code shows how to extract BQ job stats, among them execution time, bytes processed, rows affected, and destination table. You can also add other metadata, as we do with a custom event type, status, and category.\\"}),`\\n`,(0,t.jsxs)(e.p,{children:[\\"Any calls to logging (of all levels above the set threshold, in this case INFO \\",(0,t.jsx)(e.code,{children:\\"logging.getLogger().setLevel(logging.INFO)\\"}),\\") will create a log that will be exported to Elastic. This means that in Python scripts that already use \\",(0,t.jsx)(e.code,{children:\\"logging\\"}),\\" there is no need to make any changes to export logs to Elastic.\\"]}),`\\n`,(0,t.jsx)(e.p,{children:(0,t.jsx)(e.img,{src:\\"/assets/images/monitor-your-python-data-pipelines-with-otel/otel-logs-apm-view.png\\",alt:\\"5 - APM Logs view\\",width:\\"2964\\",height:\\"703\\"})}),`\\n`,(0,t.jsxs)(e.p,{children:[\\"For each of the log messages, you can go into the details view (click on the \\",(0,t.jsx)(e.code,{children:\\"\\\\u2026\\"}),\\" when you hover over the log line and go into \\",(0,t.jsx)(e.code,{children:\\"View details\\"}),\\") to examine the metadata attached to the log message. You can also explore the logs in \\",(0,t.jsx)(e.a,{href:\\"https://www.elastic.co/guide/en/kibana/8.14/discover.html\\",rel:\\"nofollow\\",children:\\"Discover\\"}),\\".\\"]}),`\\n`,(0,t.jsx)(e.h4,{id:\\"explanation-of-logging-modification\\",children:\\"Explanation of Logging Modification\\"}),`\\n`,(0,t.jsxs)(e.ul,{children:[`\\n`,(0,t.jsxs)(e.li,{children:[`\\n`,(0,t.jsxs)(e.p,{children:[(0,t.jsx)(e.strong,{children:\\"logging.info\\"}),\': This logs an informational message. 
The message \\"BigQuery load operation successful\\" is logged.\']}),`\\n`]}),`\\n`,(0,t.jsxs)(e.li,{children:[`\\n`,(0,t.jsxs)(e.p,{children:[(0,t.jsx)(e.strong,{children:\\"extra=bq_fields\\"}),\\": This adds additional context to the log entry using the \\",(0,t.jsx)(e.code,{children:\\"bq_fields\\"}),\\" dictionary. This context can include details making the log entries more informative and easier to analyze. This data will be later used to set up alerts and data anomaly detection jobs.\\"]}),`\\n`]}),`\\n`]}),`\\n`,(0,t.jsx)(e.h2,{id:\\"monitoring-in-elastics-apm\\",children:\\"Monitoring in Elastic\'s APM\\"}),`\\n`,(0,t.jsx)(e.p,{children:\\"As shown, we can examine traces, metrics, and logs in the APM interface. To make the most out of this data, we make use on top of nearly the whole suit of features in Elastic Observability alongside Elastic Analytic\'s ML capabilities.\\"}),`\\n`,(0,t.jsx)(e.h3,{id:\\"rules-and-alerts\\",children:\\"Rules and Alerts\\"}),`\\n`,(0,t.jsx)(e.p,{children:\\"We can set up rules and alerts to detect anomalies, errors, and performance issues in our scripts.\\"}),`\\n`,(0,t.jsxs)(e.p,{children:[\\"The \\",(0,t.jsxs)(e.a,{href:\\"https://www.elastic.co/guide/en/kibana/current/apm-alerts.html#apm-create-error-alert\\",rel:\\"nofollow\\",children:[(0,t.jsx)(e.code,{children:\\"error count threshold\\"}),\\" rule\\"]}),\\" is used to create a trigger when the number of errors in a service exceeds a defined threshold.\\"]}),`\\n`,(0,t.jsx)(e.p,{children:\\"To create the rule go to Alerts and Insights -> Rules -> Create Rule -> Error count threshold, set the error count threshold, the service or environment you want to monitor (you can also set an error grouping key across services), how often to run the check, and choose a connector.\\"}),`\\n`,(0,t.jsx)(e.p,{children:(0,t.jsx)(e.img,{src:\\"/assets/images/monitor-your-python-data-pipelines-with-otel/error-count-threshold.png\\",alt:\\"6 - ETL Status Error Rule\\",width:\\"1246\\",height:\\"1416\\"})}),`\\n`,(0,t.jsxs)(e.p,{children:[\\"Next, we create a rule of type \\",(0,t.jsx)(e.code,{children:\\"custom threshold\\"}),\\" on a given ETL logs \\",(0,t.jsx)(e.a,{href:\\"https://www.elastic.co/guide/en/kibana/current/data-views.html\\",rel:\\"nofollow\\",children:\\"data view\\"}),\' (create one for your index) filtering on \\"labels.status: error\\" to get all the logs with status error from any of the steps of the ETL which have failed. The rule condition is set to document count > 0. In our case, in the last section of the rule config, we also set up Slack \',(0,t.jsx)(e.a,{href:\\"https://www.elastic.co/guide/en/kibana/current/alerting-getting-started.html\\",rel:\\"nofollow\\",children:\\"alerts\\"}),\\" every time the rule is activated. You can pick from a long list of \\",(0,t.jsx)(e.a,{href:\\"https://www.elastic.co/guide/en/kibana/current/action-types.html\\",rel:\\"nofollow\\",children:\\"connectors\\"}),\\" Elastic supports.\\"]}),`\\n`,(0,t.jsx)(e.p,{children:(0,t.jsx)(e.img,{src:\\"/assets/images/monitor-your-python-data-pipelines-with-otel/etl-fail-status-rule.png\\",alt:\\"7 - ETL Status Error Rule\\",width:\\"1236\\",height:\\"1438\\"})}),`\\n`,(0,t.jsxs)(e.p,{children:[\\"Then we can set up alerts for failures. We add status to the logs metadata as shown in the code sample below for each of the steps in the ETLs. 
It then becomes available in ES via \\",(0,t.jsx)(e.code,{children:\\"labels.status\\"}),\\".\\"]}),`\\n`,(0,t.jsx)(e.pre,{children:(0,t.jsx)(e.code,{className:\\"language-python\\",children:`logging.info(\\n    \\"Elasticsearch search operation successful\\",\\n    extra={\\n        \\"event\\": \\"Elasticsearch Search\\",\\n        \\"status\\": \\"success\\",\\n        \\"category\\": category,\\n        \\"index\\": index,\\n    },\\n)\\n`})}),`\\n`,(0,t.jsx)(e.h3,{id:\\"more-rules\\",children:\\"More Rules\\"}),`\\n`,(0,t.jsx)(e.p,{children:\\"We could also add rules to detect anomalies in the execution time of the different spans we define. This is done by selecting transaction/span -> Alerts and rules -> Custom threshold rule -> Latency. In the example below, we want to generate an alert whenever the search step takes more than 25s.\\"}),`\\n`,(0,t.jsx)(e.p,{children:(0,t.jsx)(e.img,{src:\\"/assets/images/monitor-your-python-data-pipelines-with-otel/apm_custom_threshold_latency.png\\",alt:\\"8 - APM Custom Threshold - Latency\\",width:\\"2932\\",height:\\"900\\"})}),`\\n`,(0,t.jsx)(e.p,{children:(0,t.jsx)(e.img,{src:\\"/assets/images/monitor-your-python-data-pipelines-with-otel/apm_custom_threshold_latency_2.png\\",alt:\\"9 - APM Custom Threshold - Config\\",width:\\"1228\\",height:\\"1522\\"})}),`\\n`,(0,t.jsx)(e.p,{children:\\"Alternatively, for finer-grained control, you can go with Alerts and rules -> Anomaly rule, set up an anomaly job, and pick a threshold severity level.\\"}),`\\n`,(0,t.jsx)(e.p,{children:(0,t.jsx)(e.img,{src:\\"/assets/images/monitor-your-python-data-pipelines-with-otel/apm_anomaly_rule_config.png\\",alt:\\"10 - APM Anomaly Rule - Config\\",width:\\"1228\\",height:\\"1522\\"})}),`\\n`,(0,t.jsx)(e.h3,{id:\\"anomaly-detection-job\\",children:\\"Anomaly detection job\\"}),`\\n`,(0,t.jsx)(e.p,{children:\\"In this example, we set an anomaly detection job on the number of documents before transform.\\"}),`\\n`,(0,t.jsxs)(e.p,{children:[\\"We set up an \\",(0,t.jsx)(e.a,{href:\\"https://www.elastic.co/guide/en/machine-learning/current/ml-ad-run-jobs.html\\",rel:\\"nofollow\\",children:\\"Anomaly Detection job\\"}),\\" on the number of documents before the transform using the \\",(0,t.jsx)(e.a,{href:\\"https://www.elastic.co/guide/en/machine-learning/current/ml-anomaly-detection-job-types.html#multi-metric-jobs\\",rel:\\"nofollow\\",children:\\"Single metric job\\"}),\\" to detect any anomalies with the incoming data source.\\"]}),`\\n`,(0,t.jsx)(e.p,{children:(0,t.jsx)(e.img,{src:\\"/assets/images/monitor-your-python-data-pipelines-with-otel/single-metrics.png\\",alt:\\"11 - Single Metrics\\",width:\\"2960\\",height:\\"1404\\"})}),`\\n`,(0,t.jsx)(e.p,{children:\\"In the last step, you can create alerting similar to what we did before, to receive alerts whenever an anomaly is detected, by setting up a severity level threshold. 
Every anomaly is assigned an anomaly score, and based on that score it is characterized by a severity level.\\"}),`\\n`,(0,t.jsx)(e.p,{children:(0,t.jsx)(e.img,{src:\\"/assets/images/monitor-your-python-data-pipelines-with-otel/anomaly-detection-alerting-1.png\\",alt:\\"12 - Anomaly detection Alerting - Severity\\",width:\\"2984\\",height:\\"1404\\"})}),`\\n`,(0,t.jsx)(e.p,{children:\\"Similarly to the previous example, we set up a Slack connector to receive alerts whenever an anomaly is detected.\\"}),`\\n`,(0,t.jsx)(e.p,{children:(0,t.jsx)(e.img,{src:\\"/assets/images/monitor-your-python-data-pipelines-with-otel/anomaly-detection-alerting-connectors.png\\",alt:\\"13 - Anomaly detection Alerting - Connectors\\",width:\\"2984\\",height:\\"1404\\"})}),`\\n`,(0,t.jsx)(e.p,{children:\\"You can add the results to your custom dashboard by going to Add Panel -> ML -> Anomaly Swim Lane -> Pick your job.\\"}),`\\n`,(0,t.jsxs)(e.p,{children:[\\"Similarly, we add jobs for the number of documents after the transform, and a Multi-Metric one on \\",(0,t.jsx)(e.code,{children:\\"execution_time_ms\\"}),\\", \\",(0,t.jsx)(e.code,{children:\\"bytes_processed\\"}),\\", and \\",(0,t.jsx)(e.code,{children:\\"rows_affected\\"}),\\", as was done in \\",(0,t.jsx)(e.a,{href:\\"https://www.elastic.co/observability-labs/blog/monitor-dbt-pipelines-with-elastic-observability\\",rel:\\"nofollow\\",children:\\"Monitor your DBT pipelines with Elastic Observability\\"}),\\".\\"]}),`\\n`,(0,t.jsx)(e.h2,{id:\\"custom-dashboard\\",children:\\"Custom Dashboard\\"}),`\\n`,(0,t.jsxs)(e.p,{children:[\\"Now that your logs, metrics, and traces are in Elastic, you can use the full potential of our Kibana dashboards to extract the most from them. We can create a custom dashboard like the following one: a pie chart based on \\",(0,t.jsx)(e.code,{children:\\"labels.event\\"}),\\" (category field for every type of step in the ETLs), a chart for every type of step broken down by status, a timeline of steps broken down by status, BQ stats for the ETL, and anomaly detection swim lane panels for the various anomaly jobs.\\"]}),`\\n`,(0,t.jsx)(e.p,{children:(0,t.jsx)(e.img,{src:\\"/assets/images/monitor-your-python-data-pipelines-with-otel/custom_dashboard.png\\",alt:\\"14 - Custom Dashboard\\",width:\\"2992\\",height:\\"2701\\"})}),`\\n`,(0,t.jsx)(e.h2,{id:\\"conclusion\\",children:\\"Conclusion\\"}),`\\n`,(0,t.jsx)(e.p,{children:\\"Elastic\\\\u2019s APM, in combination with other Observability and ML features, provides a unified view of our data pipelines, allowing us to bring a lot of value with minimal code changes:\\"}),`\\n`,(0,t.jsxs)(e.ul,{children:[`\\n`,(0,t.jsx)(e.li,{children:\\"Exporting existing logs (no need to add custom logging) alongside their execution context\\"}),`\\n`,(0,t.jsx)(e.li,{children:\\"Monitor the runtime behavior of our models\\"}),`\\n`,(0,t.jsx)(e.li,{children:\\"Track data quality issues\\"}),`\\n`,(0,t.jsx)(e.li,{children:\\"Identify and troubleshoot real-time incidents\\"}),`\\n`,(0,t.jsx)(e.li,{children:\\"Optimize performance bottlenecks and resource usage\\"}),`\\n`,(0,t.jsx)(e.li,{children:\\"Identify dependencies on other services and their latency\\"}),`\\n`,(0,t.jsx)(e.li,{children:\\"Optimize data transformation processes\\"}),`\\n`,(0,t.jsx)(e.li,{children:\\"Set up alerts on latency, data quality issues, error rates of transactions, or CPU usage\\"}),`\\n`]}),`\\n`,(0,t.jsx)(e.p,{children:\\"With these capabilities, we can ensure the resilience and reliability of our data pipelines, 
leading to more robust and accurate BI system and reporting.\\"}),`\\n`,(0,t.jsx)(e.p,{children:\\"In conclusion, setting up OpenTelemetry (OTEL) in Python for data pipeline observability has significantly improved our ability to monitor, detect, and resolve issues proactively. This has led to more reliable data transformations, better resource management, and enhanced overall performance of our data transformation, BI and Machine Learning systems.\\"})]})}function d(n={}){let{wrapper:e}=n.components||{};return e?(0,t.jsx)(e,{...n,children:(0,t.jsx)(h,{...n})}):h(n)}return T(v);})();\\n;return Component;"},"_id":"articles/monitor-your-python-data-pipelines-with-otel.mdx","_raw":{"sourceFilePath":"articles/monitor-your-python-data-pipelines-with-otel.mdx","sourceFileName":"monitor-your-python-data-pipelines-with-otel.mdx","sourceFileDir":"articles","contentType":"mdx","flattenedPath":"articles/monitor-your-python-data-pipelines-with-otel"},"type":"Article","imageUrl":"/assets/images/monitor-your-python-data-pipelines-with-otel/main_image.jpg","readingTime":"17 min read","url":"/monitor-your-python-data-pipelines-with-otel","headings":[{"level":2,"title":"Introduction","href":"#introduction"},{"level":2,"title":"Motivation","href":"#motivation"},{"level":2,"title":"Steps for Instrumentation","href":"#steps-for-instrumentation"},{"level":3,"title":"Step 1: Import Required Libraries","href":"#step-1-import-required-libraries"},{"level":4,"title":"Explanation of Dependencies","href":"#explanation-of-dependencies"},{"level":3,"title":"Step 2: Export OTEL Variables","href":"#step-2-export-otel-variables"},{"level":4,"title":"Explanation of Variables","href":"#explanation-of-variables"},{"level":3,"title":"Running Python ETLs","href":"#running-python-etls"},{"level":4,"title":"Explanation of the Command","href":"#explanation-of-the-command"},{"level":3,"title":"Tracing","href":"#tracing"},{"level":3,"title":"Metrics ","href":"#metrics-"},{"level":3,"title":"Logging","href":"#logging"},{"level":4,"title":"Explanation of Logging Modification","href":"#explanation-of-logging-modification"},{"level":2,"title":"Monitoring in Elastic\'s APM","href":"#monitoring-in-elastics-apm"},{"level":3,"title":"Rules and Alerts ","href":"#rules-and-alerts-"},{"level":3,"title":"More Rules","href":"#more-rules"},{"level":3,"title":"Anomaly detection job","href":"#anomaly-detection-job"},{"level":2,"title":"Custom Dashboard ","href":"#custom-dashboard-"},{"level":2,"title":"Conclusion","href":"#conclusion"}]},{"title":"Monitoring Android applications with Elastic APM","slug":"monitoring-android-applications-apm","date":"2023-03-21","description":"Elastic has launched its APM agent for Android applications, allowing developers to track key aspects of applications to help troubleshoot issues and performance flaws with mobile applications, corresponding backend services, and their interactions.","image":"illustration-indusrty-technology-social-1680x980.png","author":[{"slug":"alexander-wert","type":"Author","_raw":{}},{"slug":"cesar-munoz","type":"Author","_raw":{}}],"tags":[{"slug":"apm","type":"Tag","_raw":{}},{"slug":"opentelemetry","type":"Tag","_raw":{}},{"slug":"mobile-apm","type":"Tag","_raw":{}}],"body":{"raw":"\\nPeople are handling more and more matters on their smartphones through mobile apps both privately and professionally. 
With thousands or even millions of users, ensuring great [application performance](https://www.elastic.co/observability/application-performance-monitoring) and reliability is a key challenge for providers and operators of mobile apps and related backend services. Understanding the behavior of mobile apps, the occurrences and types of crashes, the [root causes of slow response times](https://www.elastic.co/blog/apm-correlations-elastic-observability-root-cause-transactions), and the real user impact of backend issues is key to managing the performance of mobile apps and associated backend services.\\n\\nElastic has launched its application performance monitoring ([APM](https://www.elastic.co/observability/application-performance-monitoring)) agent for Android applications, allowing developers to keep track of key aspects of their applications, from crashes and HTTP requests to screen rendering times and end-to-end distributed tracing. All of this helps troubleshoot issues and performance flaws with mobile applications, corresponding backend services, and their interaction. The Elastic APM Android Agent automatically instruments your application and its dependencies so that you can simply “plug-and-play” the agent into your application without having to worry about changing your codebase much.\\n\\nThe Elastic APM Android Agent has been developed from scratch on top of OpenTelemetry, an open standard and framework for observability. Developers will be able to take full advantage of its capabilities, as well as the support provided by a huge and active community. If you’re familiar with OpenTelemetry and your application is already instrumented with OpenTelemetry, then you can simply reuse it all when switching to the Elastic APM Android Agent. But no worries if that’s not the case — the agent is configured to handle common traceable scenarios automatically without having to deep dive into the specifics of the OpenTelemetry API.\\n\\n[Related article: [Adding free and open Elastic APM as part of your Elastic Observability deployment](https://www.elastic.co/blog/adding-free-and-open-elastic-apm-as-part-of-your-elastic-observability-deployment)]\\n\\n## How it works\\n\\nThe Elastic APM Android Agent is a combination of an SDK plus a Gradle plugin. The SDK contains utilities that will let you initialize and configure the agent’s behavior, as well as prepare and initialize the OpenTelemetry SDK. You can use the SDK for programmatic configuration and initialization of the agent, in particular for advanced and special use cases.\\n\\nIn most cases, a programmatic configuration and initialization won’t be necessary. Instead, you can use the provided Gradle plugin to configure the agent and automatically instrument your app. 
The Gradle plugin uses Byte Buddy and the official Android Gradle plugin API under the hood to automatically inject instrumentation code into your app through compile-time transformation of your application’s and its dependencies’ classes.\\n\\nCompiling your app with the Elastic Android APM Agent Gradle Plugin configured and enabled will make your Android app report tracing data, metrics, and different events and logs at runtime.\\n\\n## Using the Elastic APM Agent in an Android app\\n\\nBy means of a [simple demo application](https://github.com/elastic/sample-app-android-apm), we’re going through the steps mentioned in the “[Set up the Agent](https://www.elastic.co/guide/en/apm/agent/android/current/setup.html)” guide to set up the Elastic Android APM Agent.\\n\\n### Prerequisites\\n\\nFor this example, you will need the following:\\n\\n- An Elastic Stack with APM enabled (We recommend using Elastic’s Cloud offering. [Try it for free](https://www.elastic.co/cloud/elasticsearch-service/signup?baymax=docs-body&elektra=docs).)\\n- Java 11+\\n- [Android Studio](https://developer.android.com/studio?gclid=Cj0KCQiAic6eBhCoARIsANlox87QsDnyjpKObQSivZz6DHMLTiL76CmqZGXTEqf4L7h3jQO7ljm8B14aAo4xEALw_wcB&gclsrc=aw.ds)\\n- [Android Emulator, AVD device](https://developer.android.com/studio/run/emulator)\\n\\nYou’ll also need a way to push the app’s [signals](https://opentelemetry.io/docs/concepts/signals/) into Elastic. Therefore, you will need Elastic APM’s [secret token](https://www.elastic.co/guide/en/apm/guide/current/secret-token.html#create-secret-token) that you’ll configure into our sample app later.\\n\\n### Test project for our example\\n\\nTo showcase an end-to-end scenario including distributed tracing, in this example, we’ll instrument a [simple weather application](https://github.com/elastic/sample-app-android-apm) that comprises two Android UI fragments and a simple local backend service based on Spring Boot.\\n\\nThe first fragment will have a dropdown list with some city names and also a button that takes you to the second one, where you’ll see the selected city’s current temperature. If you pick a non-European city on the first screen, you’ll get an error from the (local) backend when you head to the second screen. This is to demonstrate how network and backend errors are captured and correlated in Elastic APM.\\n\\n### Applying the Elastic APM Agent plugin\\n\\nIn the following, we will explain [all the steps required to set up the Elastic APM Android Agent](https://www.elastic.co/guide/en/apm/agent/android/current/setup.html) from scratch for an Android application. In case you want to skip these instructions and see the agent in action right away, use the main branch of that repo and apply only Step (3.b) before continuing with the next Section (“Setting up the local backend service”).\\n\\n1. Clone the [sample app](https://github.com/elastic/sample-app-android-apm) repo and open it in Android Studio.\\n2. Switch to the uninstrumented repo branch to start from a blank, uninstrumented Android application. You can run this command to switch to the uninstrumented branch:\\n\\n```bash\\ngit checkout uninstrumented\\n```\\n\\n3. 
Follow the Elastic APM Android Agent’s [setup guide](https://www.elastic.co/guide/en/apm/agent/android/current/setup.html):\\n\\nAdd the co.elastic.apm.android plugin to the app/build.gradle file (please make sure to use the latest version available of the plugin, which you can find [here](https://plugins.gradle.org/plugin/co.elastic.apm.android)).\\n\\nConfigure the agent’s connection to the Elastic APM backend by providing the ‘serverUrl’ and ‘secretToken’ in the ‘elasticAPM’ section of the app/build.gradle file.\\n\\n```java\\n// Android app\'s build.gradle file\\nplugins {\\n //...\\n id \\"co.elastic.apm.android\\" version \\"[latest_version]\\"\\n}\\n\\n//...\\n\\nelasticApm {\\n // Minimal configuration\\n serverUrl = \\"https://your.elastic.apm.endpoint\\"\\n\\n // Optional\\n serviceName = \\"weather-sample-app\\"\\n serviceVersion = \\"0.0.1\\"\\n secretToken = \\"your Elastic APM secret token\\"\\n}\\n```\\n\\n4. The only actual code change required is a one-liner to initialize the Elastic APM Android Agent in the Application.onCreate method. The application class for this sample app is located at app/src/main/java/co/elastic/apm/android/sample/MyApp.kt.\\n\\n```kotlin\\n\\npackage co.elastic.apm.android.sample\\n\\nimport android.app.Application\\nimport co.elastic.apm.android.sdk.ElasticApmAgent\\n\\nclass MyApp : Application() {\\n\\n override fun onCreate() {\\n super.onCreate()\\n ElasticApmAgent.initialize(this)\\n }\\n}\\n```\\n\\nBear in mind that for this example, we’re not changing the agent’s default configuration — if you want more information about how to do so, take a look at the agent’s [runtime configuration guide](https://www.elastic.co/guide/en/apm/agent/android/current/configuration.html#_runtime_configuration).\\n\\nBefore launching our Android Weather App, we need to configure and start the local weather-backend service as described in the next section.\\n\\n### Setting up the local backend service\\n\\nOne of the key features the agent provides is distributed tracing, which allows you to see the full end-to-end story of an HTTP transaction, starting from our mobile app and traversing instrumented backend services used by the app. Elastic APM will show you the full picture as one distributed trace, which comes in very handy for troubleshooting issues, especially the ones related to high latency and backend errors.\\n\\nAs part of our sample app, we’re going to launch a simple local backend service that will handle our app’s HTTP requests. The backend service is instrumented with the [Elastic APM Java agent](https://www.elastic.co/guide/en/apm/agent/java/current/index.html) to collect and send its own APM data over to Elastic APM, allowing it to correlate the mobile interactions with the processing of the backend requests.\\n\\nIn order to configure the local server, we need to set our Elastic APM endpoint and secret token (the same used for our Android app in the previous step) into the backend/src/main/resources/elasticapm.properties file:\\n\\n```bash\\nservice_name=weather-backend\\napplication_packages=co.elastic.apm.android.sample\\nserver_url=YOUR_ELASTIC_APM_URL\\nsecret_token=YOUR_ELASTIC_APM_SECRET_TOKEN\\n```\\n\\n### Launching the demo\\n\\nOur sample app will get automatic instrumentation for the agent’s currently [supported frameworks](https://www.elastic.co/guide/en/apm/agent/android/current/supported-technologies.html), which means that we’ll get to see screen rendering spans as well as OkHttp requests out of the box. 
For frameworks not currently supported, you could apply manual instrumentation to enrich your APM data (see “Manual Instrumentation” below).\\n\\nWe are ready to launch the demo. (The demo is meant to be executed on a local environment using an emulator for Android.) Therefore, we need to:\\n\\n1. Launch the backend service using this command in a terminal located in the root directory of our sample project: ./gradlew bootRun (or gradlew.bat bootRun if you’re on Windows). Alternatively, you can start the backend service from Android Studio.\\n2. Launch the weather sample app in an Android emulator (from Android Studio).\\n\\nOnce everything is running, we need to navigate around in the app to generate some load that we would like to observe in Elastic APM. So, select a city, click **Next**, and repeat this multiple times. Please also make sure to select **New York** at least once. You will see that the weather forecast won’t work with New York as the selected city. Below, we will use Elastic APM to find out what’s going wrong when selecting New York.\\n\\n![apm android city selection](/assets/images/monitoring-android-applications-apm/blog-elastic-android-apm-city-selection.png)\\n\\n## First glance at the APM results\\n\\nLet’s open Kibana and navigate to the Observability solution.\\n\\nUnder the Services navigation item, you should see a list of two services: our Android app **weather-sample-app** and the corresponding backend service **weather-backend**. Click on the **Service map** tab to see a visualization of the dependencies between those services and any external services.\\n\\n![apm android services](/assets/images/monitoring-android-applications-apm/blog-elastic-apm-android-services.png)\\n\\nClick on the **weather-sample-app** to dive into the dashboard for the Android app. The service view for mobile applications is in technical preview as of the publishing of this blog post, but you can already see insightful information about the app on that screen. You see information like the number of active sessions in the selected time frame, the number of HTTP requests emitted by the weather-sample-app, and the geographical distribution of the requests, as well as breakdowns on device models, OS versions, network connection types, and app versions. (Information on crashes and app load times is under development.)\\n\\nFor the purpose of demonstration, we kept this demo simple, so the data is less diversified and also rather limited. However, this kind of data is particularly useful when you are monitoring a mobile app with higher usage numbers and higher diversification of device models, OS versions, etc. Troubleshooting problems and performance issues becomes way easier when you can use these properties to filter and group your APM data. You can use the quick filters at the top to do so and see how the metrics adapt depending on your selection.\\n\\n![apm android weather sample app](/assets/images/monitoring-android-applications-apm/blog-elastic-apm-android-weather-sample-app.png)\\n\\nNow, let’s see how individual user interactions are processed, including downstream calls into the backend service. 
Under the Transactions tab (at the top), we see the different end-to-end transaction groups, including the two transactions for the FirstFragment and the SecondFragment.\\n\\n![apm android latency distribution](/assets/images/monitoring-android-applications-apm/blog-elastic-apm-android-latency-distribution.png)\\n\\nLet’s deep dive into the SecondFragment - View appearing transaction to see the metrics (e.g., latency, throughput) for this transaction group and also the invocation waterfall view for the individual user interactions. As we can see in the following screenshot, after view creation, the fragment performs an HTTP GET request to 10.0.2.2, which takes ~130 milliseconds. In the same waterfall, we see that the HTTP call is processed by the weather-backend service, which itself conducts an HTTP call to api.open-meteo.com.\\n\\n![apm android trace samples](/assets/images/monitoring-android-applications-apm/blog-elastic-apm-android-trace-samples.png)\\n\\nNow, when looking at the waterfall view for a request where New York was selected as the city, we see an error happening on the backend service that explains why the forecast didn’t work for New York. By clicking on the red **View related error** badge, you will get details on the error and the actual root cause of the problem.\\n\\nThe exception message on the weather-backend states that “This service can only retrieve geo locations for European cities!” That’s the problem with selecting New York as the city.\\n\\n![apm android weather backend](/assets/images/monitoring-android-applications-apm/blog-elastic-apm-android-weather-backend.png)\\n\\n## Manual instrumentation\\n\\nAs previously mentioned, the Elastic APM Android Agent does a bunch of automatic instrumentation on your behalf for the [supported frameworks](https://www.elastic.co/guide/en/apm/agent/android/current/supported-technologies.html); however, in some cases, you might want to get extra instrumentation depending on your app’s use cases. For those cases, you’re covered by the OpenTelemetry API, which is what the Elastic APM Android Agent is based on. The OpenTelemetry Java SDK contains tools to create custom spans, metrics, and logs. Since it’s the base of the Elastic APM Android Agent, it’s available for you to use without adding any extra dependencies to your project, and you don’t have to configure anything to connect your custom signals to your own Elastic environment either, as the agent does that for you.\\n\\nThe way to start is by getting OpenTelemetry’s instance like so:\\n\\n```java\\nOpenTelemetry openTelemetry = GlobalOpenTelemetry.get();\\n```\\n\\nThen you can follow the instructions from the [OpenTelemetry Java documentation](https://opentelemetry.io/docs/instrumentation/java/manual/#acquiring-a-tracer) to create your custom signals. See the following example for the creation of a custom span:\\n\\n```java\\nOpenTelemetry openTelemetry = GlobalOpenTelemetry.get();\\nTracer tracer = openTelemetry.getTracer(\\"instrumentation-library-name\\", \\"1.0.0\\");\\nSpan span = tracer.spanBuilder(\\"my span\\").startSpan();\\n\\n// Make the span the current span\\ntry (Scope ss = span.makeCurrent()) {\\n // In this scope, the span is the current/active span\\n} finally {\\n span.end();\\n}\\n```\\n\\n## Conclusion\\n\\nIn this blog post, we demonstrated how you can use the Elastic APM Android Agent to achieve end-to-end observability into your Android-based mobile applications. 
Setting up the agent is a matter of a few minutes, and the provided insights allow you to analyze your app’s performance and its dependencies on backend services. With the Elastic APM Android Agent in place, you can leverage Elastic’s rich APM features as well as the various possibilities to customize your analysis workflows through custom instrumentation and custom dashboards.\\n\\nAre you curious? Then try it yourself. Sign up for a [free trial on the Elastic Cloud](https://www.elastic.co/cloud/elasticsearch-service/signup), enrich your Android app with the Elastic APM Android agent as described in this blog, and explore the data in [Elastic’s Observability solution](https://www.elastic.co/observability).\\n","code":"var Component=(()=>{var p=Object.create;var o=Object.defineProperty;var u=Object.getOwnPropertyDescriptor;var m=Object.getOwnPropertyNames;var g=Object.getPrototypeOf,w=Object.prototype.hasOwnProperty;var f=(t,e)=>()=>(e||t((e={exports:{}}).exports,e),e.exports),y=(t,e)=>{for(var a in e)o(t,a,{get:e[a],enumerable:!0})},s=(t,e,a,r)=>{if(e&&typeof e==\\"object\\"||typeof e==\\"function\\")for(let i of m(e))!w.call(t,i)&&i!==a&&o(t,i,{get:()=>e[i],enumerable:!(r=u(e,i))||r.enumerable});return t};var b=(t,e,a)=>(a=t!=null?p(g(t)):{},s(e||!t||!t.__esModule?o(a,\\"default\\",{value:t,enumerable:!0}):a,t)),v=t=>s(o({},\\"__esModule\\",{value:!0}),t);var c=f((P,l)=>{l.exports=_jsx_runtime});var k={};y(k,{default:()=>h,frontmatter:()=>A});var n=b(c()),A={title:\\"Monitoring Android applications with Elastic APM\\",slug:\\"monitoring-android-applications-apm\\",date:\\"2023-03-21\\",description:\\"Elastic has launched its APM agent for Android applications, allowing developers to track key aspects of applications to help troubleshoot issues and performance flaws with mobile applications, corresponding backend services, and their interactions.\\",author:[{slug:\\"alexander-wert\\"},{slug:\\"cesar-munoz\\"}],image:\\"illustration-indusrty-technology-social-1680x980.png\\",tags:[{slug:\\"apm\\"},{slug:\\"opentelemetry\\"},{slug:\\"mobile-apm\\"}]};function d(t){let e={a:\\"a\\",code:\\"code\\",h2:\\"h2\\",h3:\\"h3\\",img:\\"img\\",li:\\"li\\",ol:\\"ol\\",p:\\"p\\",pre:\\"pre\\",strong:\\"strong\\",ul:\\"ul\\",...t.components};return(0,n.jsxs)(n.Fragment,{children:[(0,n.jsxs)(e.p,{children:[\\"People are handling more and more matters on their smartphones through mobile apps both privately and professionally. With thousands or even millions of users, ensuring great \\",(0,n.jsx)(e.a,{href:\\"https://www.elastic.co/observability/application-performance-monitoring\\",rel:\\"nofollow\\",children:\\"application performance\\"}),\\" and reliability is a key challenge for providers and operators of mobile apps and related backend services. 
Understanding the behavior of mobile apps, the occurrences and types of crashes, the \\",(0,n.jsx)(e.a,{href:\\"https://www.elastic.co/blog/apm-correlations-elastic-observability-root-cause-transactions\\",rel:\\"nofollow\\",children:\\"root causes of slow response times\\"}),\\", and the real user impact of backend issues is key to managing the performance of mobile apps and associated backend services.\\"]}),`\\n`,(0,n.jsxs)(e.p,{children:[\\"Elastic has launched its application performance monitoring (\\",(0,n.jsx)(e.a,{href:\\"https://www.elastic.co/observability/application-performance-monitoring\\",rel:\\"nofollow\\",children:\\"APM\\"}),\\") agent for Android applications, allowing developers to keep track of key aspects of their applications, from crashes and HTTP requests to screen rendering times and end-to-end distributed tracing. All of this helps troubleshoot issues and performance flaws with mobile applications, corresponding backend services, and their interaction. The Elastic APM Android Agent automatically instruments your application and its dependencies so that you can simply \\\\u201Cplug-and-play\\\\u201D the agent into your application without having to worry about changing your codebase much.\\"]}),`\\n`,(0,n.jsx)(e.p,{children:\\"The Elastic APM Android Agent has been developed from scratch on top of OpenTelemetry, an open standard and framework for observability. Developers will be able to take full advantage of its capabilities, as well as the support provided by a huge and active community. If you\\\\u2019re familiar with OpenTelemetry and your application is already instrumented with OpenTelemetry, then you can simply reuse it all when switching to the Elastic APM Android Agent. But no worries if that\\\\u2019s not the case \\\\u2014 the agent is configured to handle common traceable scenarios automatically without having to deep dive into the specifics of the OpenTelemetry API.\\"}),`\\n`,(0,n.jsxs)(e.p,{children:[\\"[Related article: \\",(0,n.jsx)(e.a,{href:\\"https://www.elastic.co/blog/adding-free-and-open-elastic-apm-as-part-of-your-elastic-observability-deployment\\",rel:\\"nofollow\\",children:\\"Adding free and open Elastic APM as part of your Elastic Observability deployment\\"}),\\"]\\"]}),`\\n`,(0,n.jsx)(e.h2,{id:\\"how-it-works\\",children:\\"How it works\\"}),`\\n`,(0,n.jsx)(e.p,{children:\\"The Elastic APM Android Agent is a combination of an SDK plus a Gradle plugin. The SDK contains utilities that will let you initialize and configure the agent\\\\u2019s behavior, as well as prepare and initialize the OpenTelemetry SDK. You can use the SDK for programmatic configuration and initialization of the agent, in particular for advanced and special use cases.\\"}),`\\n`,(0,n.jsx)(e.p,{children:\\"In most cases, a programmatic configuration and initialization won\\\\u2019t be necessary. Instead, you can use the provided Gradle plugin to configure the agent and automatically instrument your app. 
The Gradle plugin uses Byte Buddy and the official Android Gradle plugin API under the hood to automatically inject instrumentation code into your app through compile-time transformation of your application\\\\u2019s and its dependencies\\\\u2019 classes.\\"}),`\\n`,(0,n.jsx)(e.p,{children:\\"Compiling your app with the Elastic Android APM Agent Gradle Plugin configured and enabled will make your Android app report tracing data, metrics, and different events and logs at runtime.\\"}),`\\n`,(0,n.jsx)(e.h2,{id:\\"using-the-elastic-apm-agent-in-an-android-app\\",children:\\"Using the Elastic APM Agent in an Android app\\"}),`\\n`,(0,n.jsxs)(e.p,{children:[\\"By means of a \\",(0,n.jsx)(e.a,{href:\\"https://github.com/elastic/sample-app-android-apm\\",rel:\\"nofollow\\",children:\\"simple demo application\\"}),\\", we\\\\u2019re going through the steps mentioned in the \\\\u201C\\",(0,n.jsx)(e.a,{href:\\"https://www.elastic.co/guide/en/apm/agent/android/current/setup.html\\",rel:\\"nofollow\\",children:\\"Set up the Agent\\"}),\\"\\\\u201D guide to set up the Elastic Android APM Agent.\\"]}),`\\n`,(0,n.jsx)(e.h3,{id:\\"prerequisites\\",children:\\"Prerequisites\\"}),`\\n`,(0,n.jsx)(e.p,{children:\\"For this example, you will need the following:\\"}),`\\n`,(0,n.jsxs)(e.ul,{children:[`\\n`,(0,n.jsxs)(e.li,{children:[\\"An Elastic Stack with APM enabled (We recommend using Elastic\\\\u2019s Cloud offering. \\",(0,n.jsx)(e.a,{href:\\"https://www.elastic.co/cloud/elasticsearch-service/signup?baymax=docs-body&elektra=docs\\",rel:\\"nofollow\\",children:\\"Try it for free\\"}),\\".)\\"]}),`\\n`,(0,n.jsx)(e.li,{children:\\"Java 11+\\"}),`\\n`,(0,n.jsx)(e.li,{children:(0,n.jsx)(e.a,{href:\\"https://developer.android.com/studio?gclid=Cj0KCQiAic6eBhCoARIsANlox87QsDnyjpKObQSivZz6DHMLTiL76CmqZGXTEqf4L7h3jQO7ljm8B14aAo4xEALw_wcB&gclsrc=aw.ds\\",rel:\\"nofollow\\",children:\\"Android Studio\\"})}),`\\n`,(0,n.jsx)(e.li,{children:(0,n.jsx)(e.a,{href:\\"https://developer.android.com/studio/run/emulator\\",rel:\\"nofollow\\",children:\\"Android Emulator, AVD device\\"})}),`\\n`]}),`\\n`,(0,n.jsxs)(e.p,{children:[\\"You\\\\u2019ll also need a way to push the app\\\\u2019s \\",(0,n.jsx)(e.a,{href:\\"https://opentelemetry.io/docs/concepts/signals/\\",rel:\\"nofollow\\",children:\\"signals\\"}),\\" into Elastic. Therefore, you will need Elastic APM\\\\u2019s \\",(0,n.jsx)(e.a,{href:\\"https://www.elastic.co/guide/en/apm/guide/current/secret-token.html#create-secret-token\\",rel:\\"nofollow\\",children:\\"secret token\\"}),\\" that you\\\\u2019ll configure into our sample app later.\\"]}),`\\n`,(0,n.jsx)(e.h3,{id:\\"test-project-for-our-example\\",children:\\"Test project for our example\\"}),`\\n`,(0,n.jsxs)(e.p,{children:[\\"To showcase an end-to-end scenario including distributed tracing, in this example, we\\\\u2019ll instrument a \\",(0,n.jsx)(e.a,{href:\\"https://github.com/elastic/sample-app-android-apm\\",rel:\\"nofollow\\",children:\\"simple weather application\\"}),\\" that comprises two Android UI fragments and a simple local backend service based on Spring Boot.\\"]}),`\\n`,(0,n.jsx)(e.p,{children:\\"The first fragment will have a dropdown list with some city names and also a button that takes you to the second one, where you\\\\u2019ll see the selected city\\\\u2019s current temperature. If you pick a non-European city on the first screen, you\\\\u2019ll get an error from the (local) backend when you head to the second screen. 
This is to demonstrate how network and backend errors are captured and correlated in Elastic APM.\\"}),`\\n`,(0,n.jsx)(e.h3,{id:\\"applying-the-elastic-apm-agent-plugin\\",children:\\"Applying the Elastic APM Agent plugin\\"}),`\\n`,(0,n.jsxs)(e.p,{children:[\\"In the following, we will explain \\",(0,n.jsx)(e.a,{href:\\"https://www.elastic.co/guide/en/apm/agent/android/current/setup.html\\",rel:\\"nofollow\\",children:\\"all the steps required to set up the Elastic APM Android Agent\\"}),\\" from scratch for an Android application. In case you want to skip these instructions and see the agent in action right away, use the main branch of that repo and apply only Step (3.b) before continuing with the next Section (\\\\u201CSetting up the local backend service\\\\u201D).\\"]}),`\\n`,(0,n.jsxs)(e.ol,{children:[`\\n`,(0,n.jsxs)(e.li,{children:[\\"Clone the \\",(0,n.jsx)(e.a,{href:\\"https://github.com/elastic/sample-app-android-apm\\",rel:\\"nofollow\\",children:\\"sample app\\"}),\\" repo and open it in Android Studio.\\"]}),`\\n`,(0,n.jsx)(e.li,{children:\\"Switch to the uninstrumented repo branch to start from a blank, uninstrumented Android application. You can run this command to switch to the uninstrumented branch:\\"}),`\\n`]}),`\\n`,(0,n.jsx)(e.pre,{children:(0,n.jsx)(e.code,{className:\\"language-bash\\",children:`git checkout uninstrumented\\n`})}),`\\n`,(0,n.jsxs)(e.ol,{start:\\"3\\",children:[`\\n`,(0,n.jsxs)(e.li,{children:[\\"Follow the Elastic APM Android Agent\\\\u2019s \\",(0,n.jsx)(e.a,{href:\\"https://www.elastic.co/guide/en/apm/agent/android/current/setup.html\\",rel:\\"nofollow\\",children:\\"setup guide\\"}),\\":\\"]}),`\\n`]}),`\\n`,(0,n.jsxs)(e.p,{children:[\\"Add the co.elastic.apm.android plugin to the app/build.gradle file (please make sure to use the latest version available of the plugin, which you can find \\",(0,n.jsx)(e.a,{href:\\"https://plugins.gradle.org/plugin/co.elastic.apm.android\\",rel:\\"nofollow\\",children:\\"here\\"}),\\").\\"]}),`\\n`,(0,n.jsx)(e.p,{children:\\"Configure the agent\\\\u2019s connection to the Elastic APM backend by providing the \\\\u2018serverUrl\\\\u2019 and \\\\u2018secretToken\\\\u2019 in the \\\\u2018elasticAPM\\\\u2019 section of the app/build.gradle file.\\"}),`\\n`,(0,n.jsx)(e.pre,{children:(0,n.jsx)(e.code,{className:\\"language-java\\",children:`// Android app\'s build.gradle file\\nplugins {\\n //...\\n id \\"co.elastic.apm.android\\" version \\"[latest_version]\\"\\n}\\n\\n//...\\n\\nelasticApm {\\n // Minimal configuration\\n serverUrl = \\"https://your.elastic.apm.endpoint\\"\\n\\n // Optional\\n serviceName = \\"weather-sample-app\\"\\n serviceVersion = \\"0.0.1\\"\\n secretToken = \\"your Elastic APM secret token\\"\\n}\\n`})}),`\\n`,(0,n.jsxs)(e.ol,{start:\\"4\\",children:[`\\n`,(0,n.jsx)(e.li,{children:\\"The only actual code change required is a one-liner to initialize the Elastic APM Android Agent in the Application.onCreate method. 
The application class for this sample app is located at app/src/main/java/co/elastic/apm/android/sample/MyApp.kt.\\"}),`\\n`]}),`\\n`,(0,n.jsx)(e.pre,{children:(0,n.jsx)(e.code,{className:\\"language-kotlin\\",children:`\\npackage co.elastic.apm.android.sample\\n\\nimport android.app.Application\\nimport co.elastic.apm.android.sdk.ElasticApmAgent\\n\\nclass MyApp : Application() {\\n\\n override fun onCreate() {\\n super.onCreate()\\n ElasticApmAgent.initialize(this)\\n }\\n}\\n`})}),`\\n`,(0,n.jsxs)(e.p,{children:[\\"Bear in mind that for this example, we\\\\u2019re not changing the agent\\\\u2019s default configuration \\\\u2014 if you want more information about how to do so, take a look at the agent\\\\u2019s \\",(0,n.jsx)(e.a,{href:\\"https://www.elastic.co/guide/en/apm/agent/android/current/configuration.html#_runtime_configuration\\",rel:\\"nofollow\\",children:\\"runtime configuration guide\\"}),\\".\\"]}),`\\n`,(0,n.jsx)(e.p,{children:\\"Before launching our Android Weather App, we need to configure and start the local weather-backend service as described in the next section.\\"}),`\\n`,(0,n.jsx)(e.h3,{id:\\"setting-up-the-local-backend-service\\",children:\\"Setting up the local backend service\\"}),`\\n`,(0,n.jsx)(e.p,{children:\\"One of the key features the agent provides is distributed tracing, which allows you to see the full end-to-end story of an HTTP transaction, starting from our mobile app and traversing instrumented backend services used by the app. Elastic APM will show you the full picture as one distributed trace, which comes in very handy for troubleshooting issues, especially the ones related to high latency and backend errors.\\"}),`\\n`,(0,n.jsxs)(e.p,{children:[\\"As part of our sample app, we\\\\u2019re going to launch a simple local backend service that will handle our app\\\\u2019s HTTP requests. The backend service is instrumented with the \\",(0,n.jsx)(e.a,{href:\\"https://www.elastic.co/guide/en/apm/agent/java/current/index.html\\",rel:\\"nofollow\\",children:\\"Elastic APM Java agent\\"}),\\" to collect and send its own APM data over to Elastic APM, allowing it to correlate the mobile interactions with the processing of the backend requests.\\"]}),`\\n`,(0,n.jsx)(e.p,{children:\\"In order to configure the local server, we need to set our Elastic APM endpoint and secret token (the same used for our Android app in the previous step) into the backend/src/main/resources/elasticapm.properties file:\\"}),`\\n`,(0,n.jsx)(e.pre,{children:(0,n.jsx)(e.code,{className:\\"language-bash\\",children:`service_name=weather-backend\\napplication_packages=co.elastic.apm.android.sample\\nserver_url=YOUR_ELASTIC_APM_URL\\nsecret_token=YOUR_ELASTIC_APM_SECRET_TOKEN\\n`})}),`\\n`,(0,n.jsx)(e.h3,{id:\\"launching-the-demo\\",children:\\"Launching the demo\\"}),`\\n`,(0,n.jsxs)(e.p,{children:[\\"Our sample app will get automatic instrumentation for the agent\\\\u2019s currently \\",(0,n.jsx)(e.a,{href:\\"https://www.elastic.co/guide/en/apm/agent/android/current/supported-technologies.html\\",rel:\\"nofollow\\",children:\\"supported frameworks\\"}),\\", which means that we\\\\u2019ll get to see screen rendering spans as well as OkHttp requests out of the box. For frameworks not currently supported, you could apply manual instrumentation to enrich your APM data (see \\\\u201CManual Instrumentation\\\\u201D below).\\"]}),`\\n`,(0,n.jsx)(e.p,{children:\\"We are ready to launch the demo. (The demo is meant to be executed on a local environment using an emulator for Android.) 
Therefore, we need to:\\"}),`\\n`,(0,n.jsxs)(e.ol,{children:[`\\n`,(0,n.jsx)(e.li,{children:\\"Launch the backend service using this command in a terminal located in the root directory of our sample project: ./gradlew bootRun (or gradlew.bat bootRun if you\\\\u2019re on Windows). Alternatively, you can start the backend service from Android Studio.\\"}),`\\n`,(0,n.jsx)(e.li,{children:\\"Launch the weather sample app in an Android emulator (from Android Studio).\\"}),`\\n`]}),`\\n`,(0,n.jsxs)(e.p,{children:[\\"Once everything is running, we need to navigate around in the app to generate some load that we would like to observe in Elastic APM. So, select a city, click \\",(0,n.jsx)(e.strong,{children:\\"Next\\"}),\\" and repeat it multiple times. Please, also make sure to select \\",(0,n.jsx)(e.strong,{children:\\"New York\\"}),\\" at least once. You will see that the weather forecast won\\\\u2019t work for New York as the city. Below, we will use Elastic APM to find out what\\\\u2019s going wrong when selecting New York.\\"]}),`\\n`,(0,n.jsx)(e.p,{children:(0,n.jsx)(e.img,{src:\\"/assets/images/monitoring-android-applications-apm/blog-elastic-android-apm-city-selection.png\\",alt:\\"apm android city selection\\",width:\\"1999\\",height:\\"1344\\"})}),`\\n`,(0,n.jsx)(e.h2,{id:\\"first-glance-at-the-apm-results\\",children:\\"First glance at the APM results\\"}),`\\n`,(0,n.jsx)(e.p,{children:\\"Let\\\\u2019s open Kibana and navigate to the Observability solution.\\"}),`\\n`,(0,n.jsxs)(e.p,{children:[\\"Under the Services navigation item, you should see a list of two services: our Android app \\",(0,n.jsx)(e.strong,{children:\\"weather-sample-app\\"}),\\" and the corresponding backend service \\",(0,n.jsx)(e.strong,{children:\\"weather-backend\\"}),\\". Click on the \\",(0,n.jsx)(e.strong,{children:\\"Service map\\"}),\\" tab to see a visualization of the dependencies between those services and any external services.\\"]}),`\\n`,(0,n.jsx)(e.p,{children:(0,n.jsx)(e.img,{src:\\"/assets/images/monitoring-android-applications-apm/blog-elastic-apm-android-services.png\\",alt:\\"apm android services\\",width:\\"1999\\",height:\\"1228\\"})}),`\\n`,(0,n.jsxs)(e.p,{children:[\\"Click on the \\",(0,n.jsx)(e.strong,{children:\\"weather-sample-app\\"}),\\" to dive into the dashboard for the Android app. The service view for mobile applications is in technical preview at the publishing of this blog post, but you can already see insightful information about the app on that screen. You see information like the amount of active sessions in the selected time frame, number of HTTP requests emitted by the weather-sample-app, geographical distribution of the requests as well as breakdowns on device models, OS versions, network connection types, and app versions. (Information on crashes and app load times are under development.)\\"]}),`\\n`,(0,n.jsx)(e.p,{children:\\"For the purpose of demonstration, we kept this demo simple, so the data is less diversified and also rather limited. However, this kind of data is particularly useful when you are monitoring a mobile app with higher usage numbers and higher diversification on device models, OS versions, etc. Troubleshooting problems and performance issues becomes way easier when you can use these properties to filter and group your APM data. 
You can use the quick filters at the top to do so and see how the metrics adapt depending on your selection.\\"}),`\\n`,(0,n.jsx)(e.p,{children:(0,n.jsx)(e.img,{src:\\"/assets/images/monitoring-android-applications-apm/blog-elastic-apm-android-weather-sample-app.png\\",alt:\\"apm android weather sample app\\",width:\\"1999\\",height:\\"1313\\"})}),`\\n`,(0,n.jsx)(e.p,{children:\\"Now, let\\\\u2019s see how individual user interactions are processed, including downstream calls into the backend service. Under the Transactions tab (at the top), we see the different end-to-end transaction groups, including the two transactions for the FirstFragment and the SecondFragment.\\"}),`\\n`,(0,n.jsx)(e.p,{children:(0,n.jsx)(e.img,{src:\\"/assets/images/monitoring-android-applications-apm/blog-elastic-apm-android-latency-distribution.png\\",alt:\\"apm android latency distribution\\",width:\\"1999\\",height:\\"1392\\"})}),`\\n`,(0,n.jsx)(e.p,{children:\\"Let\\\\u2019s deep dive into the SecondFragment - View appearing transaction to see the metrics (e.g., latency, throughput) for this transaction group and also the invocation waterfall view for the individual user interactions. As we can see in the following screenshot, after view creation, the fragment performs an HTTP GET request to 10.0.2.2, which takes ~130 milliseconds. In the same waterfall, we see that the HTTP call is processed by the weather-backend service, which itself conducts an HTTP call to api.open-meteo.com.\\"}),`\\n`,(0,n.jsx)(e.p,{children:(0,n.jsx)(e.img,{src:\\"/assets/images/monitoring-android-applications-apm/blog-elastic-apm-android-trace-samples.png\\",alt:\\"apm android trace samples\\",width:\\"1999\\",height:\\"1390\\"})}),`\\n`,(0,n.jsxs)(e.p,{children:[\\"Now, when looking at the waterfall view for a request where New York was selected as the city, we see an error happening on the backend service that explains why the forecast didn\\\\u2019t work for New York. By clicking on the red \\",(0,n.jsx)(e.strong,{children:\\"View related error\\"}),\\" badge, you will get details on the error and the actual root cause of the problem.\\"]}),`\\n`,(0,n.jsx)(e.p,{children:\\"The exception message on the weather-backend states that \\\\u201CThis service can only retrieve geo locations for European cities!\\\\u201D That\\\\u2019s the problem with selecting New York as the city.\\"}),`\\n`,(0,n.jsx)(e.p,{children:(0,n.jsx)(e.img,{src:\\"/assets/images/monitoring-android-applications-apm/blog-elastic-apm-android-weather-backend.png\\",alt:\\"apm android weather backend\\",width:\\"1999\\",height:\\"1376\\"})}),`\\n`,(0,n.jsx)(e.h2,{id:\\"manual-instrumentation\\",children:\\"Manual instrumentation\\"}),`\\n`,(0,n.jsxs)(e.p,{children:[\\"As previously mentioned, the Elastic APM Android Agent does a bunch of automatic instrumentation on your behalf for the \\",(0,n.jsx)(e.a,{href:\\"https://www.elastic.co/guide/en/apm/agent/android/current/supported-technologies.html\\",rel:\\"nofollow\\",children:\\"supported frameworks\\"}),\\"; however, in some cases, you might want to get extra instrumentation depending on your app\\\\u2019s use cases. For those cases, you\\\\u2019re covered by the OpenTelemetry API, which is what the Elastic APM Android Agent is based on. 
The OpenTelemetry Java SDK contains tools to create custom spans, metrics, and logs, and since it\\\\u2019s the base of the Elastic APM Android Agent, it\\\\u2019s available for you to use without having to add any extra dependencies into your project and without having to configure anything to connect your custom signals to your own Elastic environment either, as the agent does that for you.\\"]}),`\\n`,(0,n.jsx)(e.p,{children:\\"The way to start would be by getting OpenTelemetry\\\\u2019s instance like so:\\"}),`\\n`,(0,n.jsx)(e.pre,{children:(0,n.jsx)(e.code,{className:\\"language-java\\",children:`OpenTelemetry openTelemetry = GlobalOpenTelemetry.get();\\n`})}),`\\n`,(0,n.jsxs)(e.p,{children:[\\"And then you can follow the instructions from the \\",(0,n.jsx)(e.a,{href:\\"https://opentelemetry.io/docs/instrumentation/java/manual/#acquiring-a-tracer\\",rel:\\"nofollow\\",children:\\"OpenTelemetry Java documentation\\"}),\\" in order to create your custom signals. See the following example for the creation of a custom span:\\"]}),`\\n`,(0,n.jsx)(e.pre,{children:(0,n.jsx)(e.code,{className:\\"language-java\\",children:`OpenTelemetry openTelemetry = GlobalOpenTelemetry.get();\\nTracer tracer = openTelemetry.getTracer(\\"instrumentation-library-name\\", \\"1.0.0\\");\\nSpan span = tracer.spanBuilder(\\"my span\\").startSpan();\\n\\n// Make the span the current span\\ntry (Scope ss = span.makeCurrent()) {\\n // In this scope, the span is the current/active span\\n} finally {\\n span.end();\\n}\\n`})}),`\\n`,(0,n.jsx)(e.h2,{id:\\"conclusion\\",children:\\"Conclusion\\"}),`\\n`,(0,n.jsx)(e.p,{children:\\"In this blog post, we demonstrated how you can use the Elastic APM Android Agent to achieve end-to-end observability into your Android-based mobile applications. Setting up the agent is a matter of a few minutes and the provided insights allow you to analyze your app\\\\u2019s performance and its dependencies on backend services. With the Elastic APM Android Agent in place, you can leverage Elastic\\\\u2019s rich APM feature as well as the various possibilities to customize your analysis workflows through custom instrumentation and custom dashboards.\\"}),`\\n`,(0,n.jsxs)(e.p,{children:[\\"Are you curious? Then try it yourself. 
Sign up for a \\",(0,n.jsx)(e.a,{href:\\"https://www.elastic.co/cloud/elasticsearch-service/signup\\",rel:\\"nofollow\\",children:\\"free trial on the Elastic Cloud\\"}),\\", enrich your Android app with the Elastic APM Android agent as described in this blog, and explore the data in \\",(0,n.jsx)(e.a,{href:\\"https://www.elastic.co/observability\\",rel:\\"nofollow\\",children:\\"Elastic\\\\u2019s Observability solution\\"}),\\".\\"]})]})}function h(t={}){let{wrapper:e}=t.components||{};return e?(0,n.jsx)(e,{...t,children:(0,n.jsx)(d,{...t})}):d(t)}return v(k);})();\\n;return Component;"},"_id":"articles/monitoring-android-applications-elastic-apm.mdx","_raw":{"sourceFilePath":"articles/monitoring-android-applications-elastic-apm.mdx","sourceFileName":"monitoring-android-applications-elastic-apm.mdx","sourceFileDir":"articles","contentType":"mdx","flattenedPath":"articles/monitoring-android-applications-elastic-apm"},"type":"Article","imageUrl":"/assets/images/monitoring-android-applications-apm/illustration-indusrty-technology-social-1680x980.png","readingTime":"12 min read","url":"/monitoring-android-applications-apm","headings":[{"level":2,"title":"How it works","href":"#how-it-works"},{"level":2,"title":"Using the Elastic APM Agent in an Android app","href":"#using-the-elastic-apm-agent-in-an-android-app"},{"level":3,"title":"Prerequisites","href":"#prerequisites"},{"level":3,"title":"Test project for our example","href":"#test-project-for-our-example"},{"level":3,"title":"Applying the Elastic APM Agent plugin","href":"#applying-the-elastic-apm-agent-plugin"},{"level":3,"title":"Setting up the local backend service","href":"#setting-up-the-local-backend-service"},{"level":3,"title":"Launching the demo","href":"#launching-the-demo"},{"level":2,"title":"First glance at the APM results","href":"#first-glance-at-the-apm-results"},{"level":2,"title":"Manual instrumentation","href":"#manual-instrumentation"},{"level":2,"title":"Conclusion","href":"#conclusion"}]},{"title":"Native OpenTelemetry support in Elastic Observability","slug":"native-opentelemetry-support-in-elastic-observability","date":"2023-09-13","description":"Elastic offers native support for OpenTelemetry by allowing for direct ingest of OpenTelemetry traces, metrics, and logs without conversion, and applying any Elastic feature against OTel data without degradation in capabilities.","image":"ecs-otel-announcement-2.jpg","author":[{"slug":"bahubali-shetti","type":"Author","_raw":{}}],"tags":[{"slug":"opentelemetry","type":"Tag","_raw":{}},{"slug":"apm","type":"Tag","_raw":{}},{"slug":"kubernetes","type":"Tag","_raw":{}}],"body":{"raw":"\\nOpenTelemetry is more than just becoming the open ingestion standard for observability. As one of the major Cloud Native Computing Foundation (CNCF) projects, with as many commits as Kubernetes, it is gaining support from major ISVs and cloud providers delivering support for the framework. Many global companies from finance, insurance, tech, and other industries are starting to standardize on OpenTelemetry. With OpenTelemetry, DevOps teams have a consistent approach to collecting and ingesting telemetry data providing a de-facto standard for observability.\\n\\nElastic\xae is strategically standardizing on OpenTelemetry for the main data collection architecture for observability and security. Additionally, Elastic is making a commitment to help OpenTelemetry become the best de facto data collection infrastructure for the observability ecosystem. 
Elastic is deepening its relationship with OpenTelemetry beyond the recent contribution of Elastic Common Schema (ECS) to OpenTelemetry (OTel).\\n\\nToday, Elastic supports OpenTelemetry natively (since Elastic 7.14), directly ingesting OpenTelemetry protocol (OTLP) based traces, metrics, and logs.\\n\\n![otel configuration options](/assets/images/native-opentelemetry-support-in-elastic-observability/elastic-blog-1-otel-config-options.png)\\n\\nIn this blog, we’ll review the current OpenTelemetry support provided by Elastic, which includes the following:\\n\\n- [**Easy ingest of distributed tracing and metrics**](#ingesting-opentelemetry-into-elastic) for applications configured with OpenTelemetry agents for Python, NodeJS, Java, Go, and .NET\\n- [**OpenTelemetry logs instrumentation and ingest**](#opentelemetry-logs-in-elastic) using various configurations\\n- [**Open semantic conventions**](#opentelemetry-is-elastics-preferred-schema) for logs and more through ECS, which is not part of OpenTelemetry\\n- [**Machine learning based AIOps capabilities**](#elastic-observability-apm-and-machine-learning-capabilities), such as latency correlations, failure correlations, anomaly detection, log spike analysis, predictive pattern analysis, Elastic AI Assistant support, and more, all apply to native OTLP telemetry.\\n- [**Migrate applications to OpenTelemetry at your own speed**](#elastic-allows-you-to-migrate-to-otel-on-your-schedule). Elastic’s APM capabilities all work seamlessly even with a mix of services using OpenTelemetry and/or Elastic APM agents. You can even combine OpenTelemetry instrumentation with Elastic Agent.\\n- [**Integrated views and analysis with Kubernetes clusters**](#integrated-kubernetes-and-opentelemetry-views-in-elastic), which most OpenTelemetry applications are running on. Elastic can highlight specific pods and containers related to each service when analyzing issues for applications based on OpenTelemetry.\\n\\n## Ingesting OpenTelemetry into Elastic\\n\\nIf you’re interested in seeing how simple it is to ingest OpenTelemetry traces and metrics into Elastic, follow the steps outlined in this blog.\\n\\nLet’s outline what Elastic provides for ingesting OpenTelemetry data. Here are all your options:\\n\\n![flowchart](/assets/images/native-opentelemetry-support-in-elastic-observability/elastic-blog-2-flowchart.png)\\n\\n### Using the OpenTelemetry Collector\\n\\nWhen using the OpenTelemetry Collector, which is the most common configuration option, you simply have to add two key variables.\\n\\nThe instructions utilize a specific opentelemetry-collector configuration for Elastic. Essentially, the Elastic [values.yaml](https://github.com/elastic/opentelemetry-demo/blob/main/kubernetes/elastic-helm/values.yaml) file specified in the elastic/opentelemetry-demo repo configures the opentelemetry-collector to point to the Elastic APM Server using two main values:\\n\\n- OTEL_EXPORTER_OTLP_ENDPOINT: the URL of Elastic’s APM Server\\n- OTEL_EXPORTER_OTLP_HEADERS: the Elastic authorization header\\n\\nThese two values can be found in the OpenTelemetry setup instructions under the APM integration instructions (Integrations-\\>APM) in your Elastic Cloud.\\n\\n### Native OpenTelemetry agents embedded in code\\n\\nIf you are thinking of using OpenTelemetry libraries in your code, you can simply point the service to Elastic’s APM server, because it supports the native OTLP protocol. 
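For illustration, here is a minimal, hedged sketch (not the official setup) of wiring the OpenTelemetry Java SDK straight to an Elastic APM Server over OTLP/gRPC. The endpoint URL and secret token are placeholders standing in for the two values above:

```java
// Minimal sketch: exporting spans from the OpenTelemetry Java SDK directly
// to an Elastic APM Server over OTLP/gRPC. The endpoint and token below are
// placeholders for your own values, i.e., the same pair otherwise passed via
// OTEL_EXPORTER_OTLP_ENDPOINT and OTEL_EXPORTER_OTLP_HEADERS.
import io.opentelemetry.api.OpenTelemetry;
import io.opentelemetry.exporter.otlp.trace.OtlpGrpcSpanExporter;
import io.opentelemetry.sdk.OpenTelemetrySdk;
import io.opentelemetry.sdk.trace.SdkTracerProvider;
import io.opentelemetry.sdk.trace.export.BatchSpanProcessor;

public final class ElasticOtlpBootstrap {

  public static OpenTelemetry init() {
    // Span exporter pointed at the Elastic APM Server (OTLP endpoint).
    OtlpGrpcSpanExporter exporter =
        OtlpGrpcSpanExporter.builder()
            .setEndpoint("https://YOUR_DEPLOYMENT.apm.REGION.cloud.es.io:443")
            .addHeader("Authorization", "Bearer YOUR_SECRET_TOKEN")
            .build();

    // Batch spans before sending them, the usual default for production.
    SdkTracerProvider tracerProvider =
        SdkTracerProvider.builder()
            .addSpanProcessor(BatchSpanProcessor.builder(exporter).build())
            .build();

    return OpenTelemetrySdk.builder().setTracerProvider(tracerProvider).build();
  }
}
```

The same two values drive the collector-based setup above; only where they are configured differs.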
No special Elastic conversion is needed.\\n\\nTo demonstrate this effectively and provide some education on how to use OpenTelemetry, we have two applications you can use to learn from:\\n\\n- [Elastic’s version of OpenTelemetry demo](https://github.com/elastic/opentelemetry-demo): As with all the other observability vendors, we have our own forked version of the OpenTelemetry demo.\\n- [Elastiflix:](https://github.com/elastic/workshops-instruqt/tree/main/Elastiflix) This demo application is an example to help you learn how to instrument different languages and telemetry signals.\\n\\nCheck out our blogs on using the Elastiflix application and instrumenting with OpenTelemetry:\\n\\n- [Elastiflix application](https://www.elastic.co/blog/getting-started-opentelemetry-instrumentation-sample-app), a guide to instrument different languages with OpenTelemetry\\n- Python: [Auto-instrumentation](https://www.elastic.co/blog/auto-instrumentation-of-python-applications-opentelemetry), [Manual-instrumentation](https://www.elastic.co/blog/manual-instrumentation-of-python-applications-opentelemetry)\\n- Java: [Auto-instrumentation](https://www.elastic.co/blog/auto-instrumentation-of-java-applications-opentelemetry), [Manual-instrumentation](https://www.elastic.co/blog/manual-instrumentation-of-java-applications-opentelemetry)\\n- Node.js: [Auto-instrumentation](https://www.elastic.co/blog/auto-instrument-nodejs-applications-opentelemetry), [Manual-instrumentation](https://www.elastic.co/blog/manual-instrumentation-of-nodejs-applications-opentelemetry)\\n- .NET: [Auto-instrumentation](https://www.elastic.co/blog/auto-instrumentation-of-net-applications-opentelemetry), [Manual-instrumentation](https://www.elastic.co/blog/manual-instrumentation-of-net-applications-opentelemetry)\\n\\nWe have created YouTube videos on these topics as well:\\n\\n- [How to Manually Instrument Java with OpenTelemetry (Part 1)](https://youtu.be/wMXMRsjFg-8?feature=shared)\\n- [How to Manually Instrument Java with OpenTelemetry (Part 2)](https://youtu.be/PX7s6RRLGaU?feature=shared)\\n- [Custom Java Instrumentation with OpenTelemetry](https://youtu.be/hXTlV_RnELc?feature=shared)\\n- [Elastic APM - Automatic .NET Instrumentation with OpenTelemetry](https://youtu.be/E8g9u_uOFO4?feature=shared)\\n- [How to Manually Instrument .NET Applications with OpenTelemetry](https://youtu.be/7J9M2JsHwRE?feature=shared)\\n\\nGiven Elastic and OpenTelemetry’s vast user base, these provide a rich source of education for anyone trying to learn the intricacies of instrumenting with OpenTelemetry.\\n\\n### Elastic Agents supporting OpenTelemetry\\n\\nIf you’ve already deployed Elastic APM agents, you can still use them with OpenTelemetry. [Elastic APM agents today are able to ship OpenTelemetry](https://www.elastic.co/blog/opentelemetry-instrumentation-elastic-apm-agent-features) spans as part of a trace. This means that if you have any component in your application that emits an OpenTelemetry span, it’ll be part of the trace the Elastic APM agent captures.\\n\\n## OpenTelemetry logs in Elastic\\n\\nIf you look at the OpenTelemetry documentation, you will see that a lot of language libraries are still in an experimental or not-yet-implemented state. Java is in a stable state, per the documentation. 
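For Java specifically, a rough, hedged sketch of what exporting log records over OTLP can look like is shown below; the class names come from the OpenTelemetry Java SDK and OTLP exporter artifacts, and the endpoint and token are the same placeholders used earlier:

```java
// Rough sketch: emitting a log record via the OpenTelemetry Java log bridge
// and shipping it to Elastic over OTLP/gRPC. Endpoint and token are placeholders.
import io.opentelemetry.api.logs.Severity;
import io.opentelemetry.exporter.otlp.logs.OtlpGrpcLogRecordExporter;
import io.opentelemetry.sdk.logs.SdkLoggerProvider;
import io.opentelemetry.sdk.logs.export.BatchLogRecordProcessor;

public final class OtlpLogExample {

  public static void main(String[] args) {
    // Log exporter pointed at the same OTLP endpoint used for traces.
    OtlpGrpcLogRecordExporter exporter =
        OtlpGrpcLogRecordExporter.builder()
            .setEndpoint("https://YOUR_DEPLOYMENT.apm.REGION.cloud.es.io:443")
            .addHeader("Authorization", "Bearer YOUR_SECRET_TOKEN")
            .build();

    SdkLoggerProvider loggerProvider =
        SdkLoggerProvider.builder()
            .addLogRecordProcessor(BatchLogRecordProcessor.builder(exporter).build())
            .build();

    // In practice, an slf4j/logback appender would feed this bridge;
    // here we emit a record directly for illustration.
    loggerProvider
        .loggerBuilder("my-service")
        .build()
        .logRecordBuilder()
        .setSeverity(Severity.INFO)
        .setBody("pipeline step finished")
        .emit();

    // Flush pending log records before the JVM exits.
    loggerProvider.shutdown();
  }
}
```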
Depending on your service’s language and your appetite for adventure, there exist several options for exporting logs from your services and applications and marrying them together in your observability backend.\\n\\nIn a previous blog, we discussed [3 different configurations to properly get logging data into Elastic for Java](https://www.elastic.co/blog/3-models-logging-opentelemetry-elastic). The blog explores the current state of the art of OpenTelemetry logging and provides guidance on the available approaches with the following tenets in mind:\\n\\n- Correlation of service logs with OTel-generated tracing where applicable\\n- Proper capture of exceptions\\n- Common context across tracing, metrics, and logging\\n- Support for slf4j key-value pairs (“structured logging”)\\n- Automatic attachment of metadata carried between services via OTel baggage\\n- Use of an Elastic Observability backend\\n- Consistent data fidelity in Elastic regardless of the approach taken\\n\\nThree models, which are covered in the blog, currently exist for getting your application or service logs to Elastic with correlation to OTel tracing and baggage:\\n\\n- Output logs from your service (alongside traces and metrics) using an embedded OpenTelemetry Instrumentation library to Elastic via the OTLP protocol\\n- Write logs from your service to a file scraped by the OpenTelemetry Collector, which then forwards them to Elastic via the OTLP protocol\\n- Write logs from your service to a file scraped by Elastic Agent (or Filebeat), which then forwards them to Elastic via an Elastic-defined protocol\\n\\nNote that (1), in contrast to (2) and (3), does not involve writing service logs to a file prior to ingestion into Elastic.\\n\\n## OpenTelemetry is Elastic’s preferred schema\\n\\nElastic recently contributed the [Elastic Common Schema (ECS) to the OpenTelemetry (OTel)](https://opentelemetry.io/blog/2023/ecs-otel-semconv-convergence/) project, enabling a unified data specification for security and observability data within the OTel Semantic Conventions framework.\\n\\nECS, an open source specification, was developed with support from the Elastic user community to define a common set of fields to be used when storing event data in Elasticsearch\xae. ECS helps reduce management and storage costs stemming from data duplication, improving operational efficiency.\\n\\nSimilarly, OTel’s Semantic Conventions (SemConv) also specify common names for various kinds of operations and data. 
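As a small illustration, here is a hedged sketch of tagging a span with SemConv-style attribute names through the OpenTelemetry Java API; the exact attribute names shown follow recent HTTP semantic conventions and have changed across SemConv versions:

```java
// Illustrative sketch: tagging a server span with semantic-convention
// attribute names. Exact names have evolved across SemConv versions.
import io.opentelemetry.api.GlobalOpenTelemetry;
import io.opentelemetry.api.common.AttributeKey;
import io.opentelemetry.api.trace.Span;
import io.opentelemetry.api.trace.SpanKind;
import io.opentelemetry.api.trace.Tracer;

public final class SemConvExample {

  public static void handleRequest() {
    Tracer tracer = GlobalOpenTelemetry.getTracer("semconv-example");
    Span span =
        tracer.spanBuilder("GET /api/cities").setSpanKind(SpanKind.SERVER).startSpan();
    try {
      // The same attribute names are used by every SemConv-compliant library,
      // so backends like Elastic can interpret them uniformly.
      span.setAttribute(AttributeKey.stringKey("http.request.method"), "GET");
      span.setAttribute(AttributeKey.stringKey("url.path"), "/api/cities");
      span.setAttribute(AttributeKey.longKey("http.response.status_code"), 200L);
    } finally {
      span.end();
    }
  }
}
```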
The benefit of using OTel SemConv is in following a common naming scheme that can be standardized across a codebase, libraries, and platforms for OTel users.\\n\\nThe merging of ECS and OTel SemConv will help advance OTel’s adoption and the continued evolution and convergence of observability and security domains.\\n\\n## Elastic Observability APM and machine learning capabilities\\n\\nAll of Elastic Observability’s APM capabilities are available with OTel data (read more on this in our blog, [Independence with OpenTelemetry](https://www.elastic.co/blog/opentelemetry-observability)):\\n\\n- Service maps\\n- Service details (latency, throughput, failed transactions)\\n- Dependencies between services\\n- Transactions (traces)\\n- ML correlations (specifically for latency)\\n- Service logs\\n\\n![services](/assets/images/native-opentelemetry-support-in-elastic-observability/elastic-blog-3-services.png)\\n\\nIn addition to Elastic’s APM and unified view of the telemetry data, you will now be able to use Elastic’s powerful machine learning capabilities to speed up analysis and alerting, helping reduce MTTR. Here are some of the ML-based AIOps capabilities we have:\\n\\n- [**Anomaly detection:**](https://www.elastic.co/blog/reduce-mttd-ml-machine-learning-observability) Elastic Observability, when turned on ([see documentation](https://www.elastic.co/guide/en/kibana/current/xpack-ml-anomalies.html)), automatically detects anomalies by continuously modeling the normal behavior of your OpenTelemetry data — learning trends, periodicity, and more.\\n- [**Log categorization:**](https://www.elastic.co/blog/reduce-mttd-ml-machine-learning-observability) Elastic also quickly identifies patterns in your OpenTelemetry log events, so that you can take action sooner.\\n- **High-latency or erroneous transactions:** Elastic Observability’s APM capability helps you discover which attributes are contributing to increased transaction latency and identifies which attributes are most influential in distinguishing between transaction failures and successes.\\n- [**Log spike detector**](https://www.elastic.co/blog/observability-logs-machine-learning-aiops) helps identify reasons for increases in OpenTelemetry log rates. It makes it easy to find and investigate causes of unusual spikes by using the analysis workflow view.\\n- [**Log pattern analysis**](https://www.elastic.co/blog/observability-logs-machine-learning-aiops) helps you find patterns in unstructured log messages and makes it easier to examine your data.\\n\\n## Elastic allows you to migrate to OTel on your schedule\\n\\nAlthough OpenTelemetry supports many programming languages, the [status of its major functional components](https://opentelemetry.io/docs/instrumentation/) — metrics, traces, and logs — still varies. Thus, applications written in Java, Python, and JavaScript are good candidates to start with, as their metrics, traces, and logs (for Java) are stable.\\n\\nFor the other languages that are not yet supported, you can easily instrument those using Elastic Agents, therefore running your [full stack observability platform](https://www.elastic.co/observability) in mixed mode (Elastic agents with OpenTelemetry agents).\\n\\nHere is a simple example:\\n\\n![services 2](/assets/images/native-opentelemetry-support-in-elastic-observability/elastic-blog-4-services2.png)\\n\\nThe above shows a simple variation of our standard Elastic Agent application with one service flipped to OTel — the newsletter-otel service. 
From here, we can easily convert each of the remaining services to OTel as development resources allow.\n\nHence you can migrate what you need to OpenTelemetry with Elastic as specific languages reach a stable state, and then continue your migration to OpenTelemetry agents.\n\n## Integrated Kubernetes and OpenTelemetry views in Elastic\n\nElastic monitors Kubernetes clusters using the Elastic Agent, which you can deploy on the Kubernetes cluster where your OpenTelemetry application is running. Hence you can not only use OpenTelemetry for your application, but also have Elastic monitor the corresponding Kubernetes cluster.\n\nThere are two configurations for Kubernetes:\n\n**1. Simply deploying the Elastic Agent DaemonSet on the Kubernetes cluster.** We outline this in the article entitled [Managing your Kubernetes cluster with Elastic Observability](https://www.elastic.co/blog/kubernetes-cluster-metrics-logs-monitoring). This configuration pushes only the Kubernetes metrics and logs to Elastic.\n\n![elastic cloud nodes](/assets/images/native-opentelemetry-support-in-elastic-observability/elastic-blog-5-cloud-nodes.png)\n\n**2. Deploying the Elastic Agent with not only the Kubernetes DaemonSet, but also Elastic’s APM integration, the Defend (Security) integration, and the Network Packet Capture integration** to provide more comprehensive Kubernetes cluster observability. We outline this configuration in the article [Modern observability and security on Kubernetes with Elastic and OpenTelemetry](https://www.elastic.co/blog/implementing-kubernetes-observability-security-opentelemetry).\n\n![flowchart](/assets/images/native-opentelemetry-support-in-elastic-observability/elastic-blog-6-flowhcart.png)\n\nBoth [OpenTelemetry visualization](https://www.elastic.co/observability/opentelemetry) examples use the OpenTelemetry demo, and in Elastic, we tie the Kubernetes information to the application, giving you the ability to see Kubernetes information from your traces in APM. This provides a more integrated approach when troubleshooting.\n\n![pod details](/assets/images/native-opentelemetry-support-in-elastic-observability/elastic-blog-7-pod-deets.png)\n\n
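The trace-to-pod pivot works because spans carry Kubernetes resource attributes. These are typically added by the Collector's k8sattributes processor; as a rough sketch under that assumption, a service could also set them itself from environment variables injected via the Kubernetes Downward API (all names below are illustrative):

```javascript
// Hypothetical: attach Kubernetes resource attributes so Elastic can link
// traces to pod and node views. POD_NAME / POD_NAMESPACE are assumed to be
// injected into the container via the Kubernetes Downward API.
const { Resource } = require("@opentelemetry/resources");

const k8sResource = new Resource({
  "k8s.cluster.name": process.env.CLUSTER_NAME || "demo-cluster",
  "k8s.pod.name": process.env.POD_NAME,
  "k8s.namespace.name": process.env.POD_NAMESPACE,
});

// Pass `resource: k8sResource` when constructing the NodeSDK shown earlier.
```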
## Summary\n\nIn essence, Elastic\'s commitment goes beyond mere support for OpenTelemetry. We are dedicated to ensuring our customers not only adopt OpenTelemetry but thrive with it. Through our solutions, expertise, and resources, we aim to elevate the observability journey for every business, turning data into actionable insights that drive growth and innovation.\n\n> Developer resources:\n>\n> - [Elastiflix application](https://www.elastic.co/blog/getting-started-opentelemetry-instrumentation-sample-app), a guide to instrument different languages with OpenTelemetry\n> - Python: [Auto-instrumentation](https://www.elastic.co/blog/auto-instrumentation-of-python-applications-opentelemetry), [Manual-instrumentation](https://www.elastic.co/blog/manual-instrumentation-of-python-applications-opentelemetry)\n> - Java: [Auto-instrumentation](https://www.elastic.co/blog/auto-instrumentation-of-java-applications-opentelemetry), [Manual-instrumentation](https://www.elastic.co/blog/manual-instrumentation-of-java-applications-opentelemetry)\n> - Node.js: [Auto-instrumentation](https://www.elastic.co/blog/auto-instrument-nodejs-applications-opentelemetry), [Manual-instrumentation](https://www.elastic.co/blog/manual-instrumentation-of-nodejs-applications-opentelemetry)\n> - .NET: [Auto-instrumentation](https://www.elastic.co/blog/auto-instrumentation-of-net-applications-opentelemetry), [Manual-instrumentation](https://www.elastic.co/blog/manual-instrumentation-of-net-applications-opentelemetry)\n> - Go: [Manual-instrumentation](https://elastic.co/blog/manual-instrumentation-of-go-applications-opentelemetry)\n> - [Best practices for OpenTelemetry](https://www.elastic.co/blog/best-practices-instrumenting-opentelemetry)\n>\n> General configuration and use case resources:\n>\n> - [Independence with OpenTelemetry on Elastic](https://www.elastic.co/blog/opentelemetry-observability)\n> - [Modern observability and security on Kubernetes with Elastic and OpenTelemetry](https://www.elastic.co/blog/implementing-kubernetes-observability-security-opentelemetry)\n> - [3 models for logging with OpenTelemetry and Elastic](https://www.elastic.co/blog/3-models-logging-opentelemetry-elastic)\n> - [Adding free and open Elastic APM as part of your Elastic Observability deployment](https://www.elastic.co/blog/adding-free-and-open-elastic-apm-as-part-of-your-elastic-observability-deployment)\n> - [Capturing custom metrics through OpenTelemetry API in code with Elastic](https://www.elastic.co/blog/custom-metrics-app-code-java-agent-plugin)\n> - [Future-proof your observability platform with OpenTelemetry and Elastic](https://www.elastic.co/virtual-events/future-proof-your-observability-platform-with-opentelemetry-and-elastic)\n> - [Elastic Observability: Built for open technologies like Kubernetes, OpenTelemetry, Prometheus, Istio, and more](https://www.elastic.co/blog/kubernetes-k8s-observability-elasticsearch-cncf)\n\nDon’t have an Elastic Cloud account yet? [Sign up for Elastic Cloud](https://cloud.elastic.co/registration) and try out the instrumentation capabilities that I discussed above. I would be interested in getting your feedback about your experience in gaining visibility into your application stack with Elastic.\n\n_The release and timing of any features or functionality described in this post remain at Elastic\'s sole discretion. 
Any features or functionality not currently available may not be delivered on time or at all._\\n","code":"var Component=(()=>{var d=Object.create;var l=Object.defineProperty;var u=Object.getOwnPropertyDescriptor;var m=Object.getOwnPropertyNames;var g=Object.getPrototypeOf,y=Object.prototype.hasOwnProperty;var f=(n,e)=>()=>(e||n((e={exports:{}}).exports,e),e.exports),w=(n,e)=>{for(var i in e)l(n,i,{get:e[i],enumerable:!0})},r=(n,e,i,a)=>{if(e&&typeof e==\\"object\\"||typeof e==\\"function\\")for(let o of m(e))!y.call(n,o)&&o!==i&&l(n,o,{get:()=>e[o],enumerable:!(a=u(e,o))||a.enumerable});return n};var b=(n,e,i)=>(i=n!=null?d(g(n)):{},r(e||!n||!n.__esModule?l(i,\\"default\\",{value:n,enumerable:!0}):i,n)),v=n=>r(l({},\\"__esModule\\",{value:!0}),n);var c=f((A,s)=>{s.exports=_jsx_runtime});var O={};w(O,{default:()=>p,frontmatter:()=>T});var t=b(c()),T={title:\\"Native OpenTelemetry support in Elastic Observability\\",slug:\\"native-opentelemetry-support-in-elastic-observability\\",date:\\"2023-09-13\\",description:\\"Elastic offers native support for OpenTelemetry by allowing for direct ingest of OpenTelemetry traces, metrics, and logs without conversion, and applying any Elastic feature against OTel data without degradation in capabilities.\\",author:[{slug:\\"bahubali-shetti\\"}],image:\\"ecs-otel-announcement-2.jpg\\",tags:[{slug:\\"opentelemetry\\"},{slug:\\"apm\\"},{slug:\\"kubernetes\\"}]};function h(n){let e={a:\\"a\\",blockquote:\\"blockquote\\",br:\\"br\\",em:\\"em\\",h2:\\"h2\\",h3:\\"h3\\",img:\\"img\\",li:\\"li\\",p:\\"p\\",strong:\\"strong\\",ul:\\"ul\\",...n.components};return(0,t.jsxs)(t.Fragment,{children:[(0,t.jsx)(e.p,{children:\\"OpenTelemetry is more than just becoming the open ingestion standard for observability. As one of the major Cloud Native Computing Foundation (CNCF) projects, with as many commits as Kubernetes, it is gaining support from major ISVs and cloud providers delivering support for the framework. Many global companies from finance, insurance, tech, and other industries are starting to standardize on OpenTelemetry. With OpenTelemetry, DevOps teams have a consistent approach to collecting and ingesting telemetry data providing a de-facto standard for observability.\\"}),`\\n`,(0,t.jsxs)(e.p,{children:[\\"Elastic\\",(0,t.jsx)(\\"sup\\",{children:\\"\\\\xAE\\"}),\\" is strategically standardizing on OpenTelemetry for the main data collection architecture for observability and security. Additionally, Elastic is making a commitment to help OpenTelemetry become the best de facto data collection infrastructure for the observability ecosystem. 
Elastic is deepening its relationship with OpenTelemetry beyond the recent contribution of Elastic Common Schema (ECS) to OpenTelemetry (OTel).\\"]}),`\\n`,(0,t.jsx)(e.p,{children:\\"Today, Elastic supports OpenTelemetry natively, since Elastic 7.14, by being able to directly ingest OpenTelemetry protocol (OTLP) based traces, metrics, and logs.\\"}),`\\n`,(0,t.jsx)(e.p,{children:(0,t.jsx)(e.img,{src:\\"/assets/images/native-opentelemetry-support-in-elastic-observability/elastic-blog-1-otel-config-options.png\\",alt:\\"otel configuration options\\",width:\\"1365\\",height:\\"687\\"})}),`\\n`,(0,t.jsx)(e.p,{children:\\"In this blog, we\\\\u2019ll review the current OpenTelemetry support provided by Elastic, which includes the following:\\"}),`\\n`,(0,t.jsxs)(e.ul,{children:[`\\n`,(0,t.jsxs)(e.li,{children:[(0,t.jsx)(e.a,{href:\\"#ingesting-opentelemetry-into-elastic\\",children:(0,t.jsx)(e.strong,{children:\\"Easy ingest of distributed tracing and metrics\\"})}),\\" for applications configured with OpenTelemetry agents for Python, NodeJS, Java, Go, and .NET\\"]}),`\\n`,(0,t.jsxs)(e.li,{children:[(0,t.jsx)(e.a,{href:\\"#opentelemetry-logs-in-elastic\\",children:(0,t.jsx)(e.strong,{children:\\"OpenTelemetry logs instrumentation and ingest\\"})}),\\" using various configurations\\"]}),`\\n`,(0,t.jsxs)(e.li,{children:[(0,t.jsx)(e.a,{href:\\"#opentelemetry-is-elastics-preferred-schema\\",children:(0,t.jsx)(e.strong,{children:\\"Open semantic conventions\\"})}),\\" for logs and more through ECS, which is not part of OpenTelemetry\\"]}),`\\n`,(0,t.jsxs)(e.li,{children:[(0,t.jsx)(e.a,{href:\\"#elastic-observability-apm-and-machine-learning-capabilities\\",children:(0,t.jsx)(e.strong,{children:\\"Machine learning based AIOps capabilities\\"})}),\\", such as latency correlations, failure correlations, anomaly detection, log spike analysis, predictive pattern analysis, Elastic AI Assistant support, and more, all apply to native OTLP telemetry.\\"]}),`\\n`,(0,t.jsxs)(e.li,{children:[(0,t.jsx)(e.a,{href:\\"#elastic-allows-you-to-migrate-to-otel-on-your-schedule\\",children:(0,t.jsx)(e.strong,{children:\\"Migrate applications to OpenTelemetry at your own speed\\"})}),\\". Elastic\\\\u2019s APM capabilities all work seamlessly even with a mix of services using OpenTelemetry and/or Elastic APM agents. You can even combine OpenTelemetry instrumentation with Elastic Agent.\\"]}),`\\n`,(0,t.jsxs)(e.li,{children:[(0,t.jsx)(e.a,{href:\\"#integrated-kubernetes-and-opentelemetry-views-in-elastic\\",children:(0,t.jsx)(e.strong,{children:\\"Integrated views and analysis with Kubernetes clusters\\"})}),\\", which most OpenTelemetry applications are running on. Elastic can highlight specific pods and containers related to each service when analyzing issues for applications based on OpenTelemetry.\\"]}),`\\n`]}),`\\n`,(0,t.jsx)(e.h2,{id:\\"ingesting-opentelemetry-into-elastic\\",children:\\"Ingesting OpenTelemetry into Elastic\\"}),`\\n`,(0,t.jsx)(e.p,{children:\\"If you\\\\u2019re interested in seeing how simple it is to ingest OpenTelemetry traces and metrics into Elastic, follow the steps outlined in this blog.\\"}),`\\n`,(0,t.jsx)(e.p,{children:\\"Let\\\\u2019s outline what Elastic provides for ingesting OpenTelemetry data. 
Here are all your options:\\"}),`\\n`,(0,t.jsx)(e.p,{children:(0,t.jsx)(e.img,{src:\\"/assets/images/native-opentelemetry-support-in-elastic-observability/elastic-blog-2-flowchart.png\\",alt:\\"flowchart\\",width:\\"1742\\",height:\\"1070\\"})}),`\\n`,(0,t.jsx)(e.h3,{id:\\"using-the-opentelemetry-collector\\",children:\\"Using the OpenTelemetry Collector\\"}),`\\n`,(0,t.jsx)(e.p,{children:\\"When using the OpenTelemetry Collector, which is the most common configuration option, you simply have to add two key variables.\\"}),`\\n`,(0,t.jsxs)(e.p,{children:[\\"The instructions utilize a specific opentelemetry-collector configuration for Elastic. Essentially, the Elastic \\",(0,t.jsx)(e.a,{href:\\"https://github.com/elastic/opentelemetry-demo/blob/main/kubernetes/elastic-helm/values.yaml\\",rel:\\"nofollow\\",children:\\"values.yaml\\"}),\\" file specified in the elastic/opentelemetry-demo configure the opentelemetry-collector to point to the Elastic APM Server using two main values:\\"]}),`\\n`,(0,t.jsxs)(e.p,{children:[\\"OTEL_EXPORTER_OTLP_ENDPOINT is Elastic\\\\u2019s APM Server\\",(0,t.jsx)(e.br,{}),`\\n`,\\"OTEL_EXPORTER_OTLP_HEADERS Elastic Authorization\\"]}),`\\n`,(0,t.jsx)(e.p,{children:\\"These two values can be found in the OpenTelemetry setup instructions under the APM integration instructions (Integrations->APM) in your Elastic Cloud.\\"}),`\\n`,(0,t.jsx)(e.h3,{id:\\"native-opentelemetry-agents-embedded-in-code\\",children:\\"Native OpenTelemetry agents embedded in code\\"}),`\\n`,(0,t.jsx)(e.p,{children:\\"If you are thinking of using OpenTelemetry libraries in your code, you can simply point the service to Elastic\\\\u2019s APM server, because it supports native OLTP protocol. No special Elastic conversion is needed.\\"}),`\\n`,(0,t.jsx)(e.p,{children:\\"To demonstrate this effectively and provide some education on how to use OpenTelemetry, we have two applications you can use to learn from:\\"}),`\\n`,(0,t.jsxs)(e.ul,{children:[`\\n`,(0,t.jsxs)(e.li,{children:[(0,t.jsx)(e.a,{href:\\"https://github.com/elastic/opentelemetry-demo\\",rel:\\"nofollow\\",children:\\"Elastic\\\\u2019s version of OpenTelemetry demo\\"}),\\": As with all the other observability vendors, we have our own forked version of the OpenTelemetry demo.\\"]}),`\\n`,(0,t.jsxs)(e.li,{children:[(0,t.jsx)(e.a,{href:\\"https://github.com/elastic/workshops-instruqt/tree/main/Elastiflix\\",rel:\\"nofollow\\",children:\\"Elastiflix:\\"}),\\" This demo application is an example to help you learn how to instrument on various languages and telemetry signals.\\"]}),`\\n`]}),`\\n`,(0,t.jsx)(e.p,{children:\\"Check out our blogs on using the Elastiflix application and instrumenting with OpenTelemetry:\\"}),`\\n`,(0,t.jsxs)(e.ul,{children:[`\\n`,(0,t.jsxs)(e.li,{children:[(0,t.jsx)(e.a,{href:\\"https://www.elastic.co/blog/getting-started-opentelemetry-instrumentation-sample-app\\",rel:\\"nofollow\\",children:\\"Elastiflix application\\"}),\\", a guide to instrument different languages with OpenTelemetry\\"]}),`\\n`,(0,t.jsxs)(e.li,{children:[\\"Python: \\",(0,t.jsx)(e.a,{href:\\"https://www.elastic.co/blog/auto-instrumentation-of-python-applications-opentelemetry\\",rel:\\"nofollow\\",children:\\"Auto-instrumentation\\"}),\\", \\",(0,t.jsx)(e.a,{href:\\"https://www.elastic.co/blog/manual-instrumentation-of-python-applications-opentelemetry\\",rel:\\"nofollow\\",children:\\"Manual-instrumentation\\"})]}),`\\n`,(0,t.jsxs)(e.li,{children:[\\"Java: 
\\",(0,t.jsx)(e.a,{href:\\"https://www.elastic.co/blog/auto-instrumentation-of-java-applications-opentelemetry\\",rel:\\"nofollow\\",children:\\"Auto-instrumentation\\"}),\\", \\",(0,t.jsx)(e.a,{href:\\"https://www.elastic.co/blog/manual-instrumentation-of-java-applications-opentelemetry\\",rel:\\"nofollow\\",children:\\"Manual-instrumentation\\"})]}),`\\n`,(0,t.jsxs)(e.li,{children:[\\"Node.js: \\",(0,t.jsx)(e.a,{href:\\"https://www.elastic.co/blog/auto-instrument-nodejs-applications-opentelemetry\\",rel:\\"nofollow\\",children:\\"Auto-instrumentation\\"}),\\", \\",(0,t.jsx)(e.a,{href:\\"https://www.elastic.co/blog/manual-instrumentation-of-nodejs-applications-opentelemetry\\",rel:\\"nofollow\\",children:\\"Manual-instrumentation\\"})]}),`\\n`,(0,t.jsxs)(e.li,{children:[\\".NET: \\",(0,t.jsx)(e.a,{href:\\"https://www.elastic.co/blog/auto-instrumentation-of-net-applications-opentelemetry\\",rel:\\"nofollow\\",children:\\"Auto-instrumentation\\"}),\\", \\",(0,t.jsx)(e.a,{href:\\"https://www.elastic.co/blog/manual-instrumentation-of-net-applications-opentelemetry\\",rel:\\"nofollow\\",children:\\"Manual-instrumentation\\"})]}),`\\n`]}),`\\n`,(0,t.jsx)(e.p,{children:\\"We have created YouTube videos on these topics as well:\\"}),`\\n`,(0,t.jsxs)(e.ul,{children:[`\\n`,(0,t.jsx)(e.li,{children:(0,t.jsx)(e.a,{href:\\"https://youtu.be/wMXMRsjFg-8?feature=shared\\",rel:\\"nofollow\\",children:\\"How to Manually Instrument Java with OpenTelemetry (Part 1)\\"})}),`\\n`,(0,t.jsx)(e.li,{children:(0,t.jsx)(e.a,{href:\\"https://youtu.be/PX7s6RRLGaU?feature=shared\\",rel:\\"nofollow\\",children:\\"How to Manually Instrument Java with OpenTelemetry (Part 2)\\"})}),`\\n`,(0,t.jsx)(e.li,{children:(0,t.jsx)(e.a,{href:\\"https://youtu.be/hXTlV_RnELc?feature=shared\\",rel:\\"nofollow\\",children:\\"Custom Java Instrumentation with OpenTelemetry\\"})}),`\\n`,(0,t.jsx)(e.li,{children:(0,t.jsx)(e.a,{href:\\"https://youtu.be/E8g9u_uOFO4?feature=shared\\",rel:\\"nofollow\\",children:\\"Elastic APM - Automatic .NET Instrumentation with OpenTelemetry\\"})}),`\\n`,(0,t.jsx)(e.li,{children:(0,t.jsx)(e.a,{href:\\"https://youtu.be/7J9M2JsHwRE?feature=shared\\",rel:\\"nofollow\\",children:\\"How to Manually Instrument .NET Applications with OpenTelemetry\\"})}),`\\n`]}),`\\n`,(0,t.jsx)(e.p,{children:\\"Given Elastic and OpenTelemetry\\\\u2019s vast user base, these provide a rich source of education for anyone trying to learn the intricacies of instrumenting with OpenTelemetry.\\"}),`\\n`,(0,t.jsx)(e.h3,{id:\\"elastic-agents-supporting-opentelemetry\\",children:\\"Elastic Agents supporting OpenTelemetry\\"}),`\\n`,(0,t.jsxs)(e.p,{children:[\\"If you\\\\u2019ve already implemented OpenTelemetry, you can still use them with OpenTelemetry. \\",(0,t.jsx)(e.a,{href:\\"https://www.elastic.co/blog/opentelemetry-instrumentation-elastic-apm-agent-features\\",rel:\\"nofollow\\",children:\\"Elastic APM agents today are able to ship OpenTelemetry\\"}),\\" spans as part of a trace. This means that if you have any component in your application that emits an OpenTelemetry span, it\\\\u2019ll be part of the trace the Elastic APM agent captures.\\"]}),`\\n`,(0,t.jsx)(e.h2,{id:\\"opentelemetry-logs-in-elastic\\",children:\\"OpenTelemetry logs in Elastic\\"}),`\\n`,(0,t.jsx)(e.p,{children:\\"If you look at OpenTelemetry documentation, you will see that a lot of language libraries are still in experimental or not implemented yet state. Java is in stable state, per the documentation. 
Depending on your service\\\\u2019s language, and your appetite for adventure, there exist several options for exporting logs from your services and applications and marrying them together in your observability backend.\\"}),`\\n`,(0,t.jsxs)(e.p,{children:[\\"In a previous blog, we discussed \\",(0,t.jsx)(e.a,{href:\\"https://www.elastic.co/blog/3-models-logging-opentelemetry-elastic\\",rel:\\"nofollow\\",children:\\"3 different configurations to properly get logging data into Elastic for Java\\"}),\\". The blog explores the current state of the art of OpenTelemetry logging and provides guidance on the available approaches with the following tenants in mind:\\"]}),`\\n`,(0,t.jsxs)(e.ul,{children:[`\\n`,(0,t.jsx)(e.li,{children:\\"Correlation of service logs with OTel-generated tracing where applicable\\"}),`\\n`,(0,t.jsx)(e.li,{children:\\"Proper capture of exceptions\\"}),`\\n`,(0,t.jsx)(e.li,{children:\\"Common context across tracing, metrics, and logging\\"}),`\\n`,(0,t.jsx)(e.li,{children:\\"Support for slf4j key-value pairs (\\\\u201Cstructured logging\\\\u201D)\\"}),`\\n`,(0,t.jsx)(e.li,{children:\\"Automatic attachment of metadata carried between services via OTel baggage\\"}),`\\n`,(0,t.jsx)(e.li,{children:\\"Use of an Elastic Observability backend\\"}),`\\n`,(0,t.jsx)(e.li,{children:\\"Consistent data fidelity in Elastic regardless of the approach taken\\"}),`\\n`]}),`\\n`,(0,t.jsx)(e.p,{children:\\"Three models, which are covered in the blog, currently exist for getting your application or service logs to Elastic with correlation to OTel tracing and baggage:\\"}),`\\n`,(0,t.jsxs)(e.ul,{children:[`\\n`,(0,t.jsx)(e.li,{children:\\"Output logs from your service (alongside traces and metrics) using an embedded OpenTelemetry Instrumentation library to Elastic via the OTLP protocol\\"}),`\\n`,(0,t.jsx)(e.li,{children:\\"Write logs from your service to a file scrapped by the OpenTelemetry Collector, which then forwards to Elastic via the OTLP protocol\\"}),`\\n`,(0,t.jsx)(e.li,{children:\\"Write logs from your service to a file scrapped by Elastic Agent (or Filebeat), which then forwards to Elastic via an Elastic-defined protocol\\"}),`\\n`]}),`\\n`,(0,t.jsx)(e.p,{children:\\"Note that (1), in contrast to (2) and (3), does not involve writing service logs to a file prior to ingestion into Elastic.\\"}),`\\n`,(0,t.jsx)(e.h2,{id:\\"opentelemetry-is-elastics-preferred-schema\\",children:\\"OpenTelemetry is Elastic\\\\u2019s preferred schema\\"}),`\\n`,(0,t.jsxs)(e.p,{children:[\\"Elastic recently contributed the \\",(0,t.jsx)(e.a,{href:\\"https://opentelemetry.io/blog/2023/ecs-otel-semconv-convergence/\\",rel:\\"nofollow\\",children:\\"Elastic Common Schema (ECS) to the OpenTelemetry (OTel)\\"}),\\" project, enabling a unified data specification for security and observability data within the OTel Semantic Conventions framework.\\"]}),`\\n`,(0,t.jsxs)(e.p,{children:[\\"ECS, an open source specification, was developed with support from the Elastic user community to define a common set of fields to be used when storing event data in Elasticsearch\\",(0,t.jsx)(\\"sup\\",{children:\\"\\\\xAE\\"}),\\". ECS helps reduce management and storage costs stemming from data duplication, improving operational efficiency.\\"]}),`\\n`,(0,t.jsx)(e.p,{children:\\"Similarly, OTel\\\\u2019s Semantic Conventions (SemConv) also specify common names for various kinds of operations and data. 
The benefit of using OTel SemConv is in following a common naming scheme that can be standardized across a codebase, libraries, and platforms for OTel users.\\"}),`\\n`,(0,t.jsx)(e.p,{children:\\"The merging of ECS and OTel SemConv will help advance OTel\\\\u2019s adoption and the continued evolution and convergence of observability and security domains.\\"}),`\\n`,(0,t.jsx)(e.h2,{id:\\"elastic-observability-apm-and-machine-learning-capabilities\\",children:\\"Elastic Observability APM and machine learning capabilities\\"}),`\\n`,(0,t.jsxs)(e.p,{children:[\\"All of Elastic Observability\\\\u2019s APM capabilities are available with OTel data (read more on this in our blog, \\",(0,t.jsx)(e.a,{href:\\"https://www.elastic.co/blog/opentelemetry-observability\\",rel:\\"nofollow\\",children:\\"Independence with OpenTelemetry\\"}),\\"):\\"]}),`\\n`,(0,t.jsxs)(e.ul,{children:[`\\n`,(0,t.jsx)(e.li,{children:\\"Service maps\\"}),`\\n`,(0,t.jsx)(e.li,{children:\\"Service details (latency, throughput, failed transactions)\\"}),`\\n`,(0,t.jsx)(e.li,{children:\\"Dependencies between services\\"}),`\\n`,(0,t.jsx)(e.li,{children:\\"Transactions (traces)\\"}),`\\n`,(0,t.jsx)(e.li,{children:\\"ML correlations (specifically for latency)\\"}),`\\n`,(0,t.jsx)(e.li,{children:\\"Service logs\\"}),`\\n`]}),`\\n`,(0,t.jsx)(e.p,{children:(0,t.jsx)(e.img,{src:\\"/assets/images/native-opentelemetry-support-in-elastic-observability/elastic-blog-3-services.png\\",alt:\\"services\\",width:\\"1999\\",height:\\"1055\\"})}),`\\n`,(0,t.jsx)(e.p,{children:\\"In addition to Elastic\\\\u2019s APM and unified view of the telemetry data, you will now be able to use Elastic\\\\u2019s powerful machine learning capabilities to reduce the analysis, and alerting to help reduce MTTR. Here are some of the ML based AIOps capabilities we have:\\"}),`\\n`,(0,t.jsxs)(e.ul,{children:[`\\n`,(0,t.jsxs)(e.li,{children:[(0,t.jsx)(e.a,{href:\\"https://www.elastic.co/blog/reduce-mttd-ml-machine-learning-observability\\",rel:\\"nofollow\\",children:(0,t.jsx)(e.strong,{children:\\"Anomaly detection:\\"})}),\\" Elastic Observability, when turned on (\\",(0,t.jsx)(e.a,{href:\\"https://www.elastic.co/guide/en/kibana/current/xpack-ml-anomalies.html\\",rel:\\"nofollow\\",children:\\"see documentation\\"}),\\"), automatically detects anomalies by continuously modeling the normal behavior of your OpenTelemetry data \\\\u2014 learning trends, periodicity, and more.\\"]}),`\\n`,(0,t.jsxs)(e.li,{children:[(0,t.jsx)(e.a,{href:\\"https://www.elastic.co/blog/reduce-mttd-ml-machine-learning-observability\\",rel:\\"nofollow\\",children:(0,t.jsx)(e.strong,{children:\\"Log categorization:\\"})}),\\" Elastic also identifies patterns in your OpenTelemetry log events quickly, so that you can take action quicker.\\"]}),`\\n`,(0,t.jsxs)(e.li,{children:[(0,t.jsx)(e.strong,{children:\\"High-latency or erroneous transactions:\\"}),\\" Elastic Observability\\\\u2019s APM capability helps you discover which attributes are contributing to increased transaction latency and identifies which attributes are most influential in distinguishing between transaction failures and successes.\\"]}),`\\n`,(0,t.jsxs)(e.li,{children:[(0,t.jsx)(e.a,{href:\\"https://www.elastic.co/blog/observability-logs-machine-learning-aiops\\",rel:\\"nofollow\\",children:(0,t.jsx)(e.strong,{children:\\"Log spike detector\\"})}),\\" helps identify reasons for increases in OpenTelemetry log rates. 
It makes it easy to find and investigate causes of unusual spikes by using the analysis workflow view.\\"]}),`\\n`,(0,t.jsxs)(e.li,{children:[(0,t.jsx)(e.a,{href:\\"https://www.elastic.co/blog/observability-logs-machine-learning-aiops\\",rel:\\"nofollow\\",children:(0,t.jsx)(e.strong,{children:\\"Log pattern analysis\\"})}),\\" helps you find patterns in unstructured log messages and makes it easier to examine your data.\\"]}),`\\n`]}),`\\n`,(0,t.jsx)(e.h2,{id:\\"elastic-allows-you-to-migrate-to-otel-on-your-schedule\\",children:\\"Elastic allows you to migrate to OTel on your schedule\\"}),`\\n`,(0,t.jsxs)(e.p,{children:[\\"Although OpenTelemetry supports many programming languages, the \\",(0,t.jsx)(e.a,{href:\\"https://opentelemetry.io/docs/instrumentation/\\",rel:\\"nofollow\\",children:\\"status of its major functional components\\"}),\\" \\\\u2014 metrics, traces, and logs \\\\u2014 are still at various stages. Thus migrating applications written in Java, Python, and JavaScript are good choices to start with as their metrics, traces, and logs (for Java) are stable.\\"]}),`\\n`,(0,t.jsxs)(e.p,{children:[\\"For the other languages that are not yet supported, you can easily instrument those using Elastic Agents, therefore running your \\",(0,t.jsx)(e.a,{href:\\"https://www.elastic.co/observability\\",rel:\\"nofollow\\",children:\\"full stack observability platform\\"}),\\" in mixed mode (Elastic agents with OpenTelemetry agents).\\"]}),`\\n`,(0,t.jsx)(e.p,{children:\\"Here is a simple example:\\"}),`\\n`,(0,t.jsx)(e.p,{children:(0,t.jsx)(e.img,{src:\\"/assets/images/native-opentelemetry-support-in-elastic-observability/elastic-blog-4-services2.png\\",alt:\\"services 2\\",width:\\"1790\\",height:\\"909\\"})}),`\\n`,(0,t.jsx)(e.p,{children:\\"The above shows a simple variation of our standard Elastic Agent application with one service flipped to OTel \\\\u2014 the newsletter-otel service. But we can easily and as needed convert each of these services to OTel as development resources allow.\\"}),`\\n`,(0,t.jsx)(e.p,{children:\\"Hence you can migrate what you need to OpenTelemetry with Elastic as specific languages reach a stable state, and you can then continue your migration to OpenTelemetry agents.\\"}),`\\n`,(0,t.jsx)(e.h2,{id:\\"integrated-kubernetes-and-opentelemetry-views-in-elastic\\",children:\\"Integrated Kubernetes and OpenTelemetry views in Elastic\\"}),`\\n`,(0,t.jsx)(e.p,{children:\\"Elastic manages your Kubernetes cluster using the Elastic Agent, and you can use it on your Kubernetes cluster where your OpenTelemetry application is running. Hence you can not only use OpenTelemetry for your application, but Elastic can also monitor the corresponding Kubernetes cluster.\\"}),`\\n`,(0,t.jsx)(e.p,{children:\\"There are two configurations for Kubernetes:\\"}),`\\n`,(0,t.jsxs)(e.p,{children:[(0,t.jsx)(e.strong,{children:\\"1. Simply deploying the Elastic Agent daemon set on the kubernetes cluster.\\"}),\\" We outline this out in the article entitled \\",(0,t.jsx)(e.a,{href:\\"https://www.elastic.co/blog/kubernetes-cluster-metrics-logs-monitoring\\",rel:\\"nofollow\\",children:\\"Managing your Kubernetes cluster with Elastic Observability\\"}),\\". 
This would also push just the Kubernetes metrics and logs to Elastic.\\"]}),`\\n`,(0,t.jsx)(e.p,{children:(0,t.jsx)(e.img,{src:\\"/assets/images/native-opentelemetry-support-in-elastic-observability/elastic-blog-5-cloud-nodes.png\\",alt:\\"elastic cloud nodes\\",width:\\"812\\",height:\\"481\\"})}),`\\n`,(0,t.jsxs)(e.p,{children:[(0,t.jsx)(e.strong,{children:\\"2. Deploying the Elastic Agent with not only the Kubernetes Daemon set, but also Elastic\\\\u2019s APM integration, the Defend (Security) integration, and Network Packet capture integration\\"}),\\" to provide more comprehensive Kubernetes cluster observability. We outline this configuration in the following article \\",(0,t.jsx)(e.a,{href:\\"https://www.elastic.co/blog/implementing-kubernetes-observability-security-opentelemetry\\",rel:\\"nofollow\\",children:\\"Modern observability and security on Kubernetes with Elastic and OpenTelemetry\\"}),\\".\\"]}),`\\n`,(0,t.jsx)(e.p,{children:(0,t.jsx)(e.img,{src:\\"/assets/images/native-opentelemetry-support-in-elastic-observability/elastic-blog-6-flowhcart.png\\",alt:\\"flowchart\\",width:\\"1337\\",height:\\"1140\\"})}),`\\n`,(0,t.jsxs)(e.p,{children:[\\"Both \\",(0,t.jsx)(e.a,{href:\\"https://www.elastic.co/observability/opentelemetry\\",rel:\\"nofollow\\",children:\\"OpenTelemetry visualization\\"}),\\" examples use the OpenTelemetry demo, and in Elastic, we tie the Kubernetes information with the application to provide you an ability to see Kubernetes information from your traces in APM. This provides a more integrated approach when troubleshooting.\\"]}),`\\n`,(0,t.jsx)(e.p,{children:(0,t.jsx)(e.img,{src:\\"/assets/images/native-opentelemetry-support-in-elastic-observability/elastic-blog-7-pod-deets.png\\",alt:\\"pod details\\",width:\\"1573\\",height:\\"1006\\"})}),`\\n`,(0,t.jsx)(e.h2,{id:\\"summary\\",children:\\"Summary\\"}),`\\n`,(0,t.jsx)(e.p,{children:\\"In essence, Elastic\'s commitment goes beyond mere support for OpenTelemetry. We are dedicated to ensuring our customers not only adopt OpenTelemetry but thrive with it. 
Through our solutions, expertise, and resources, we aim to elevate the observability journey for every business, turning data into actionable insights that drive growth and innovation.\\"}),`\\n`,(0,t.jsxs)(e.blockquote,{children:[`\\n`,(0,t.jsx)(e.p,{children:\\"Developer resources:\\"}),`\\n`,(0,t.jsxs)(e.ul,{children:[`\\n`,(0,t.jsxs)(e.li,{children:[(0,t.jsx)(e.a,{href:\\"https://www.elastic.co/blog/getting-started-opentelemetry-instrumentation-sample-app\\",rel:\\"nofollow\\",children:\\"Elastiflix application\\"}),\\", a guide to instrument different languages with OpenTelemetry\\"]}),`\\n`,(0,t.jsxs)(e.li,{children:[\\"Python: \\",(0,t.jsx)(e.a,{href:\\"https://www.elastic.co/blog/auto-instrumentation-of-python-applications-opentelemetry\\",rel:\\"nofollow\\",children:\\"Auto-instrumentation\\"}),\\", \\",(0,t.jsx)(e.a,{href:\\"https://www.elastic.co/blog/manual-instrumentation-of-python-applications-opentelemetry\\",rel:\\"nofollow\\",children:\\"Manual-instrumentation\\"})]}),`\\n`,(0,t.jsxs)(e.li,{children:[\\"Java: \\",(0,t.jsx)(e.a,{href:\\"https://www.elastic.co/blog/auto-instrumentation-of-java-applications-opentelemetry\\",rel:\\"nofollow\\",children:\\"Auto-instrumentation\\"}),\\", \\",(0,t.jsx)(e.a,{href:\\"https://www.elastic.co/blog/manual-instrumentation-of-java-applications-opentelemetry\\",rel:\\"nofollow\\",children:\\"Manual-instrumentation\\"})]}),`\\n`,(0,t.jsxs)(e.li,{children:[\\"Node.js: \\",(0,t.jsx)(e.a,{href:\\"https://www.elastic.co/blog/auto-instrument-nodejs-applications-opentelemetry\\",rel:\\"nofollow\\",children:\\"Auto-instrumentation\\"}),\\", \\",(0,t.jsx)(e.a,{href:\\"https://www.elastic.co/blog/manual-instrumentation-of-nodejs-applications-opentelemetry\\",rel:\\"nofollow\\",children:\\"Manual-instrumentation\\"})]}),`\\n`,(0,t.jsxs)(e.li,{children:[\\".NET: \\",(0,t.jsx)(e.a,{href:\\"https://www.elastic.co/blog/auto-instrumentation-of-net-applications-opentelemetry\\",rel:\\"nofollow\\",children:\\"Auto-instrumentation\\"}),\\", \\",(0,t.jsx)(e.a,{href:\\"https://www.elastic.co/blog/manual-instrumentation-of-net-applications-opentelemetry\\",rel:\\"nofollow\\",children:\\"Manual-instrumentation\\"})]}),`\\n`,(0,t.jsxs)(e.li,{children:[\\"Go: \\",(0,t.jsx)(e.a,{href:\\"https://elastic.co/blog/manual-instrumentation-of-go-applications-opentelemetry\\",rel:\\"nofollow\\",children:\\"Manual-instrumentation\\"})]}),`\\n`,(0,t.jsx)(e.li,{children:(0,t.jsx)(e.a,{href:\\"https://www.elastic.co/blog/best-practices-instrumenting-opentelemetry\\",rel:\\"nofollow\\",children:\\"Best practices for OpenTelemetry\\"})}),`\\n`]}),`\\n`,(0,t.jsx)(e.p,{children:\\"General configuration and use case resources:\\"}),`\\n`,(0,t.jsxs)(e.ul,{children:[`\\n`,(0,t.jsx)(e.li,{children:(0,t.jsx)(e.a,{href:\\"https://www.elastic.co/blog/opentelemetry-observability\\",rel:\\"nofollow\\",children:\\"Independence with OpenTelemetry on Elastic\\"})}),`\\n`,(0,t.jsx)(e.li,{children:(0,t.jsx)(e.a,{href:\\"https://www.elastic.co/blog/implementing-kubernetes-observability-security-opentelemetry\\",rel:\\"nofollow\\",children:\\"Modern observability and security on Kubernetes with Elastic and OpenTelemetry\\"})}),`\\n`,(0,t.jsx)(e.li,{children:(0,t.jsx)(e.a,{href:\\"https://www.elastic.co/blog/3-models-logging-opentelemetry-elastic\\",rel:\\"nofollow\\",children:\\"3 models for logging with OpenTelemetry and 
Elastic\\"})}),`\\n`,(0,t.jsx)(e.li,{children:(0,t.jsx)(e.a,{href:\\"https://www.elastic.co/blog/adding-free-and-open-elastic-apm-as-part-of-your-elastic-observability-deployment\\",rel:\\"nofollow\\",children:\\"Adding free and open Elastic APM as part of your Elastic Observability deployment\\"})}),`\\n`,(0,t.jsx)(e.li,{children:(0,t.jsx)(e.a,{href:\\"https://www.elastic.co/blog/custom-metrics-app-code-java-agent-plugin\\",rel:\\"nofollow\\",children:\\"Capturing custom metrics through OpenTelemetry API in code with Elastic\\"})}),`\\n`,(0,t.jsx)(e.li,{children:(0,t.jsx)(e.a,{href:\\"https://www.elastic.co/virtual-events/future-proof-your-observability-platform-with-opentelemetry-and-elastic\\",rel:\\"nofollow\\",children:\\"Future-proof your observability platform with OpenTelemetry and Elastic\\"})}),`\\n`,(0,t.jsx)(e.li,{children:(0,t.jsx)(e.a,{href:\\"https://www.elastic.co/blog/kubernetes-k8s-observability-elasticsearch-cncf\\",rel:\\"nofollow\\",children:\\"Elastic Observability: Built for open technologies like Kubernetes, OpenTelemetry, Prometheus, Istio, and more\\"})}),`\\n`]}),`\\n`]}),`\\n`,(0,t.jsxs)(e.p,{children:[\\"Don\\\\u2019t have an Elastic Cloud account yet? \\",(0,t.jsx)(e.a,{href:\\"https://cloud.elastic.co/registration\\",rel:\\"nofollow\\",children:\\"Sign up for Elastic Cloud\\"}),\\" and try out the instrumentation capabilities that I discussed above. I would be interested in getting your feedback about your experience in gaining visibility into your application stack with Elastic.\\"]}),`\\n`,(0,t.jsx)(e.p,{children:(0,t.jsx)(e.em,{children:\\"The release and timing of any features or functionality described in this post remain at Elastic\'s sole discretion. Any features or functionality not currently available may not be delivered on time or at all.\\"})})]})}function p(n={}){let{wrapper:e}=n.components||{};return e?(0,t.jsx)(e,{...n,children:(0,t.jsx)(h,{...n})}):h(n)}return v(O);})();\\n;return Component;"},"_id":"articles/native-opentelemetry-support-in-elastic-observability.mdx","_raw":{"sourceFilePath":"articles/native-opentelemetry-support-in-elastic-observability.mdx","sourceFileName":"native-opentelemetry-support-in-elastic-observability.mdx","sourceFileDir":"articles","contentType":"mdx","flattenedPath":"articles/native-opentelemetry-support-in-elastic-observability"},"type":"Article","imageUrl":"/assets/images/native-opentelemetry-support-in-elastic-observability/ecs-otel-announcement-2.jpg","readingTime":"11 min read","url":"/native-opentelemetry-support-in-elastic-observability","headings":[{"level":2,"title":"Ingesting OpenTelemetry into Elastic","href":"#ingesting-opentelemetry-into-elastic"},{"level":3,"title":"Using the OpenTelemetry Collector","href":"#using-the-opentelemetry-collector"},{"level":3,"title":"Native OpenTelemetry agents embedded in code","href":"#native-opentelemetry-agents-embedded-in-code"},{"level":3,"title":"Elastic Agents supporting OpenTelemetry","href":"#elastic-agents-supporting-opentelemetry"},{"level":2,"title":"OpenTelemetry logs in Elastic","href":"#opentelemetry-logs-in-elastic"},{"level":2,"title":"OpenTelemetry is Elastic’s preferred schema","href":"#opentelemetry-is-elastics-preferred-schema"},{"level":2,"title":"Elastic Observability APM and machine learning capabilities","href":"#elastic-observability-apm-and-machine-learning-capabilities"},{"level":2,"title":"Elastic allows you to migrate to OTel on your schedule","href":"#elastic-allows-you-to-migrate-to-otel-on-your-schedule"},{"level":2,"title":"Integrated 
Kubernetes and OpenTelemetry views in Elastic","href":"#integrated-kubernetes-and-opentelemetry-views-in-elastic"},{"level":2,"title":"Summary","href":"#summary"}]},{"title":"Introducing Elastic Observability\'s new Synthetic Monitoring: Designed for seamless GitOps management and SRE-focused workflows","slug":"new-synthetic-monitoring-observability","date":"2022-10-20","description":"Elastic Observability introduces Synthetic Monitoring, a game-changer for GitOps management and SRE-focused workflows. This tool provides visibility into critical flows and third-party dependencies, enhancing application performance and user experience.","image":"the-end-of-databases-A_(1).jpg","author":[{"slug":"drew-post","type":"Author","_raw":{}}],"tags":[{"slug":"synthetics","type":"Tag","_raw":{}}],"body":{"raw":"\\nWe are excited to announce the general availability of Elastic Observability\'s all-new Synthetic Monitoring. This powerful tool, designed for streamlined GitOps management and Site Reliability Engineer (SRE) workflows, elevates your monitoring capabilities and empowers you to transform your application\'s performance.\n\nAs you read through the next few sections, you can also look at these additional resources:\n\n- [On-demand webinar: Getting started with synthetic monitoring on Elastic](https://www.elastic.co/virtual-events/improve-business-outcomes-and-observability-with-synthetic-monitoring)\n- [How to create a CI/CD pipeline with GitHub actions and Elastic synthetic monitoring tests](https://www.elastic.co/blog/uniting-testing-and-monitoring-with-synthetic-monitoring)\n- [Creating end-to-end synthetics monitoring tests](https://www.elastic.co/blog/why-and-how-replace-end-to-end-tests-synthetic-monitors)\n- [Playwright (what Elastic uses for synthetic monitoring tests)](https://playwright.dev/)\n- [Elastic’s NPM library for synthetics monitoring test development](https://www.npmjs.com/package/@elastic/synthetics)\n\n![observability monitors](/assets/images/new-synthetic-monitoring-observability/blog-elastic-monitors.png)\n\n## Synthetic Monitoring: The missing piece in your observability puzzle\n\nSynthetic Monitoring plays a vital role in complementing traditional logs- and traces-driven Observability, offering a unique lens through which SREs can analyze their critical flows. In the dynamic world of digital applications, ensuring these flows are available and functioning as expected for end-users becomes critical. This is where Synthetic Monitoring shines, offering the only surefire method to gain visibility into these crucial aspects.\n\nMoreover, with the rise in the use of third-party dependencies in modern web applications, Synthetic Monitoring becomes indispensable. These third-party elements, while often improving functionality and user experience, can become weak links leading to failures or downtime. Synthetic Monitoring can provide exclusive visibility into these dependencies, enabling teams to identify and address potential issues proactively.\n\n![observability network requests](/assets/images/new-synthetic-monitoring-observability/blog-elastic-observability-network-requests.png)\n\nBy integrating Synthetic Monitoring into your Observability strategy, you can proactively identify and mitigate potential problems, preventing costly downtime and ensuring an optimal user experience. Our Synthetic Monitoring solution fits perfectly within this framework, providing a comprehensive tool to safeguard your applications\' performance and reliability.\n\n## SRE-focused solution\n\nElevate your SRE workflows with our Synthetic Monitoring product, built with an SRE\'s needs in mind. Enjoy access to dedicated error detail pages that serve up all crucial information at a glance, allowing you to effortlessly triage and diagnose issues. Our comparison feature offers a side-by-side view of the last successful test run and the failed one, further simplifying issue resolution. With additional features such as performance trend analysis, proactive alerts, and seamless [integration with incident management tools](https://www.elastic.co/integrations/data-integrations?solution=all-solutions&category=ticketing) (such as [ServiceNow](https://www.elastic.co/blog/elastic-integrations-with-servicenow-itsm-sir-itom)), our Synthetic Monitoring solution is the quintessential tool for maintaining smooth and reliable end-user experiences.\n\n![observability service unavailable](/assets/images/new-synthetic-monitoring-observability/blog-elastic-observability-service-unavailable.png)\n\n## A leap forward in GitOps management\n\nExperience an industry first in synthetic monitoring with our groundbreaking product, uniquely built on top of the powerful browser testing framework Playwright. This innovation enables you to manage monitors as code, allowing you to write and verify tests in pre-production before effortlessly pushing the test scripts into synthetic monitoring for ongoing testing in production.\n\n
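As an illustration of what monitors-as-code looks like, here is a minimal sketch of a journey written with the [@elastic/synthetics](https://www.npmjs.com/package/@elastic/synthetics) package; the URL, selector, and monitor id are assumptions standing in for your own application:

```javascript
// A hypothetical journey covering one critical user flow.
// Assumes: npm install --save-dev @elastic/synthetics
const { journey, step, monitor, expect } = require("@elastic/synthetics");

journey("Storefront checkout is reachable", ({ page, params }) => {
  // Example id and schedule; this monitor would run every 10 minutes.
  monitor.use({ id: "storefront-checkout", schedule: 10 });

  step("load the home page", async () => {
    await page.goto(params.url || "https://buy.example.com");
  });

  step("open the cart", async () => {
    await page.click("[data-test=cart]");
    expect(page.url()).toContain("/cart");
  });
});
```

Once a journey passes locally, the same file can be pushed to your deployment to run on a schedule from the managed testing locations.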
For developers wishing to run tests locally, our solution integrates seamlessly with the [NPM library](https://www.npmjs.com/package/@elastic/synthetics). This flexibility ensures that our product not only eliminates the lag between code releases and testing updates, but also simplifies the management of large volumes of monitors and scripts.\n\nMoreover, keeping scripts in source control provides further advantages such as change history, Role-Based Access Control (RBAC), and the opportunity to centralize your test code alongside your application code. In essence, our Playwright-based solution revolutionizes synthetic monitoring by streamlining the entire testing process, ensuring seamless and efficient monitoring in all environments.\n\n![observability open editions](/assets/images/new-synthetic-monitoring-observability/blog-elastic-open-editions.png)\n\n## Managed testing infrastructure for comprehensive coverage without the hassle\n\nOur Synthetic Monitoring solution introduces an Elastic-first managed testing service, offering a global network of testing locations. At launch, there are ten locations around the globe, and we will be continuously growing our footprint. Eliminate the headaches of hardware management, capacity planning, scaling, updating, and security patching. Conduct both lightweight and full browser tests with ease and take advantage of features such as automatic scaling, built-in security, and seamless integration with Elastic Observability. For those use cases requiring a testing agent deployed within your own infrastructure, we offer support via Private Testing Locations. This enables your teams to focus on what matters most — delivering outstanding user experiences.\n\n## Pricing and promotional period\n\nTo celebrate the launch, we\'re providing a free promotional period for the managed testing service. 
From now until September 1, 2023, all test execution will be free of charge. After that, the browser test runs will be charged at a minimal $0.014 per test run. We will also have a unique flat rate for ping test execution set at $35/month/region for virtually unlimited lightweight test execution. We will not charge for test execution for private locations. [View our Pricing page](https://www.elastic.co/pricing/) for more information.\\n\\n## Try it out\\n\\nDon\'t miss out on this opportunity to experience our unique approach to Synthetic Monitoring. [Upgrade your existing Elastic Stack to 8.8.0](https://www.elastic.co/blog/whats-new-elastic-observability-8-8-0) to take advantage of our free promotional period.\\n\\nRead about these capabilities and more in the Elastic Observability 8.8.0 [release notes](https://www.elastic.co/guide/en/welcome-to-elastic/current/new.html).\\n\\nExisting Elastic Cloud customers can access many of these features directly from the [Elastic Cloud console](https://cloud.elastic.co/). Not taking advantage of Elastic on cloud? [Start a free trial](https://www.elastic.co/cloud/cloud-trial-overview).\\n\\n_Originally published October 25, 2022; updated May 23, 2023._\\n\\n_The release and timing of any features or functionality described in this post remain at Elastic\'s sole discretion. Any features or functionality not currently available may not be delivered on time or at all._\\n","code":"var Component=(()=>{var g=Object.create;var r=Object.defineProperty;var u=Object.getOwnPropertyDescriptor;var p=Object.getOwnPropertyNames;var m=Object.getPrototypeOf,w=Object.prototype.hasOwnProperty;var f=(t,e)=>()=>(e||t((e={exports:{}}).exports,e),e.exports),y=(t,e)=>{for(var n in e)r(t,n,{get:e[n],enumerable:!0})},a=(t,e,n,s)=>{if(e&&typeof e==\\"object\\"||typeof e==\\"function\\")for(let o of p(e))!w.call(t,o)&&o!==n&&r(t,o,{get:()=>e[o],enumerable:!(s=u(e,o))||s.enumerable});return t};var b=(t,e,n)=>(n=t!=null?g(m(t)):{},a(e||!t||!t.__esModule?r(n,\\"default\\",{value:t,enumerable:!0}):n,t)),v=t=>a(r({},\\"__esModule\\",{value:!0}),t);var c=f((M,l)=>{l.exports=_jsx_runtime});var x={};y(x,{default:()=>d,frontmatter:()=>E});var i=b(c()),E={title:\\"Introducing Elastic Observability\'s new Synthetic Monitoring: Designed for seamless GitOps management and SRE-focused workflows\\",slug:\\"new-synthetic-monitoring-observability\\",date:\\"2022-10-20\\",description:\\"Elastic Observability introduces Synthetic Monitoring, a GitOps management and SRE-focused workflows game-changer. This tool provides visibility into critical flows and third-party dependencies, enhancing application performance and user experience.\\",author:[{slug:\\"drew-post\\"}],image:\\"the-end-of-databases-A_(1).jpg\\",tags:[{slug:\\"synthetics\\"}]};function h(t){let e={a:\\"a\\",em:\\"em\\",h2:\\"h2\\",img:\\"img\\",li:\\"li\\",p:\\"p\\",ul:\\"ul\\",...t.components};return(0,i.jsxs)(i.Fragment,{children:[(0,i.jsx)(e.p,{children:\\"We are excited to announce the general availability of Elastic Observability\'s all-new Synthetic Monitoring. 
This powerful tool, designed for streamlined GitOps management and Site Reliability Engineers (SRE) workflows, elevates your monitoring capabilities and empowers you to transform your application\'s performance.\\"}),`\\n`,(0,i.jsx)(e.p,{children:\\"As you read through the next few sections, you can also look at these additional resources:\\"}),`\\n`,(0,i.jsxs)(e.ul,{children:[`\\n`,(0,i.jsx)(e.li,{children:(0,i.jsx)(e.a,{href:\\"https://www.elastic.co/virtual-events/improve-business-outcomes-and-observability-with-synthetic-monitoring\\",rel:\\"nofollow\\",children:\\"On-demand webinar: Getting started with synthetic monitoring on Elastic\\"})}),`\\n`,(0,i.jsx)(e.li,{children:(0,i.jsx)(e.a,{href:\\"https://www.elastic.co/blog/uniting-testing-and-monitoring-with-synthetic-monitoring\\",rel:\\"nofollow\\",children:\\"How to create a CI/CD pipeline with GitHub actions and Elastic synthetic monitoring tests\\"})}),`\\n`,(0,i.jsx)(e.li,{children:(0,i.jsx)(e.a,{href:\\"https://www.elastic.co/blog/why-and-how-replace-end-to-end-tests-synthetic-monitors\\",rel:\\"nofollow\\",children:\\"Creating end-to-end synthetics monitoring tests\\"})}),`\\n`,(0,i.jsx)(e.li,{children:(0,i.jsx)(e.a,{href:\\"https://playwright.dev/\\",rel:\\"nofollow\\",children:\\"Playwright (what Elastic uses for synthetic monitoring tests)\\"})}),`\\n`,(0,i.jsx)(e.li,{children:(0,i.jsx)(e.a,{href:\\"https://www.npmjs.com/package/@elastic/synthetics\\",rel:\\"nofollow\\",children:\\"Elastic\\\\u2019s NPM library for synthetics monitoring test development\\"})}),`\\n`]}),`\\n`,(0,i.jsx)(e.p,{children:(0,i.jsx)(e.img,{src:\\"/assets/images/new-synthetic-monitoring-observability/blog-elastic-monitors.png\\",alt:\\"observability monitors\\",width:\\"1999\\",height:\\"1293\\"})}),`\\n`,(0,i.jsx)(e.h2,{id:\\"synthetic-monitoring-the-missing-piece-in-your-observability-puzzle\\",children:\\"Synthetic Monitoring: The missing piece in your observability puzzle\\"}),`\\n`,(0,i.jsx)(e.p,{children:\\"Synthetic Monitoring plays a vital role in complementing traditional logs and traces driven Observability, offering a unique lens through which SREs can analyze their critical flows. In the dynamic world of digital applications, ensuring these flows are available and functioning as expected for end-users becomes critical. This is where Synthetic Monitoring shines, offering the only surefire method to gain visibility into these crucial aspects.\\"}),`\\n`,(0,i.jsx)(e.p,{children:\\"Moreover, with the rise in the use of third-party dependencies in modern web applications, Synthetic Monitoring becomes indispensable. These third-party elements, while often improving functionality and user experience, can become weak links leading to failures or downtime. Synthetic Monitoring can provide exclusive visibility into these dependencies, enabling teams to identify and address potential issues proactively.\\"}),`\\n`,(0,i.jsx)(e.p,{children:(0,i.jsx)(e.img,{src:\\"/assets/images/new-synthetic-monitoring-observability/blog-elastic-observability-network-requests.png\\",alt:\\"observability network requests\\",width:\\"1999\\",height:\\"1302\\"})}),`\\n`,(0,i.jsx)(e.p,{children:\\"By integrating Synthetic Monitoring into your Observability strategy, you can proactively identify and mitigate potential problems, preventing costly downtime and ensuring an optimal user experience. 
Our Synthetic Monitoring solution fits perfectly within this framework, providing a comprehensive tool to safeguard your applications\' performance and reliability.\\"}),`\\n`,(0,i.jsx)(e.h2,{id:\\"sre-focused-solution\\",children:\\"SRE-focused solution\\"}),`\\n`,(0,i.jsxs)(e.p,{children:[\\"Elevate your SRE workflows with our Synthetic Monitoring product, built with an SRE\'s needs in mind. Enjoy access to dedicated error detail pages that serve up all crucial information at a glance, allowing you to effortlessly triage and diagnose issues. Our comparison feature offers a side-by-side view of the last successful test run and the failed one, further simplifying issue resolution. With additional features such as performance trend analysis, proactive alerts, and seamless \\",(0,i.jsx)(e.a,{href:\\"https://www.elastic.co/integrations/data-integrations?solution=all-solutions&category=ticketing\\",rel:\\"nofollow\\",children:\\"integration with incident management tools\\"}),\\", (such as \\",(0,i.jsx)(e.a,{href:\\"https://www.elastic.co/blog/elastic-integrations-with-servicenow-itsm-sir-itom\\",rel:\\"nofollow\\",children:\\"ServiceNow\\"}),\\") our Synthetic Monitoring solution is the quintessential tool for maintaining smooth and reliable end-user experiences.\\"]}),`\\n`,(0,i.jsx)(e.p,{children:(0,i.jsx)(e.img,{src:\\"/assets/images/new-synthetic-monitoring-observability/blog-elastic-observability-service-unavailable.png\\",alt:\\"observability service unavailable\\",width:\\"1999\\",height:\\"1673\\"})}),`\\n`,(0,i.jsx)(e.h2,{id:\\"a-leap-forward-in-gitops-management\\",children:\\"A leap forward in GitOps management\\"}),`\\n`,(0,i.jsx)(e.p,{children:\\"Experience an industry first in synthetic monitoring with our groundbreaking product, uniquely built on top of the powerful browser testing framework Playwright. This innovation enables you to manage monitors as code, allowing you to write and verify tests in pre-production before effortlessly pushing the test scripts into synthetic monitoring for ongoing testing in production.\\"}),`\\n`,(0,i.jsxs)(e.p,{children:[\\"For developers wishing to run tests locally, our solution integrates seamlessly with the \\",(0,i.jsx)(e.a,{href:\\"https://www.npmjs.com/package/@elastic/synthetics\\",rel:\\"nofollow\\",children:\\"NPM library\\"}),\\". This flexibility ensures that our product not only eliminates the lag between code releases and testing updates, but also simplifies the management of large volumes of monitors and scripts.\\"]}),`\\n`,(0,i.jsx)(e.p,{children:\\"Moreover, keeping scripts in source control further provides advantages such as version control, Role-Based Access Control (RBAC), and the opportunity to centralize your test code alongside your application code. In essence, our Playwright-based solution revolutionizes synthetic monitoring by streamlining the entire testing process, ensuring seamless and efficient monitoring in all environments.\\"}),`\\n`,(0,i.jsx)(e.p,{children:(0,i.jsx)(e.img,{src:\\"/assets/images/new-synthetic-monitoring-observability/blog-elastic-open-editions.png\\",alt:\\"observability open editions\\",width:\\"1999\\",height:\\"1174\\"})}),`\\n`,(0,i.jsx)(e.h2,{id:\\"managed-testing-infrastructure-for-comprehensive-coverage-without-the-hassle\\",children:\\"Managed testing infrastructure for comprehensive coverage without the hassle\\"}),`\\n`,(0,i.jsx)(e.p,{children:\\"Our Synthetic Monitoring solution introduces an Elastic-first managed testing service, offering a global network of testing locations. 
At launch there are ten locations around the globe and we will be continuously growing our footprint. Eliminate the headaches of hardware management, capacity planning, scaling, updating, and security patching. Conduct both lightweight and full browser tests with ease and take advantage of features such as automatic scaling, built-in security, and seamless integration with Elastic Observability. For those use cases requiring a testing agent deployed within your own infrastructure, we offer support via Private Testing Locations. This enables your teams to focus on what matters most \\\\u2014 delivering outstanding user experiences.\\"}),`\\n`,(0,i.jsx)(e.h2,{id:\\"pricing-and-promotional-period\\",children:\\"Pricing and promotional period\\"}),`\\n`,(0,i.jsxs)(e.p,{children:[\\"To celebrate the launch, we\'re providing a free promotional period for the managed testing service. From now until September 1, 2023, all test execution will be free of charge. After that, the browser test runs will be charged at a minimal $0.014 per test run. We will also have a unique flat rate for ping test execution set at $35/month/region for virtually unlimited lightweight test execution. We will not charge for test execution for private locations. \\",(0,i.jsx)(e.a,{href:\\"https://www.elastic.co/pricing/\\",rel:\\"nofollow\\",children:\\"View our Pricing page\\"}),\\" for more information.\\"]}),`\\n`,(0,i.jsx)(e.h2,{id:\\"try-it-out\\",children:\\"Try it out\\"}),`\\n`,(0,i.jsxs)(e.p,{children:[\\"Don\'t miss out on this opportunity to experience our unique approach to Synthetic Monitoring. \\",(0,i.jsx)(e.a,{href:\\"https://www.elastic.co/blog/whats-new-elastic-observability-8-8-0\\",rel:\\"nofollow\\",children:\\"Upgrade your existing Elastic Stack to 8.8.0\\"}),\\" to take advantage of our free promotional period.\\"]}),`\\n`,(0,i.jsxs)(e.p,{children:[\\"Read about these capabilities and more in the Elastic Observability 8.8.0 \\",(0,i.jsx)(e.a,{href:\\"https://www.elastic.co/guide/en/welcome-to-elastic/current/new.html\\",rel:\\"nofollow\\",children:\\"release notes\\"}),\\".\\"]}),`\\n`,(0,i.jsxs)(e.p,{children:[\\"Existing Elastic Cloud customers can access many of these features directly from the \\",(0,i.jsx)(e.a,{href:\\"https://cloud.elastic.co/\\",rel:\\"nofollow\\",children:\\"Elastic Cloud console\\"}),\\". Not taking advantage of Elastic on cloud? \\",(0,i.jsx)(e.a,{href:\\"https://www.elastic.co/cloud/cloud-trial-overview\\",rel:\\"nofollow\\",children:\\"Start a free trial\\"}),\\".\\"]}),`\\n`,(0,i.jsx)(e.p,{children:(0,i.jsx)(e.em,{children:\\"Originally published October 25, 2022; updated May 23, 2023.\\"})}),`\\n`,(0,i.jsx)(e.p,{children:(0,i.jsx)(e.em,{children:\\"The release and timing of any features or functionality described in this post remain at Elastic\'s sole discretion. 
Any features or functionality not currently available may not be delivered on time or at all.\\"})]})}function d(t={}){let{wrapper:e}=t.components||{};return e?(0,i.jsx)(e,{...t,children:(0,i.jsx)(h,{...t})}):h(t)}return v(x);})();\\n;return Component;"},"_id":"articles/new-synthetic-monitoring-observability.mdx","_raw":{"sourceFilePath":"articles/new-synthetic-monitoring-observability.mdx","sourceFileName":"new-synthetic-monitoring-observability.mdx","sourceFileDir":"articles","contentType":"mdx","flattenedPath":"articles/new-synthetic-monitoring-observability"},"type":"Article","imageUrl":"/assets/images/new-synthetic-monitoring-observability/the-end-of-databases-A_(1).jpg","readingTime":"5 min read","url":"/new-synthetic-monitoring-observability","headings":[{"level":2,"title":"Synthetic Monitoring: The missing piece in your observability puzzle","href":"#synthetic-monitoring-the-missing-piece-in-your-observability-puzzle"},{"level":2,"title":"SRE-focused solution","href":"#sre-focused-solution"},{"level":2,"title":"A leap forward in GitOps management","href":"#a-leap-forward-in-gitops-management"},{"level":2,"title":"Managed testing infrastructure for comprehensive coverage without the hassle","href":"#managed-testing-infrastructure-for-comprehensive-coverage-without-the-hassle"},{"level":2,"title":"Pricing and promotional period","href":"#pricing-and-promotional-period"},{"level":2,"title":"Try it out","href":"#try-it-out"}]},{"title":"NGINX log analytics with GenAI in Elastic","slug":"nginx-log-analytics-with-genai-elastic","date":"2024-07-05","description":"Elastic has a set of embedded capabilities such as a GenAI RAG-based AI Assistant and a machine learning platform as part of the product baseline. These make analyzing the vast number of logs you get from NGINX easier.","image":"blog-thumb-observability-pattern-color.png","author":[{"slug":"bahubali-shetti","type":"Author","_raw":{}}],"tags":[{"slug":"genai","type":"Tag","_raw":{}},{"slug":"nginx","type":"Tag","_raw":{}},{"slug":"log-analytics","type":"Tag","_raw":{}},{"slug":"ai-assistant","type":"Tag","_raw":{}}],"body":{"raw":"\\nElastic Observability provides a full observability solution, supporting metrics, traces, and logs for applications and infrastructure. NGINX, which is widely used for web serving, load balancing, HTTP caching, and reverse proxying, is key to many applications and outputs a large volume of logs. NGINX’s access logs, which detail all requests made to the server, and its error logs, which record server-related issues and problems, are key to managing and analyzing NGINX issues and to understanding what is happening to your application.\xa0\\n\\nFor managing NGINX, Elastic provides several capabilities:\\n\\n1. Easy ingest, parsing, and out-of-the-box dashboards. Check out the simple how-to in our [docs](https://www.elastic.co/guide/en/fleet/current/example-standalone-monitor-nginx.html). Based on logs, these dashboards show several items over time: response codes, errors, top pages, data volume, browsers used, active connections, drop rates, and much more.\\n\\n2. Out-of-the-box ML-based anomaly detection jobs for your NGINX logs. These jobs help pinpoint anomalies in request rates, IP address request rates, URL access, status codes, and visitor rates.\\n\\n3. ES|QL, which helps work through logs and build out charts during analysis.\\n\\n4. 
Elastic’s GenAI Assistant provides a simple natural language interface that helps analyze all the logs and can pull out issues from ML jobs and even create dashboards. The Elastic AI Assistant also automatically uses ES|QL.\\n\\n5. NGINX SLOs - Finally, Elastic provides the ability to define and monitor SLOs for your NGINX logs. While most SLOs are metrics-based, Elastic allows you to create logs-based SLOs. We detailed this in a previous [blog](https://www.elastic.co/observability-labs/blog/service-level-objectives-slos-logs-metrics).\\n\\nNGINX logs are another example of why logs are great.\xa0 Logging is an important part of Observability, even though we generally think first of metrics and tracing. However, the volume of logs an application and the underlying infrastructure output can be daunting, and NGINX is usually the starting point for most analyses.\xa0\\n\\nIn today’s blog, we’ll cover how the out-of-the-box ML-based anomaly detection jobs can help with root cause analysis (RCA), and how Elastic’s GenAI Assistant helps easily work through logs to pinpoint issues in minutes.\xa0\\n\\n## Prerequisites and config\\n\\nIf you plan on following this blog, here are some of the components and details we used to set up this demonstration:\\n\\n\\n- Ensure you have an account on [Elastic Cloud](https://cloud.elastic.co) and a deployed stack ([see instructions here](https://www.elastic.co/guide/en/elastic-stack/current/installing-elastic-stack.html)).\\n\\n- Bring up an [NGINX server](https://docs.nginx.com/nginx/admin-guide/web-server/) on a host, or run an application with NGINX as a front end and drive traffic.\\n\\n- Install the NGINX integration and assets and review the dashboards as noted in the [docs](https://www.elastic.co/guide/en/fleet/current/example-standalone-monitor-nginx.html).\\n\\n- Ensure you have an [ML node configured](https://www.elastic.co/guide/en/elasticsearch/reference/current/ml-settings.html) in your Elastic stack.\\n\\n- To use the AI Assistant, you will need a trial license or an upgrade to Platinum.\\n\\n\\nIn our scenario, we use three months of data from our Elastic environment to help highlight the features. Hence you might need to run your application with traffic for a specific time frame to follow along.\\n\\n\\n## Analyzing the issues with AI Assistant\\n\\nAs detailed in a previous [blog](https://www.elastic.co/observability-labs/blog/service-level-objectives-slos-logs-metrics), you can get alerted on issues via SLO monitoring against NGINX logs. Let’s assume you have an SLO based on status codes as we outlined in the previous [blog](https://www.elastic.co/observability-labs/blog/service-level-objectives-slos-logs-metrics). You can immediately analyze the issue via the AI Assistant. Because it\'s a chat interface, we simply open the AI Assistant and work through some simple analysis (see the demo below):\\n\\n\\n### AI Assistant analysis:\\n\\n- **_Using lens graph all http response status codes < 400 and > =400 from filebeat-nginx-elasticco-anon-2017. http.response.status.code is not an integer_** _-_ We wanted to simply understand the number of requests resulting in status code >= 400 and graph the results. We see that 15% of the requests were not successful, hence an SLO alert being triggered.\\n\\n- **Which ip address (field source.address) has the highest number of http.response.status.code >= 400 from filebeat-nginx-elasticco-anon-2017. http.response.status.code is not an integer** \xa0- We were curious if there was a specific IP address with failing requests. The address 72.57.0.53, with a count of 25,227 occurrences, is fairly high, but it does not account for all of the failed requests.\\n\\n- **_What country (source.geo.country_iso_code) is source.address=72.57.0.53 coming from. Use filebeat-nginx-elasticco-anon-2017._** - Again, we were curious whether this traffic came from a specific country. The IP address 72.57.0.53 comes from the country with the ISO code IN, which corresponds to India. Nothing out of the ordinary.\\n\\n- **_Did source.address=72.57.0.53 have any (http.response.status.code < 400) from filebeat-nginx-elasticco-anon-2017. http.response.status.code is not an integer -_**\xa0 Oddly, the IP address in question had only 4,000+ successful responses, meaning it’s likely not malicious, and the failures point to something else.\\n\\n- **_What are the different status codes (http.response.status.code>=400), from source.address=72.57.0.53. Use filebeat-nginx-elasticco-anon-2017. http.response.status.code is not an integer. Provide counts for each status code -_** We were curious whether we would see any 502s; there were none, and most of the failures were 404s.\xa0\\n\\n- **_What are the different status codes (http.response.status.code>=400). Use filebeat-nginx-elasticco-anon-2017. http.response.status.code is not an integer. Provide counts for each status code_** - Regardless of the specific address, which status codes >= 400 occur most often? This also points to 404.\xa0\\n\\n- **_What does a high 404 count from a specific IP address mean from NGINX logs?_** - We asked this question to understand the potential causes in our application. From the answers, we can rule out security probing and web scraping, since we validated that the specific address 72.57.0.53 has a low share of non-success status codes. User error is also ruled out. Hence this potentially points to broken links or missing resources.
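\\n\\nFor readers who want to reproduce this analysis outside of the chat interface, the queries the Assistant generates look roughly like the following ES|QL (a sketch: the index name comes from the prompts above, and TO_INTEGER is used because the status code field is not mapped as an integer):\\n\\n```esql\\n// Count failed requests by status code, most frequent first\\nFROM filebeat-nginx-elasticco-anon-2017\\n| WHERE TO_INTEGER(http.response.status.code) >= 400\\n| STATS failures = COUNT(*) BY http.response.status.code\\n| SORT failures DESC\\n```\\n\\n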
### Watch the flow:\\n\\n\\n\\n### Potential issue:\\n\\nIt seems that we potentially have an issue with the backend serving specific pages or with resources (database issues or broken links). This is causing the higher-than-normal rate of non-successful status codes (>=400).\\n\\n### Key highlights from AI Assistant:\\n\\nAs you watch this video, you will notice a few things:\\n\\n1. We analyzed millions of logs in a matter of minutes using a set of simple natural language queries.\xa0\\n\\n2. We didn’t need to know any special query language. The AI Assistant used Elastic’s ES|QL but can similarly use KQL.\xa0\\n\\n3. The AI Assistant easily builds out graphs.\\n\\n4. The AI Assistant accesses and uses internal information stored in Elastic’s indices, as opposed to a generic assistant limited to public web knowledge. This is enabled through RAG, and the AI Assistant can also bring up known issues in GitHub, runbooks, and other useful internal information.\\n\\nCheck out the following [blog](https://www.elastic.co/observability-labs/blog/elastic-rag-ai-assistant-application-issues-llm-github) on how the AI Assistant uses RAG to retrieve internal information, specifically using GitHub and runbooks.\\n\\n\\n## Locating anomalies with ML\\n\\nWhile using the AI Assistant is great for analyzing information, another important aspect of NGINX log management is ensuring you can manage log spikes and anomalies. 
Elastic has a machine learning platform that allows you to develop jobs that analyze specific metrics, or multiple metrics, to look for anomalies. When using NGINX, there are several [out-of-the-box anomaly detection jobs](https://www.elastic.co/guide/en/machine-learning/current/ootb-ml-jobs-nginx.html). These work specifically on NGINX access logs.\\n\\n* Low_request_rate_nginx - Detect low request rates\\n\\n* Source_ip_request_rate_nginx - Detect unusual source IPs - high request rates\\n\\n* Source_ip_url_count_nginx - Detect unusual source IPs - high distinct count of URLs\\n\\n* Status_code_rate_nginx - Detect unusual status code rates\\n\\n* Visitor_rate_nginx - Detect unusual visitor rates\\n\\nSince these come right out of the box, let’s look at the job Status_code_rate_nginx, which is related to our previous analysis.\\n\\n![NGINX ML Log Analytics](/assets/images/nginx-log-analytics-with-genai-elastic/nginx-ml-log-analytics.png)\\n\\nWith a few simple clicks, we immediately get an analysis showing a specific IP address, 72.57.0.53, with higher-than-normal non-successful requests. Notably, this matches what we found using the AI Assistant.\\n\\nWe can take this further with conversations with the AI Assistant, look at the logs, or even review the other ML anomaly jobs.\\n\\n\\n## Conclusion:\\n\\nYou’ve now seen how easily Elastic’s RAG-based AI Assistant can help analyze NGINX logs without needing to know the query syntax, where the data lives, or even the field names. Additionally, you’ve seen how Elastic can alert you to a potential issue or degradation in service via SLOs.\xa0\\n\\nCheck out other resources on NGINX logs:\\n\\n[Out-of-the-box anomaly detection jobs for NGINX](https://www.elastic.co/guide/en/machine-learning/current/ootb-ml-jobs-nginx.html)\\n\\n[Using the NGINX integration to ingest and analyze NGINX Logs](https://www.elastic.co/guide/en/fleet/current/example-standalone-monitor-nginx.html)\\n\\n[NGINX Logs based SLOs in Elastic](https://www.elastic.co/observability-labs/blog/service-level-objectives-slos-logs-metrics)\\n\\n[Using GitHub issues, runbooks, and other internal information for RCAs with Elastic’s RAG based AI Assistant](https://www.elastic.co/observability-labs/blog/elastic-rag-ai-assistant-application-issues-llm-github)\\n\\n\\n## Try it out\\n\\nExisting Elastic Cloud customers can access many of these features directly from the [Elastic Cloud console](https://cloud.elastic.co/). Not taking advantage of Elastic on the cloud? [Start a free trial](https://www.elastic.co/cloud/cloud-trial-overview).\\n\\nAll of this is also possible in your environment. [Learn how to get started today](https://www.elastic.co/observability/universal-profiling).\\n\\n_The release and timing of any features or functionality described in this post remain at Elastic\'s sole discretion. Any features or functionality not currently available may not be delivered on time or at all._\\n\\n_In this blog post, we may have used or referred to third party generative AI tools, which are owned and operated by their respective owners. Elastic does not have any control over the third party tools and we have no responsibility or liability for their content, operation or use, nor for any loss or damage that may arise from your use of such tools. Please exercise caution when using AI tools with personal, sensitive or confidential information. Any data you submit may be used for AI training or other purposes. 
There is no guarantee that information you provide will be kept secure or confidential. You should familiarize yourself with the privacy practices and terms of use of any generative AI tools prior to use._\\n\\n_Elastic, Elasticsearch, ESRE, Elasticsearch Relevance Engine and associated marks are trademarks, logos or registered trademarks of Elasticsearch N.V. in the United States and other countries. All other company and product names are trademarks, logos or registered trademarks of their respective owners._\\n","code":"var Component=(()=>{var u=Object.create;var a=Object.defineProperty;var g=Object.getOwnPropertyDescriptor;var p=Object.getOwnPropertyNames;var f=Object.getPrototypeOf,m=Object.prototype.hasOwnProperty;var w=(s,e)=>()=>(e||s((e={exports:{}}).exports,e),e.exports),y=(s,e)=>{for(var t in e)a(s,t,{get:e[t],enumerable:!0})},r=(s,e,t,o)=>{if(e&&typeof e==\\"object\\"||typeof e==\\"function\\")for(let i of p(e))!m.call(s,i)&&i!==t&&a(s,i,{get:()=>e[i],enumerable:!(o=g(e,i))||o.enumerable});return s};var b=(s,e,t)=>(t=s!=null?u(f(s)):{},r(e||!s||!s.__esModule?a(t,\\"default\\",{value:s,enumerable:!0}):t,s)),v=s=>r(a({},\\"__esModule\\",{value:!0}),s);var c=w((k,l)=>{l.exports=_jsx_runtime});var N={};y(N,{default:()=>d,frontmatter:()=>I});var n=b(c()),I={title:\\"NGINX log analytics with GenAI in Elastic\\",slug:\\"nginx-log-analytics-with-genai-elastic\\",date:\\"2024-07-05\\",description:\\"Elastic has a set of embedded capabilities such as a GenAI RAG-based AI Assistant and a machine learning platform as part of the product baseline. These make analyzing the vast number of logs you get from NGINX easier.\\",author:[{slug:\\"bahubali-shetti\\"}],image:\\"blog-thumb-observability-pattern-color.png\\",tags:[{slug:\\"genai\\"},{slug:\\"nginx\\"},{slug:\\"log-analytics\\"},{slug:\\"ai-assistant\\"}]};function h(s){let e={a:\\"a\\",em:\\"em\\",h2:\\"h2\\",h3:\\"h3\\",img:\\"img\\",li:\\"li\\",ol:\\"ol\\",p:\\"p\\",strong:\\"strong\\",ul:\\"ul\\",...s.components},{Video:t}=e;return t||A(\\"Video\\",!0),(0,n.jsxs)(n.Fragment,{children:[(0,n.jsx)(e.p,{children:\\"Elastic Observability provides a full observability solution, supporting metrics, traces, and logs for applications and infrastructure. NGINX, which is widely used for web serving, load balancing, HTTP caching, and reverse proxying, is key to many applications and outputs a large volume of logs. NGINX\\\\u2019s access logs, which detail all requests made to the server, and its error logs, which record server-related issues and problems, are key to managing and analyzing NGINX issues and to understanding what is happening to your application.\\\\xA0\\"}),`\\n`,(0,n.jsx)(e.p,{children:\\"For managing NGINX, Elastic provides several capabilities:\\"}),`\\n`,(0,n.jsxs)(e.ol,{children:[`\\n`,(0,n.jsxs)(e.li,{children:[`\\n`,(0,n.jsxs)(e.p,{children:[\\"Easy ingest, parsing, and out-of-the-box dashboards. Check out the simple how-to in our \\",(0,n.jsx)(e.a,{href:\\"https://www.elastic.co/guide/en/fleet/current/example-standalone-monitor-nginx.html\\",rel:\\"nofollow\\",children:\\"docs\\"}),\\". Based on logs, these dashboards show several items over time: response codes, errors, top pages, data volume, browsers used, active connections, drop rates, and much more.\\"]}),`\\n`]}),`\\n`,(0,n.jsxs)(e.li,{children:[`\\n`,(0,n.jsx)(e.p,{children:\\"Out-of-the-box ML-based anomaly detection jobs for your NGINX logs. 
These jobs help pinpoint anomalies against request rates, IP address request rates, URL access, status codes, and visitor rate anomalies.\\"}),`\\n`]}),`\\n`,(0,n.jsxs)(e.li,{children:[`\\n`,(0,n.jsx)(e.p,{children:\\"ES|QL which helps work through logs and build out charts during analysis.\\"}),`\\n`]}),`\\n`,(0,n.jsxs)(e.li,{children:[`\\n`,(0,n.jsx)(e.p,{children:\\"Elastic\\\\u2019s GenAI Assistant provides a simple natural language interface that helps analyze all the logs and can pull out issues from ML jobs and even create dashboards. The Elastic AI Assistant also automatically uses ES|QL.\\"}),`\\n`]}),`\\n`,(0,n.jsxs)(e.li,{children:[`\\n`,(0,n.jsxs)(e.p,{children:[\\"NGINX SLOs - Finally Elastic provides the ability to define and monitor SLOs for your NGINX logs. While most SLOs are metrics-based, Elastic allows you to create logs-based SLOs. We detailed this in a previous \\",(0,n.jsx)(e.a,{href:\\"https://www.elastic.co/observability-labs/blog/service-level-objectives-slos-logs-metrics\\",rel:\\"nofollow\\",children:\\"blog\\"}),\\".\\"]}),`\\n`]}),`\\n`]}),`\\n`,(0,n.jsx)(e.p,{children:\\"NGINX logs are another example of why logs are great.\\\\xA0 Logging is an important part of Observability, for which we generally think of metrics and tracing. However, the amount of logs an application and the underlying infrastructure output can be significantly daunting and NGINX is usually the starting point for most analyses.\\\\xA0\\"}),`\\n`,(0,n.jsx)(e.p,{children:\\"In today\\\\u2019s blog, we\\\\u2019ll cover how the out-of-the-box ML-based anomaly detection jobs can help RCA, and how Elastic\\\\u2019s GenAI Assistant helps easily work through logs to pinpoint issues in minutes.\\\\xA0\\"}),`\\n`,(0,n.jsxs)(e.h2,{id:\\"prerequisites-and-config\\",children:[\\"Prerequisites and config\\",(0,n.jsx)(\\"a\\",{id:\\"prerequisites-and-config\\"})]}),`\\n`,(0,n.jsx)(e.p,{children:\\"If you plan on following this blog, here are some of the components and details we used to set up this demonstration:\\"}),`\\n`,(0,n.jsxs)(e.ul,{children:[`\\n`,(0,n.jsxs)(e.li,{children:[`\\n`,(0,n.jsxs)(e.p,{children:[\\"Ensure you have an account on \\",(0,n.jsx)(e.a,{href:\\"https://cloud.elastic.co\\",rel:\\"nofollow\\",children:\\"Elastic Cloud\\"}),\\" and a deployed stack (\\",(0,n.jsx)(e.a,{href:\\"https://www.elastic.co/guide/en/elastic-stack/current/installing-elastic-stack.html\\",rel:\\"nofollow\\",children:\\"see instructions here\\"}),\\").\\"]}),`\\n`]}),`\\n`,(0,n.jsxs)(e.li,{children:[`\\n`,(0,n.jsxs)(e.p,{children:[\\"Bring up an \\",(0,n.jsx)(e.a,{href:\\"https://docs.nginx.com/nginx/admin-guide/web-server/\\",rel:\\"nofollow\\",children:\\"NGINX server\\"}),\\" on a host. 
Or run an application with NGINX as a front end and drive traffic.\\"]}),`\\n`]}),`\\n`,(0,n.jsxs)(e.li,{children:[`\\n`,(0,n.jsxs)(e.p,{children:[\\"Install the NGINX integration and assets and review the dashboards as noted in the \\",(0,n.jsx)(e.a,{href:\\"https://www.elastic.co/guide/en/fleet/current/example-standalone-monitor-nginx.html\\",rel:\\"nofollow\\",children:\\"docs\\"}),\\".\\"]}),`\\n`]}),`\\n`,(0,n.jsxs)(e.li,{children:[`\\n`,(0,n.jsxs)(e.p,{children:[\\"Ensure you have an \\",(0,n.jsx)(e.a,{href:\\"https://www.elastic.co/guide/en/elasticsearch/reference/current/ml-settings.html\\",rel:\\"nofollow\\",children:\\"ML node configured\\"}),\\" in your Elastic stack.\\"]}),`\\n`]}),`\\n`,(0,n.jsxs)(e.li,{children:[`\\n`,(0,n.jsx)(e.p,{children:\\"To use the AI Assistant, you will need a trial license or an upgrade to Platinum.\\"}),`\\n`]}),`\\n`]}),`\\n`,(0,n.jsx)(e.p,{children:\\"In our scenario, we use three months of data from our Elastic environment to help highlight the features. Hence you might need to run your application with traffic for a specific time frame to follow along.\\"}),`\\n`,(0,n.jsxs)(e.h2,{id:\\"analyzing-the-issues-with-ai-assistant\\",children:[\\"Analyzing the issues with AI Assistant\\",(0,n.jsx)(\\"a\\",{id:\\"analyzing-the-issues-with-ai-assistant\\"})]}),`\\n`,(0,n.jsxs)(e.p,{children:[\\"As detailed in a previous \\",(0,n.jsx)(e.a,{href:\\"https://www.elastic.co/observability-labs/blog/service-level-objectives-slos-logs-metrics\\",rel:\\"nofollow\\",children:\\"blog\\"}),\\", you can get alerted on issues via SLO monitoring against NGINX logs. Let\\\\u2019s assume you have an SLO based on status codes as we outlined in the previous \\",(0,n.jsx)(e.a,{href:\\"https://www.elastic.co/observability-labs/blog/service-level-objectives-slos-logs-metrics\\",rel:\\"nofollow\\",children:\\"blog\\"}),\\". You can immediately analyze the issue via the AI Assistant. Because it\'s a chat interface, we simply open the AI Assistant and work through some simple analysis (see the demo below):\\"]}),`\\n`,(0,n.jsxs)(e.h3,{id:\\"ai-assistant-analysis\\",children:[\\"AI Assistant analysis:\\",(0,n.jsx)(\\"a\\",{id:\\"ai-assistant-analysis\\"})]}),`\\n`,(0,n.jsxs)(e.ul,{children:[`\\n`,(0,n.jsxs)(e.li,{children:[`\\n`,(0,n.jsxs)(e.p,{children:[(0,n.jsx)(e.strong,{children:(0,n.jsx)(e.em,{children:\\"Using lens graph all http response status codes < 400 and > =400 from filebeat-nginx-elasticco-anon-2017. http.response.status.code is not an integer\\"})}),\\" \\",(0,n.jsx)(e.em,{children:\\"-\\"}),\\" We wanted to simply understand the number of requests resulting in status code >= 400 and graph the results. We see that 15% of the requests were not successful, hence an SLO alert being triggered.\\"]}),`\\n`]}),`\\n`,(0,n.jsxs)(e.li,{children:[`\\n`,(0,n.jsxs)(e.p,{children:[(0,n.jsx)(e.strong,{children:\\"Which ip address (field source.address) has the highest number of http.response.status.code >= 400 from filebeat-nginx-elasticco-anon-2017. http.response.status.code is not an integer\\"}),\\" \\\\xA0- We were curious if there was a specific IP address with failing requests. The address 72.57.0.53, with a count of 25,227 occurrences, is fairly high, but it does not account for all of the failed requests.\\"]}),`\\n`]}),`\\n`,(0,n.jsxs)(e.li,{children:[`\\n`,(0,n.jsxs)(e.p,{children:[(0,n.jsx)(e.strong,{children:(0,n.jsx)(e.em,{children:\\"What country (source.geo.country_iso_code) is source.address=72.57.0.53 coming from. 
Use filebeat-nginx-elasticco-anon-2017.\\"})}),\\" - Again, we were curious whether this traffic came from a specific country. The IP address 72.57.0.53 comes from the country with the ISO code IN, which corresponds to India. Nothing out of the ordinary.\\"]}),`\\n`]}),`\\n`,(0,n.jsxs)(e.li,{children:[`\\n`,(0,n.jsxs)(e.p,{children:[(0,n.jsx)(e.strong,{children:(0,n.jsx)(e.em,{children:\\"Did source.address=72.57.0.53 have any (http.response.status.code < 400) from filebeat-nginx-elasticco-anon-2017. http.response.status.code is not an integer -\\"})}),\\"\\\\xA0 Oddly, the IP address in question had only 4,000+ successful responses, meaning it\\\\u2019s likely not malicious, and the failures point to something else.\\"]}),`\\n`]}),`\\n`,(0,n.jsxs)(e.li,{children:[`\\n`,(0,n.jsxs)(e.p,{children:[(0,n.jsx)(e.strong,{children:(0,n.jsx)(e.em,{children:\\"What are the different status codes (http.response.status.code>=400), from source.address=72.57.0.53. Use filebeat-nginx-elasticco-anon-2017. http.response.status.code is not an integer. Provide counts for each status code -\\"})}),\\" We were curious whether we would see any 502s; there were none, and most of the failures were 404s.\\\\xA0\\"]}),`\\n`]}),`\\n`,(0,n.jsxs)(e.li,{children:[`\\n`,(0,n.jsxs)(e.p,{children:[(0,n.jsx)(e.strong,{children:(0,n.jsx)(e.em,{children:\\"What are the different status codes (http.response.status.code>=400). Use filebeat-nginx-elasticco-anon-2017. http.response.status.code is not an integer. Provide counts for each status code\\"})}),\\" - Regardless of the specific address, which status codes >= 400 occur most often? This also points to 404.\\\\xA0\\"]}),`\\n`]}),`\\n`,(0,n.jsxs)(e.li,{children:[`\\n`,(0,n.jsxs)(e.p,{children:[(0,n.jsx)(e.strong,{children:(0,n.jsx)(e.em,{children:\\"What does a high 404 count from a specific IP address mean from NGINX logs?\\"})}),\\" - We asked this question to understand the potential causes in our application. From the answers, we can rule out security probing and web scraping, since we validated that the specific address 72.57.0.53 has a low share of non-success status codes. User error is also ruled out. Hence this potentially points to broken links or missing resources.\\"]}),`\\n`]}),`\\n`]}),`\\n`,(0,n.jsxs)(e.h3,{id:\\"watch-the-flow\\",children:[\\"Watch the flow:\\",(0,n.jsx)(\\"a\\",{id:\\"watch-the-flow\\"})]}),`\\n`,(0,n.jsx)(t,{vidyardUuid:\\"ak9xDdhcL3SxpqU7CRsD68\\"}),`\\n`,(0,n.jsx)(e.h3,{id:\\"potential-issue\\",children:\\"Potential issue:\\"}),`\\n`,(0,n.jsx)(e.p,{children:\\"It seems that we potentially have an issue with the backend serving specific pages or with resources (database issues or broken links). This is causing the higher-than-normal rate of non-successful status codes (>=400).\\"}),`\\n`,(0,n.jsx)(e.h3,{id:\\"key-highlights-from-ai-assistant\\",children:\\"Key highlights from AI Assistant:\\"}),`\\n`,(0,n.jsx)(e.p,{children:\\"As you watch this video, you will notice a few things:\\"}),`\\n`,(0,n.jsxs)(e.ol,{children:[`\\n`,(0,n.jsxs)(e.li,{children:[`\\n`,(0,n.jsx)(e.p,{children:\\"We analyzed millions of logs in a matter of minutes using a set of simple natural language queries.\\\\xA0\\"}),`\\n`]}),`\\n`,(0,n.jsxs)(e.li,{children:[`\\n`,(0,n.jsx)(e.p,{children:\\"We didn\\\\u2019t need to know any special query language. 
The AI Assistant used Elastic\\\\u2019s ES|QL but can similarly use KQL.\\\\xA0\\"}),`\\n`]}),`\\n`,(0,n.jsxs)(e.li,{children:[`\\n`,(0,n.jsx)(e.p,{children:\\"The AI Assistant easily builds out graphs.\\"}),`\\n`]}),`\\n`,(0,n.jsxs)(e.li,{children:[`\\n`,(0,n.jsx)(e.p,{children:\\"The AI Assistant accesses and uses internal information stored in Elastic\\\\u2019s indices, as opposed to a generic assistant limited to public web knowledge. This is enabled through RAG, and the AI Assistant can also bring up known issues in GitHub, runbooks, and other useful internal information.\\"}),`\\n`]}),`\\n`]}),`\\n`,(0,n.jsxs)(e.p,{children:[\\"Check out the following \\",(0,n.jsx)(e.a,{href:\\"https://www.elastic.co/observability-labs/blog/elastic-rag-ai-assistant-application-issues-llm-github\\",rel:\\"nofollow\\",children:\\"blog\\"}),\\" on how the AI Assistant uses RAG to retrieve internal information, specifically using GitHub and runbooks.\\"]}),`\\n`,(0,n.jsx)(e.h2,{id:\\"locating-anomalies-with-ml\\",children:\\"Locating anomalies with ML\\"}),`\\n`,(0,n.jsxs)(e.p,{children:[\\"While using the AI Assistant is great for analyzing information, another important aspect of NGINX log management is ensuring you can manage log spikes and anomalies. Elastic has a machine learning platform that allows you to develop jobs that analyze specific metrics, or multiple metrics, to look for anomalies. When using NGINX, there are several \\",(0,n.jsx)(e.a,{href:\\"https://www.elastic.co/guide/en/machine-learning/current/ootb-ml-jobs-nginx.html\\",rel:\\"nofollow\\",children:\\"out-of-the-box anomaly detection jobs\\"}),\\". These work specifically on NGINX access logs.\\"]}),`\\n`,(0,n.jsxs)(e.ul,{children:[`\\n`,(0,n.jsxs)(e.li,{children:[`\\n`,(0,n.jsx)(e.p,{children:\\"Low_request_rate_nginx - Detect low request rates\\"}),`\\n`]}),`\\n`,(0,n.jsxs)(e.li,{children:[`\\n`,(0,n.jsx)(e.p,{children:\\"Source_ip_request_rate_nginx - Detect unusual source IPs - high request rates\\"}),`\\n`]}),`\\n`,(0,n.jsxs)(e.li,{children:[`\\n`,(0,n.jsx)(e.p,{children:\\"Source_ip_url_count_nginx - Detect unusual source IPs - high distinct count of URLs\\"}),`\\n`]}),`\\n`,(0,n.jsxs)(e.li,{children:[`\\n`,(0,n.jsx)(e.p,{children:\\"Status_code_rate_nginx - Detect unusual status code rates\\"}),`\\n`]}),`\\n`,(0,n.jsxs)(e.li,{children:[`\\n`,(0,n.jsx)(e.p,{children:\\"Visitor_rate_nginx - Detect unusual visitor rates\\"}),`\\n`]}),`\\n`]}),`\\n`,(0,n.jsx)(e.p,{children:\\"Since these come right out of the box, let\\\\u2019s look at the job Status_code_rate_nginx, which is related to our previous analysis.\\"}),`\\n`,(0,n.jsx)(e.p,{children:(0,n.jsx)(e.img,{src:\\"/assets/images/nginx-log-analytics-with-genai-elastic/nginx-ml-log-analytics.png\\",alt:\\"NGINX ML Log Analytics\\",width:\\"2892\\",height:\\"1932\\"})}),`\\n`,(0,n.jsx)(e.p,{children:\\"With a few simple clicks, we immediately get an analysis showing a specific IP address, 72.57.0.53, with higher-than-normal non-successful requests. 
Notably, this matches what we found using the AI Assistant.\\"}),`\\n`,(0,n.jsx)(e.p,{children:\\"We can take this further with conversations with the AI Assistant, look at the logs, or even review the other ML anomaly jobs.\\"}),`\\n`,(0,n.jsxs)(e.h2,{id:\\"conclusion\\",children:[\\"Conclusion:\\",(0,n.jsx)(\\"a\\",{id:\\"conclusion\\"})]}),`\\n`,(0,n.jsx)(e.p,{children:\\"You\\\\u2019ve now seen how easily Elastic\\\\u2019s RAG-based AI Assistant can help analyze NGINX logs without needing to know the query syntax, where the data lives, or even the field names. Additionally, you\\\\u2019ve seen how Elastic can alert you to a potential issue or degradation in service via SLOs.\\\\xA0\\"}),`\\n`,(0,n.jsx)(e.p,{children:\\"Check out other resources on NGINX logs:\\"}),`\\n`,(0,n.jsx)(e.p,{children:(0,n.jsx)(e.a,{href:\\"https://www.elastic.co/guide/en/machine-learning/current/ootb-ml-jobs-nginx.html\\",rel:\\"nofollow\\",children:\\"Out-of-the-box anomaly detection jobs for NGINX\\"})}),`\\n`,(0,n.jsx)(e.p,{children:(0,n.jsx)(e.a,{href:\\"https://www.elastic.co/guide/en/fleet/current/example-standalone-monitor-nginx.html\\",rel:\\"nofollow\\",children:\\"Using the NGINX integration to ingest and analyze NGINX Logs\\"})}),`\\n`,(0,n.jsx)(e.p,{children:(0,n.jsx)(e.a,{href:\\"https://www.elastic.co/observability-labs/blog/service-level-objectives-slos-logs-metrics\\",rel:\\"nofollow\\",children:\\"NGINX Logs based SLOs in Elastic\\"})}),`\\n`,(0,n.jsx)(e.p,{children:(0,n.jsx)(e.a,{href:\\"https://www.elastic.co/observability-labs/blog/elastic-rag-ai-assistant-application-issues-llm-github\\",rel:\\"nofollow\\",children:\\"Using GitHub issues, runbooks, and other internal information for RCAs with Elastic\\\\u2019s RAG based AI Assistant\\"})}),`\\n`,(0,n.jsxs)(e.h2,{id:\\"try-it-out\\",children:[\\"Try it out\\",(0,n.jsx)(\\"a\\",{id:\\"try-it-out\\"})]}),`\\n`,(0,n.jsxs)(e.p,{children:[\\"Existing Elastic Cloud customers can access many of these features directly from the \\",(0,n.jsx)(e.a,{href:\\"https://cloud.elastic.co/\\",rel:\\"nofollow\\",children:\\"Elastic Cloud console\\"}),\\". Not taking advantage of Elastic on the cloud? \\",(0,n.jsx)(e.a,{href:\\"https://www.elastic.co/cloud/cloud-trial-overview\\",rel:\\"nofollow\\",children:\\"Start a free trial\\"}),\\".\\"]}),`\\n`,(0,n.jsxs)(e.p,{children:[\\"All of this is also possible in your environment. \\",(0,n.jsx)(e.a,{href:\\"https://www.elastic.co/observability/universal-profiling\\",rel:\\"nofollow\\",children:\\"Learn how to get started today\\"}),\\".\\"]}),`\\n`,(0,n.jsx)(e.p,{children:(0,n.jsx)(e.em,{children:\\"The release and timing of any features or functionality described in this post remain at Elastic\'s sole discretion. Any features or functionality not currently available may not be delivered on time or at all.\\"})}),`\\n`,(0,n.jsx)(e.p,{children:(0,n.jsx)(e.em,{children:\\"In this blog post, we may have used or referred to third party generative AI tools, which are owned and operated by their respective owners. Elastic does not have any control over the third party tools and we have no responsibility or liability for their content, operation or use, nor for any loss or damage that may arise from your use of such tools. Please exercise caution when using AI tools with personal, sensitive or confidential information. Any data you submit may be used for AI training or other purposes. There is no guarantee that information you provide will be kept secure or confidential. 
You should familiarize yourself with the privacy practices and terms of use of any generative AI tools prior to use.\\"})}),`\\n`,(0,n.jsx)(e.p,{children:(0,n.jsx)(e.em,{children:\\"Elastic, Elasticsearch, ESRE, Elasticsearch Relevance Engine and associated marks are trademarks, logos or registered trademarks of Elasticsearch N.V. in the United States and other countries. All other company and product names are trademarks, logos or registered trademarks of their respective owners.\\"})})]})}function d(s={}){let{wrapper:e}=s.components||{};return e?(0,n.jsx)(e,{...s,children:(0,n.jsx)(h,{...s})}):h(s)}function A(s,e){throw new Error(\\"Expected \\"+(e?\\"component\\":\\"object\\")+\\" `\\"+s+\\"` to be defined: you likely forgot to import, pass, or provide it.\\")}return v(N);})();\\n;return Component;"},"_id":"articles/nginx-log-analytics-with-genai-elastic.mdx","_raw":{"sourceFilePath":"articles/nginx-log-analytics-with-genai-elastic.mdx","sourceFileName":"nginx-log-analytics-with-genai-elastic.mdx","sourceFileDir":"articles","contentType":"mdx","flattenedPath":"articles/nginx-log-analytics-with-genai-elastic"},"type":"Article","imageUrl":"/assets/images/nginx-log-analytics-with-genai-elastic/blog-thumb-observability-pattern-color.png","readingTime":"9 min read","url":"/nginx-log-analytics-with-genai-elastic","headings":[{"level":2,"title":"Prerequisites and config","href":"#prerequisites-and-configa-idprerequisites-and-configa"},{"level":2,"title":"Analyzing the issues with AI Assistant","href":"#analyzing-the-issues-with-ai-assistanta-idanalyzing-the-issues-with-ai-assistanta"},{"level":3,"title":"AI Assistant analysis:","href":"#ai-assistant-analysisa-idai-assistant-analysisa"},{"level":3,"title":"Watch the flow:","href":"#watch-the-flowa-idwatch-the-flowa"},{"level":3,"title":"Potential issue:","href":"#potential-issue"},{"level":3,"title":"Key highlights from AI Assistant:","href":"#key-highlights-from-ai-assistant"},{"level":2,"title":"Locating anomalies with ML","href":"#locating-anomalies-with-ml"},{"level":2,"title":"Conclusion:","href":"#conclusiona-idconclusiona"},{"level":2,"title":"Try it out","href":"#try-it-outa-idtry-it-outa"}]},{"title":"Exploring Nginx metrics with Elastic time series data streams","slug":"nginx-metrics-elastic-time-series-data-streams","date":"2023-07-10","description":"Elasticsearch recently released time series metrics as GA. In this blog, we dive into details of what a time series metric document is and the mapping used for enabling time series by using an existing OOTB Nginx integration.","image":"time-series-data-streams-blog-720x420-1.jpg","author":[{"slug":"lalit-satapathy","type":"Author","_raw":{}}],"tags":[{"slug":"metrics","type":"Tag","_raw":{}},{"slug":"tsdb","type":"Tag","_raw":{}},{"slug":"nginx","type":"Tag","_raw":{}}],"body":{"raw":"\\nElasticsearch\xae recently released time series data streams for metrics. This not only provides better metrics support in Elastic Observability, but it also helps reduce [storage costs](https://www.elastic.co/blog/whats-new-elasticsearch-8-7-0). 
We discussed this in a [previous blog](https://www.elastic.co/blog/elasticsearch-time-series-data-streams-observability-metrics).\\n\\nIn this blog, we dive into how to enable and use time series data streams by reviewing what a time series metrics [document](https://www.elastic.co/guide/en/elasticsearch/reference/current/documents-indices.html) is and the [mapping](https://www.elastic.co/guide/en/elasticsearch/reference/current/mapping.html) used for enabling time series. In particular, we will showcase this by using Elastic Observability’s Nginx integration. As Elastic\xae [time series data stream (TSDS)](https://www.elastic.co/guide/en/elasticsearch/reference/8.8/tsds.html) metrics capabilities evolve, some of the scenarios below will change.\\n\\nElastic TSDS stores metrics in indices optimized as a time series database ([TSDB](https://en.wikipedia.org/wiki/Time_series_database)). [Elastic’s TSDB also got a significant optimization in 8.7](https://www.elastic.co/blog/whats-new-elasticsearch-8-7-0) by reducing storage costs by upward of 70%.\\n\\n## What is an Elastic time series data stream?\\n\\nA time series data stream (TSDS) models timestamped metrics data as one or more time series. In a TSDS, each Elasticsearch document represents an observation or data point in a specific time series. Although a TSDS can contain multiple time series, a document can only belong to one time series. A time series can’t span multiple data streams.\\n\\nA regular [data stream](https://www.elastic.co/guide/en/elasticsearch/reference/current/data-streams.html) can have different usages, including logs. For metrics usage, however, a time series data stream is recommended. A time series data stream is different from a regular data stream in [multiple ways](https://www.elastic.co/guide/en/elasticsearch/reference/current/tsds.html#differences-from-regular-data-stream). A TSDS contains more than one predefined dimension and multiple metrics.\\n\\n## Nginx metrics as an example\\n\\n[Integrations](https://www.elastic.co/integrations/data-integrations?solution=observability) provide an easy way to ingest observability metrics for a large number of services and systems. We use the [Nginx](https://docs.elastic.co/en/integrations/nginx) integration [metrics](https://docs.elastic.co/en/integrations/nginx#metrics-reference) data set as an example here. This is one of the integrations on which time series has recently been enabled.\\n\\n## Process of enabling TSDS on a package\\n\\nTime series is [enabled](https://www.elastic.co/guide/en/elasticsearch/reference/current/tsds.html#time-series-mode) on a metrics data stream of an [integration](https://www.elastic.co/integrations/) package after adding the relevant time series [metrics](https://www.elastic.co/guide/en/elasticsearch/reference/current/tsds.html#time-series-metric) and [dimension](https://www.elastic.co/guide/en/elasticsearch/reference/current/tsds.html#time-series-dimension) mappings. Existing integrations with metrics data streams will come with time series metrics enabled, so that users can use them as-is without any additional configuration.\\n\\n
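Under the hood, time series mode is switched on in the index template settings of the data stream. A minimal sketch of the relevant settings fragment is shown below (the routing_path values here are illustrative; each package defines its own dimension fields):\\n\\n```json\\n\\"settings\\": {\\n  \\"index.mode\\": \\"time_series\\",\\n  \\"index.routing_path\\": [\\"agent.id\\", \\"nginx.stubstatus.hostname\\"]\\n}\\n```\\n\\n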
The image below captures a high-level summary of a time series data stream, the corresponding index template, the time series indices, and a single document. We will shortly dive into the details of each of the fields in the document.\\n\\n![time series data stream](/assets/images/nginx-metrics-elastic-time-series-data-streams/elastic-blog-1-time-series-data-stream-2.png)\\n\\n## TSDS metric document\\n\\nBelow is a snippet of an ingested Elastic document with time series metrics and dimensions together.\\n\\n```json\\n{\\n \\"@timestamp\\": \\"2023-06-29T03:58:12.772Z\\",\\n\\n \\"nginx\\": {\\n \\"stubstatus\\": {\\n \\"accepts\\": 202,\\n \\"active\\": 2,\\n \\"current\\": 3,\\n \\"dropped\\": 0,\\n \\"handled\\": 202,\\n \\"hostname\\": \\"host.docker.internal:80\\",\\n \\"reading\\": 0,\\n \\"requests\\": 10217,\\n \\"waiting\\": 1,\\n \\"writing\\": 1\\n }\\n }\\n}\\n```\\n\\n**Multiple metrics per document:** \\nAn ingested [document](https://www.elastic.co/guide/en/elasticsearch/reference/current/documents-indices.html) has a collection of fields, including metrics fields. Multiple related metrics fields can be part of a single document. A document is part of a single [data stream](https://www.elastic.co/guide/en/fleet/current/data-streams.html), and typically all the metrics it contains are related. All the metrics in a document are part of the same time series.\\n\\n**Metric type and dimensions as mapping:** \\nWhile the document contains the metrics details, the metric types and dimension details are defined as part of the field [mapping](https://www.elastic.co/guide/en/elasticsearch/reference/current/mapping.html). All the time-series-relevant field mappings are defined collectively for a given data stream as part of package development, so integrations released with time series data streams already contain the relevant field mappings as part of the package release. There are two additional mappings needed in particular: the **time_series_metric** mapping and the **time_series_dimension** mapping.\\n\\n## Metrics types fields\\n\\nA document contains the metric type fields (as shown above). 
The mappings for the metric type fields are done using the **time_series_metric** mapping in the [index templates](https://www.elastic.co/guide/en/elasticsearch/reference/current/index-templates.html), as given below:\\n\\n```json\\n\\"nginx\\": {\\n \\"properties\\": {\\n \\"stubstatus\\": {\\n \\"properties\\": {\\n \\"accepts\\": {\\n \\"type\\": \\"long\\",\\n \\"time_series_metric\\": \\"counter\\"\\n },\\n \\"active\\": {\\n \\"type\\": \\"long\\",\\n \\"time_series_metric\\": \\"gauge\\"\\n },\\n \\"current\\": {\\n \\"type\\": \\"long\\",\\n \\"time_series_metric\\": \\"gauge\\"\\n },\\n \\"dropped\\": {\\n \\"type\\": \\"long\\",\\n \\"time_series_metric\\": \\"counter\\"\\n },\\n \\"handled\\": {\\n \\"type\\": \\"long\\",\\n \\"time_series_metric\\": \\"counter\\"\\n },\\n \\"reading\\": {\\n \\"type\\": \\"long\\",\\n \\"time_series_metric\\": \\"gauge\\"\\n },\\n \\"requests\\": {\\n \\"type\\": \\"long\\",\\n \\"time_series_metric\\": \\"counter\\"\\n },\\n \\"waiting\\": {\\n \\"type\\": \\"long\\",\\n \\"time_series_metric\\": \\"gauge\\"\\n },\\n \\"writing\\": {\\n \\"type\\": \\"long\\",\\n \\"time_series_metric\\": \\"gauge\\"\\n }\\n }\\n }\\n }\\n}\\n```\\n\\n## Dimension fields\\n\\n[Dimensions](https://www.elastic.co/guide/en/elasticsearch/reference/current/tsds.html#time-series-dimension) are field names and values that, in combination, identify a document’s time series.\\n\\nIn Elastic time series, there are some additional considerations for dimensions:\\n\\n- Dimension fields need to be defined for each time series. There will be no time series with zero dimension fields.\\n- Keyword (or similar) type fields can be defined as dimensions.\\n- There is a current limit on the number of dimensions that can be defined in a data stream. This limit will likely be lifted going forward.\\n\\nDimensions are common to all the metrics in a single document, as part of a data stream. Each time series data stream of a package (example: Nginx) already comes with a predefined set of dimension fields, as below.\\n\\nThe document would contain more than one dimension field. In the case of Nginx, _agent.id_ and _nginx.stubstatus.hostname_ are some of the dimension fields. The mappings for the dimension fields are done using the **time_series_dimension** mapping, as below:\\n\\n```json\\n\\"agent\\": {\\n \\"properties\\": {\\n \\"id\\": {\\n \\"type\\": \\"keyword\\",\\n \\"time_series_dimension\\": true\\n }\\n }\\n },\\n\\n\\"nginx\\": {\\n \\"properties\\": {\\n \\"stubstatus\\": {\\n \\"properties\\": {\\n \\"hostname\\": {\\n \\"type\\": \\"keyword\\",\\n \\"time_series_dimension\\": true\\n }\\n }\\n }\\n }\\n}\\n```\\n\\n## Meta fields\\n\\nDocuments ingested also have additional meta fields apart from the _metric_ and _dimension_ fields explained above. These additional fields provide richer query capabilities for the metrics.\\n\\n**Example Elastic meta fields**\\n\\n```json\\n\\"data_stream\\": {\\n \\"dataset\\": \\"nginx.stubstatus\\",\\n \\"namespace\\": \\"default\\",\\n \\"type\\": \\"metrics\\"\\n }\\n```\\n\\n
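These meta fields also make it easy to scope a query to a single data stream. For example, a filter along the following lines (a sketch using standard Elasticsearch query DSL) restricts a search to the Nginx stub status metrics:\\n\\n```json\\n{\\n  \\"query\\": {\\n    \\"bool\\": {\\n      \\"filter\\": [{ \\"term\\": { \\"data_stream.dataset\\": \\"nginx.stubstatus\\" } }]\\n    }\\n  }\\n}\\n```\\n\\n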
## Discover and visualization in Kibana\\n\\nElastic provides comprehensive search and visualization for the time series metrics. Time series metrics can be searched as-is in [Discover](https://www.elastic.co/guide/en/kibana/current/discover.html). In the search below, the counter and gauge metrics are captured as _different icons_. Below we also provide examples of visualization for the time series metrics using [Lens](https://www.elastic.co/kibana/kibana-lens) and the OOTB dashboard included as part of the Nginx integration package.\\n\\n![Discover search for TSDS metrics](/assets/images/nginx-metrics-elastic-time-series-data-streams/elastic-blog-2-discover-search-tsds.png)\\n\\n![Maximum of counter field nginx.stubstatus.accepts visualized using Lens](/assets/images/nginx-metrics-elastic-time-series-data-streams/elastic-blog-3-lens.png)\\n\\n![Median of gauge field nginx.stubstatus.active visualized using Lens](/assets/images/nginx-metrics-elastic-time-series-data-streams/elastic-blog-4-median-gauge.png)\\n\\n![OOTB Nginx dashboard with the TSDS metrics visualizations ](/assets/images/nginx-metrics-elastic-time-series-data-streams/elastic-blog-5-multiple-line-graphs.png)\\n\\n## Try it out!\\n\\nWe have provided a detailed example of a time series document ingested by the Elastic Nginx integration. We have walked through how time series metrics are modeled in Elastic and the additional time series mappings with examples. We provided details of dimension requirements for Elastic time series, as well as brief examples of search/visualization/dashboard of TSDS metrics in Kibana\xae.\\n\\nDon’t have an Elastic Cloud account yet? [Sign up for Elastic Cloud](https://cloud.elastic.co/registration) and try out the time series capabilities that I discussed above. I would be interested in getting your feedback about your experience in gaining visibility into your application stack with Elastic.\\n\\n> - [How to use Elasticsearch and Time Series Data Streams for observability metrics](https://www.elastic.co/blog/elasticsearch-time-series-data-streams-observability-metrics)\\n> - [Time Series Data Stream in Elastic documentation](https://www.elastic.co/guide/en/elasticsearch/reference/current/tsds.html) \\n> - [Efficient storage with Elastic Time Series Database](https://www.elastic.co/blog/whats-new-elasticsearch-8-7-0)\\n> - [Elastic integrations catalog](https://www.elastic.co/integrations/)\\n","code":"var Component=(()=>{var h=Object.create;var a=Object.defineProperty;var g=Object.getOwnPropertyDescriptor;var p=Object.getOwnPropertyNames;var u=Object.getPrototypeOf,f=Object.prototype.hasOwnProperty;var w=(t,e)=>()=>(e||t((e={exports:{}}).exports,e),e.exports),b=(t,e)=>{for(var s in e)a(t,s,{get:e[s],enumerable:!0})},l=(t,e,s,r)=>{if(e&&typeof e==\\"object\\"||typeof e==\\"function\\")for(let n of p(e))!f.call(t,n)&&n!==s&&a(t,n,{get:()=>e[n],enumerable:!(r=g(e,n))||r.enumerable});return t};var y=(t,e,s)=>(s=t!=null?h(u(t)):{},l(e||!t||!t.__esModule?a(s,\\"default\\",{value:t,enumerable:!0}):s,t)),v=t=>l(a({},\\"__esModule\\",{value:!0}),t);var o=w((E,c)=>{c.exports=_jsx_runtime});var _={};b(_,{default:()=>m,frontmatter:()=>x});var i=y(o()),x={title:\\"Exploring Nginx metrics with Elastic time series data streams\\",slug:\\"nginx-metrics-elastic-time-series-data-streams\\",date:\\"2023-07-10\\",description:\\"Elasticsearch recently released time series metrics as GA. 
In this blog, we dive into details of what a time series metric document is and the mapping used for enabling time series by using an existing OOTB Nginx integration.\\",author:[{slug:\\"lalit-satapathy\\"}],image:\\"time-series-data-streams-blog-720x420-1.jpg\\",tags:[{slug:\\"metrics\\"},{slug:\\"tsdb\\"},{slug:\\"nginx\\"}]};function d(t){let e={a:\\"a\\",blockquote:\\"blockquote\\",br:\\"br\\",code:\\"code\\",em:\\"em\\",h2:\\"h2\\",img:\\"img\\",li:\\"li\\",p:\\"p\\",pre:\\"pre\\",strong:\\"strong\\",ul:\\"ul\\",...t.components};return(0,i.jsxs)(i.Fragment,{children:[(0,i.jsxs)(e.p,{children:[\\"Elasticsearch\\",(0,i.jsx)(\\"sup\\",{children:\\"\\\\xAE\\"}),\\" recently released time series data streams for metrics. This not only provides better metrics support in Elastic Observability, but it also helps reduce \\",(0,i.jsx)(e.a,{href:\\"https://www.elastic.co/blog/whats-new-elasticsearch-8-7-0\\",rel:\\"nofollow\\",children:\\"storage costs\\"}),\\". We discussed this in a \\",(0,i.jsx)(e.a,{href:\\"https://www.elastic.co/blog/elasticsearch-time-series-data-streams-observability-metrics\\",rel:\\"nofollow\\",children:\\"previous blog\\"}),\\".\\"]}),`\\n`,(0,i.jsxs)(e.p,{children:[\\"In this blog, we dive into how to enable and use time series data streams by reviewing what a time series metrics \\",(0,i.jsx)(e.a,{href:\\"https://www.elastic.co/guide/en/elasticsearch/reference/current/documents-indices.html\\",rel:\\"nofollow\\",children:\\"document\\"}),\\" is and the \\",(0,i.jsx)(e.a,{href:\\"https://www.elastic.co/guide/en/elasticsearch/reference/current/mapping.html\\",rel:\\"nofollow\\",children:\\"mapping\\"}),\\" used for enabling time series. In particular, we will showcase this by using Elastic Observability\\\\u2019s Nginx integration. As Elastic\\",(0,i.jsx)(\\"sup\\",{children:\\"\\\\xAE\\"}),\\" \\",(0,i.jsx)(e.a,{href:\\"https://www.elastic.co/guide/en/elasticsearch/reference/8.8/tsds.html\\",rel:\\"nofollow\\",children:\\"time series data stream (TSDS)\\"}),\\" metrics capabilities evolve, some of the scenarios below will change.\\"]}),`\\n`,(0,i.jsxs)(e.p,{children:[\\"Elastic TSDS stores metrics in indices optimized for a time series database (\\",(0,i.jsx)(e.a,{href:\\"https://en.wikipedia.org/wiki/Time_series_database\\",rel:\\"nofollow\\",children:\\"TSDB\\"}),\\"), which is used to store time series metrics. \\",(0,i.jsx)(e.a,{href:\\"https://www.elastic.co/blog/whats-new-elasticsearch-8-7-0\\",rel:\\"nofollow\\",children:\\"Elastic\\\\u2019s TSDB also got a significant optimization in 8.7\\"}),\\" by reducing storage costs by upward of 70%.\\"]}),`\\n`,(0,i.jsx)(e.h2,{id:\\"what-is-an-elastic-time-series-data-stream\\",children:\\"What is an Elastic time series data stream?\\"}),`\\n`,(0,i.jsx)(e.p,{children:\\"A time series data stream (TSDS) models timestamped metrics data as one or more time series. In a TSDS, each Elasticsearch document represents an observation or data point in a specific time series. Although a TSDS can contain multiple time series, a document can only belong to one time series. A time series can\\\\u2019t span multiple data streams.\\"}),`\\n`,(0,i.jsxs)(e.p,{children:[\\"A regular \\",(0,i.jsx)(e.a,{href:\\"https://www.elastic.co/guide/en/elasticsearch/reference/current/data-streams.html\\",rel:\\"nofollow\\",children:\\"data stream\\"}),\\" can have different usages including logs. For metrics usage, however, a time series data stream is recommended. 
A time series data stream is different from a regular data stream in \\",(0,i.jsx)(e.a,{href:\\"https://www.elastic.co/guide/en/elasticsearch/reference/current/tsds.html#differences-from-regular-data-stream\\",rel:\\"nofollow\\",children:\\"multiple ways\\"}),\\". A TSDS contains more than one predefined dimension and multiple metrics.\\"]}),`\\n`,(0,i.jsx)(e.h2,{id:\\"nginx-metrics-as-an-example\\",children:\\"Nginx metrics as an example\\"}),`\\n`,(0,i.jsxs)(e.p,{children:[(0,i.jsx)(e.a,{href:\\"https://www.elastic.co/integrations/data-integrations?solution=observability\\",rel:\\"nofollow\\",children:\\"Integrations\\"}),\\" provide an easy way to ingest observability metrics for a large number of services and systems. We use the \\",(0,i.jsx)(e.a,{href:\\"https://docs.elastic.co/en/integrations/nginx\\",rel:\\"nofollow\\",children:\\"Nginx\\"}),\\" integration \\",(0,i.jsx)(e.a,{href:\\"https://docs.elastic.co/en/integrations/nginx#metrics-reference\\",rel:\\"nofollow\\",children:\\"metrics\\"}),\\" data set as an example here. This is one of the integrations, on which time series has been recently enabled.\\"]}),`\\n`,(0,i.jsx)(e.h2,{id:\\"process-of-enabling-tsds-on-a-package\\",children:\\"Process of enabling TSDS on a package\\"}),`\\n`,(0,i.jsxs)(e.p,{children:[\\"Time series is \\",(0,i.jsx)(e.a,{href:\\"https://www.elastic.co/guide/en/elasticsearch/reference/current/tsds.html#time-series-mode\\",rel:\\"nofollow\\",children:\\"enabled\\"}),\\" on a metrics data stream of an \\",(0,i.jsx)(e.a,{href:\\"https://www.elastic.co/integrations/\\",rel:\\"nofollow\\",children:\\"integration\\"}),\\" package, after adding the relevant time series \\",(0,i.jsx)(e.a,{href:\\"https://www.elastic.co/guide/en/elasticsearch/reference/current/tsds.html#time-series-metric\\",rel:\\"nofollow\\",children:\\"metrics\\"}),\\" and \\",(0,i.jsx)(e.a,{href:\\"https://www.elastic.co/guide/en/elasticsearch/reference/current/tsds.html#time-series-dimension\\",rel:\\"nofollow\\",children:\\"dimension\\"}),\\" mappings. Existing integrations with metrics data streams will come with time series metrics enabled, so that users can use them as-is without any additional configuration.\\"]}),`\\n`,(0,i.jsx)(e.p,{children:\\"The image below captures a high-level summary of a time series data stream, the corresponding index template, the time series indices and a single document. 
We will shortly dive into the details of each of the fields in the document.\\"}),`\\n`,(0,i.jsx)(e.p,{children:(0,i.jsx)(e.img,{src:\\"/assets/images/nginx-metrics-elastic-time-series-data-streams/elastic-blog-1-time-series-data-stream-2.png\\",alt:\\"time series data stream\\",width:\\"1474\\",height:\\"1114\\"})}),`\\n`,(0,i.jsx)(e.h2,{id:\\"tsds-metric-document\\",children:\\"TSDS metric document\\"}),`\\n`,(0,i.jsx)(e.p,{children:\\"Below we provide a snippet of an ingested Elastic document with time series metrics and dimension together.\\"}),`\\n`,(0,i.jsx)(e.pre,{children:(0,i.jsx)(e.code,{className:\\"language-json\\",children:`{\\n \\"@timestamp\\": \\"2023-06-29T03:58:12.772Z\\",\\n\\n \\"nginx\\": {\\n \\"stubstatus\\": {\\n \\"accepts\\": 202,\\n \\"active\\": 2,\\n \\"current\\": 3,\\n \\"dropped\\": 0,\\n \\"handled\\": 202,\\n \\"hostname\\": \\"host.docker.internal:80\\",\\n \\"reading\\": 0,\\n \\"requests\\": 10217,\\n \\"waiting\\": 1,\\n \\"writing\\": 1\\n }\\n }\\n}\\n`})}),`\\n`,(0,i.jsxs)(e.p,{children:[(0,i.jsx)(e.strong,{children:\\"Multiple metrics per document:\\"}),(0,i.jsx)(e.br,{}),`\\n`,\\"An ingested \\",(0,i.jsx)(e.a,{href:\\"https://www.elastic.co/guide/en/elasticsearch/reference/current/documents-indices.html\\",rel:\\"nofollow\\",children:\\"document\\"}),\\" has a collection of fields, including metrics fields. Multiple related metrics fields can be part of a single document. A document is part of a single \\",(0,i.jsx)(e.a,{href:\\"https://www.elastic.co/guide/en/fleet/current/data-streams.html\\",rel:\\"nofollow\\",children:\\"data stream\\"}),\\", and typically all the metrics it contains are related. All the metrics in a document are part of the same time series.\\"]}),`\\n`,(0,i.jsxs)(e.p,{children:[(0,i.jsx)(e.strong,{children:\\"Metric type and dimensions as mapping:\\"}),(0,i.jsx)(e.br,{}),`\\n`,\\"While the document contains the metrics details, the metric types and dimension details are defined as part of the field \\",(0,i.jsx)(e.a,{href:\\"https://www.elastic.co/guide/en/elasticsearch/reference/current/mapping.html\\",rel:\\"nofollow\\",children:\\"mapping\\"}),\\". All the time series relevant field mappings are defined collectively for a given datastream, as part of the package development. All the integrations released with time series data stream, contain all the relevant time series field mappings, as part of the package release. There are two additional mappings needed in particular: \\",(0,i.jsx)(e.strong,{children:\\"time_series_metric\\"}),\\" mapping and \\",(0,i.jsx)(e.strong,{children:\\"time_series_dimension\\"}),\\" mapping.\\"]}),`\\n`,(0,i.jsx)(e.h2,{id:\\"metrics-types-fields\\",children:\\"Metrics types fields\\"}),`\\n`,(0,i.jsxs)(e.p,{children:[\\"A document contains the metric type fields (as shown above). 
The mapping for the metric type fields is done using the \\",(0,i.jsx)(e.strong,{children:\\"time_series_metric\\"}),\\" mapping in the \\",(0,i.jsx)(e.a,{href:\\"https://www.elastic.co/guide/en/elasticsearch/reference/current/index-templates.html\\",rel:\\"nofollow\\",children:\\"index templates\\"}),\\", as shown below:\\"]}),`\\n`,(0,i.jsx)(e.pre,{children:(0,i.jsx)(e.code,{className:\\"language-json\\",children:`\\"nginx\\": {\\n \\"properties\\": {\\n \\"stubstatus\\": {\\n \\"properties\\": {\\n \\"accepts\\": {\\n \\"type\\": \\"long\\",\\n \\"time_series_metric\\": \\"counter\\"\\n },\\n \\"active\\": {\\n \\"type\\": \\"long\\",\\n \\"time_series_metric\\": \\"gauge\\"\\n },\\n \\"current\\": {\\n \\"type\\": \\"long\\",\\n \\"time_series_metric\\": \\"gauge\\"\\n },\\n \\"dropped\\": {\\n \\"type\\": \\"long\\",\\n \\"time_series_metric\\": \\"counter\\"\\n },\\n \\"handled\\": {\\n \\"type\\": \\"long\\",\\n \\"time_series_metric\\": \\"counter\\"\\n },\\n \\"reading\\": {\\n \\"type\\": \\"long\\",\\n \\"time_series_metric\\": \\"gauge\\"\\n },\\n \\"requests\\": {\\n \\"type\\": \\"long\\",\\n \\"time_series_metric\\": \\"counter\\"\\n },\\n \\"waiting\\": {\\n \\"type\\": \\"long\\",\\n \\"time_series_metric\\": \\"gauge\\"\\n },\\n \\"writing\\": {\\n \\"type\\": \\"long\\",\\n \\"time_series_metric\\": \\"gauge\\"\\n }\\n }\\n }\\n }\\n}\\n`})}),`\\n`,(0,i.jsx)(e.h2,{id:\\"dimension-fields\\",children:\\"Dimension fields\\"}),`\\n`,(0,i.jsxs)(e.p,{children:[(0,i.jsx)(e.a,{href:\\"https://www.elastic.co/guide/en/elasticsearch/reference/current/tsds.html#time-series-dimension\\",rel:\\"nofollow\\",children:\\"Dimensions\\"}),\\" are field names and values that, in combination, identify a document\\\\u2019s time series.\\"]}),`\\n`,(0,i.jsx)(e.p,{children:\\"In Elastic time series, there are some additional considerations for dimensions:\\"}),`\\n`,(0,i.jsxs)(e.ul,{children:[`\\n`,(0,i.jsx)(e.li,{children:\\"Dimension fields need to be defined for each time series. There will be no time series with zero dimension fields.\\"}),`\\n`,(0,i.jsx)(e.li,{children:\\"Keyword (or similar) type fields can be defined as dimensions.\\"}),`\\n`,(0,i.jsx)(e.li,{children:\\"There is a current limit on the number of dimensions that can be defined in a data stream. This limit will likely be relaxed going forward.\\"}),`\\n`]}),`\\n`,(0,i.jsx)(e.p,{children:\\"Dimensions are common to all the metrics in a single document within a data stream. Each time series data stream of a package (example: Nginx) already comes with a predefined set of dimension fields, as shown below.\\"}),`\\n`,(0,i.jsxs)(e.p,{children:[\\"A document typically contains more than one dimension field. In the case of Nginx, \\",(0,i.jsx)(e.em,{children:\\"agent.id\\"}),\\" and \\",(0,i.jsx)(e.em,{children:\\"nginx.stubstatus.hostname\\"}),\\" are some of the dimension fields. 
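(Taken together, these dimension values are what identify each series. As an illustrative aside of ours, not from the integration docs: the distinct series in a TSDS can be listed in Dev Tools with a terms aggregation on the _tsid metadata field.)\\"]}),`\\n`,(0,i.jsx)(e.pre,{children:(0,i.jsx)(e.code,{className:\\"language-json\\",children:`{\\n \\"size\\": 0,\\n \\"aggs\\": {\\n \\"series\\": {\\n \\"terms\\": { \\"field\\": \\"_tsid\\" }\\n }\\n }\\n}\\n`})}),`\\n`,(0,i.jsxs)(e.p,{children:[\\"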
The mapping for the dimension fields is done using the \\",(0,i.jsx)(e.strong,{children:\\"time_series_dimension\\"}),\\" mapping, as shown below:\\"]}),`\\n`,(0,i.jsx)(e.pre,{children:(0,i.jsx)(e.code,{className:\\"language-json\\",children:`\\"agent\\": {\\n \\"properties\\": {\\n \\"id\\": {\\n \\"type\\": \\"keyword\\",\\n \\"time_series_dimension\\": true\\n }\\n }\\n },\\n\\n\\"nginx\\": {\\n \\"properties\\": {\\n \\"stubstatus\\": {\\n \\"properties\\": {\\n \\"hostname\\": {\\n \\"type\\": \\"keyword\\",\\n \\"time_series_dimension\\": true\\n }\\n }\\n }\\n }\\n}\\n`})}),`\\n`,(0,i.jsx)(e.h2,{id:\\"meta-fields\\",children:\\"Meta fields\\"}),`\\n`,(0,i.jsxs)(e.p,{children:[\\"Documents ingested also have additional meta fields apart from the \\",(0,i.jsx)(e.em,{children:\\"metric\\"}),\\" and \\",(0,i.jsx)(e.em,{children:\\"dimension\\"}),\\" fields explained above. These additional fields provide richer query capabilities for the metrics.\\"]}),`\\n`,(0,i.jsx)(e.p,{children:(0,i.jsx)(e.strong,{children:\\"Example Elastic meta fields\\"})}),`\\n`,(0,i.jsx)(e.pre,{children:(0,i.jsx)(e.code,{className:\\"language-json\\",children:`\\"data_stream\\": {\\n \\"dataset\\": \\"nginx.stubstatus\\",\\n \\"namespace\\": \\"default\\",\\n \\"type\\": \\"metrics\\"\\n }\\n`})}),`\\n`,(0,i.jsx)(e.h2,{id:\\"discover-and-visualization-in-kibana\\",children:\\"Discover and visualization in Kibana\\"}),`\\n`,(0,i.jsxs)(e.p,{children:[\\"Elastic provides comprehensive search and visualization for the time series metrics. Time series metrics can be searched as-is in \\",(0,i.jsx)(e.a,{href:\\"https://www.elastic.co/guide/en/kibana/current/discover.html\\",rel:\\"nofollow\\",children:\\"Discover\\"}),\\". In the search below, the counter and gauge metrics are marked with \\",(0,i.jsx)(e.em,{children:\\"different icons\\"}),\\". Below we also provide examples of visualization for the time series metrics using \\",(0,i.jsx)(e.a,{href:\\"https://www.elastic.co/kibana/kibana-lens\\",rel:\\"nofollow\\",children:\\"Lens\\"}),\\" and the out-of-the-box (OOTB) dashboard included as part of the Nginx integration package.\\"]}),`\\n`,(0,i.jsx)(e.p,{children:(0,i.jsx)(e.img,{src:\\"/assets/images/nginx-metrics-elastic-time-series-data-streams/elastic-blog-2-discover-search-tsds.png\\",alt:\\"Discover search for TSDS metrics\\",width:\\"648\\",height:\\"1340\\"})}),`\\n`,(0,i.jsx)(e.p,{children:(0,i.jsx)(e.img,{src:\\"/assets/images/nginx-metrics-elastic-time-series-data-streams/elastic-blog-3-lens.png\\",alt:\\"Maximum of counter field nginx.stubstatus.accepts visualized using Lens\\",width:\\"1999\\",height:\\"767\\"})}),`\\n`,(0,i.jsx)(e.p,{children:(0,i.jsx)(e.img,{src:\\"/assets/images/nginx-metrics-elastic-time-series-data-streams/elastic-blog-4-median-gauge.png\\",alt:\\"Median of gauge field nginx.stubstatus.active visualized using Lens\\",width:\\"1999\\",height:\\"731\\"})}),`\\n`,(0,i.jsx)(e.p,{children:(0,i.jsx)(e.img,{src:\\"/assets/images/nginx-metrics-elastic-time-series-data-streams/elastic-blog-5-multiple-line-graphs.png\\",alt:\\"OOTB Nginx dashboard with the TSDS metrics visualizations\\",width:\\"1999\\",height:\\"845\\"})}),`\\n`,(0,i.jsx)(e.h2,{id:\\"try-it-out\\",children:\\"Try it out!\\"}),`\\n`,(0,i.jsxs)(e.p,{children:[\\"We have provided a detailed example of a time series document ingested by the Elastic Nginx integration. We have walked through how time series metrics are modeled in Elastic and the additional time series mappings with examples. 
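As one final illustrative sketch of ours (not from the article's own walkthrough): a counter such as nginx.stubstatus.requests can be charted as a per-minute rate in Dev Tools with a date_histogram and a rate sub-aggregation.\\"]}),`\\n`,(0,i.jsx)(e.pre,{children:(0,i.jsx)(e.code,{className:\\"language-json\\",children:`{\\n \\"size\\": 0,\\n \\"aggs\\": {\\n \\"per_minute\\": {\\n \\"date_histogram\\": {\\n \\"field\\": \\"@timestamp\\",\\n \\"fixed_interval\\": \\"1m\\"\\n },\\n \\"aggs\\": {\\n \\"requests_per_minute\\": {\\n \\"rate\\": {\\n \\"field\\": \\"nginx.stubstatus.requests\\",\\n \\"unit\\": \\"minute\\"\\n }\\n }\\n }\\n }\\n }\\n}\\n`})}),`\\n`,(0,i.jsxs)(e.p,{children:[\\"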
We provided details of dimension requirements for Elastic time series, as well as brief examples of search/visualization/dashboard of TSDS metrics in Kibana\\",(0,i.jsx)(\\"sup\\",{children:\\"\\\\xAE\\"}),\\".\\"]}),`\\n`,(0,i.jsxs)(e.p,{children:[\\"Don\\\\u2019t have an Elastic Cloud account yet? \\",(0,i.jsx)(e.a,{href:\\"https://cloud.elastic.co/registration\\",rel:\\"nofollow\\",children:\\"Sign up for Elastic Cloud\\"}),\\" and try out the time series data stream capabilities discussed above. I would be interested in getting your feedback about your experience in gaining visibility into your application stack with Elastic.\\"]}),`\\n`,(0,i.jsxs)(e.blockquote,{children:[`\\n`,(0,i.jsxs)(e.ul,{children:[`\\n`,(0,i.jsx)(e.li,{children:(0,i.jsx)(e.a,{href:\\"https://www.elastic.co/blog/elasticsearch-time-series-data-streams-observability-metrics\\",rel:\\"nofollow\\",children:\\"How to use Elasticsearch and Time Series Data Streams for observability metrics\\"})}),`\\n`,(0,i.jsxs)(e.li,{children:[(0,i.jsx)(e.a,{href:\\"https://www.elastic.co/guide/en/elasticsearch/reference/current/tsds.html\\",rel:\\"nofollow\\",children:\\"Time Series Data Stream in Elastic documentation\\"}),\\"\\\\xA0\\"]}),`\\n`,(0,i.jsx)(e.li,{children:(0,i.jsx)(e.a,{href:\\"https://www.elastic.co/blog/whats-new-elasticsearch-8-7-0\\",rel:\\"nofollow\\",children:\\"Efficient storage with Elastic Time Series Database\\"})}),`\\n`,(0,i.jsx)(e.li,{children:(0,i.jsx)(e.a,{href:\\"https://www.elastic.co/integrations/\\",rel:\\"nofollow\\",children:\\"Elastic integrations catalog\\"})}),`\\n`]}),`\\n`]})]})}function m(t={}){let{wrapper:e}=t.components||{};return e?(0,i.jsx)(e,{...t,children:(0,i.jsx)(d,{...t})}):d(t)}return v(_);})();\\n;return Component;"},"_id":"articles/nginx-metrics-elastic-time-series-data-streams.mdx","_raw":{"sourceFilePath":"articles/nginx-metrics-elastic-time-series-data-streams.mdx","sourceFileName":"nginx-metrics-elastic-time-series-data-streams.mdx","sourceFileDir":"articles","contentType":"mdx","flattenedPath":"articles/nginx-metrics-elastic-time-series-data-streams"},"type":"Article","imageUrl":"/assets/images/nginx-metrics-elastic-time-series-data-streams/time-series-data-streams-blog-720x420-1.jpg","readingTime":"11 min read","url":"/nginx-metrics-elastic-time-series-data-streams","headings":[{"level":2,"title":"What is an Elastic time series data stream?","href":"#what-is-an-elastic-time-series-data-stream"},{"level":2,"title":"Nginx metrics as an example","href":"#nginx-metrics-as-an-example"},{"level":2,"title":"Process of enabling TSDS on a package","href":"#process-of-enabling-tsds-on-a-package"},{"level":2,"title":"TSDS metric document","href":"#tsds-metric-document"},{"level":2,"title":"Metrics types fields","href":"#metrics-types-fields"},{"level":2,"title":"Dimension fields","href":"#dimension-fields"},{"level":2,"title":"Meta fields","href":"#meta-fields"},{"level":2,"title":"Discover and visualization in Kibana","href":"#discover-and-visualization-in-kibana"},{"level":2,"title":"Try it out!","href":"#try-it-out"}]},{"title":"Root cause analysis with logs: Elastic Observability\'s AIOps Labs","slug":"observability-logs-machine-learning-aiops","date":"2023-04-27","description":"Elastic Observability provides more than just log aggregation, metrics analysis, APM, and distributed tracing. 
Our machine learning-based AIOps capabilities help you analyze the root cause of issues allowing you to focus on the most important tasks.","image":"illustration-machine-learning-anomaly-1680x980.png","author":[{"slug":"bahubali-shetti","type":"Author","_raw":{}}],"tags":[{"slug":"aiops","type":"Tag","_raw":{}},{"slug":"log-analytics","type":"Tag","_raw":{}}],"body":{"raw":"\\nIn the [previous blog](https://www.elastic.co/blog/reduce-mttd-ml-machine-learning-observability) in our root cause analysis with logs series, we explored how to analyze logs in Elastic Observability with Elastic’s anomaly detection and log categorization capabilities. Elastic’s platform enables you to get started on machine learning (ML) quickly. You don’t need to have a data science team or design a system architecture. Additionally, there’s no need to move data to a third-party framework for model training.\\n\\nPreconfigured [machine learning models](https://www.elastic.co/blog/may-2023-launch-machine-learning-models) for observability and security are available. If those don\'t work well enough on your data, in-tool wizards guide you through the few steps needed to configure custom anomaly detection and train your model with supervised learning. To get you started, there are several key features built into Elastic Observability to aid in analysis, bypassing the need to run specific ML models. These features help minimize the time and analysis of logs.\\n\\nLet’s review the set of machine learning-based observability features in Elastic:\\n\\n**Anomaly detection:** Elastic Observability, when turned on ([see documentation](https://www.elastic.co/guide/en/kibana/current/xpack-ml-anomalies.html)), automatically detects anomalies by continuously modeling the normal behavior of your time series data — learning trends, periodicity, and more — in real time to identify anomalies, streamline root cause analysis, and reduce false positives. Anomaly detection runs in and scales with Elasticsearch and includes an intuitive UI.\\n\\n**Log categorization:** Using anomaly detection, Elastic also identifies patterns in your log events quickly. Instead of manually identifying similar logs, the logs categorization view lists log events that have been grouped, based on their messages and formats, so that you can take action more quickly.\\n\\n**High-latency or erroneous transactions:** Elastic Observability’s APM capability helps you discover which attributes are contributing to increased transaction latency and identifies which attributes are most influential in distinguishing between transaction failures and successes. Read [APM correlations in Elastic Observability: Automatically identifying probable causes of slow or failed transactions](https://www.elastic.co/blog/apm-correlations-elastic-observability-root-cause-transactions) for an overview of this capability.\\n\\n**AIOps Labs:** AIOps Labs provides two main capabilities using advanced statistical methods:\\n\\n- **Log spike detector** helps identify reasons for increases in log rates. It makes it easy to find and investigate the causes of unusual spikes by using the analysis workflow view. Examine the histogram chart of the log rates for a given data view, and find the reason behind a particular change possibly in millions of log events across multiple fields and values.\\n- **Log pattern analysis** helps you find patterns in unstructured log messages and makes it easier to examine your data. 
It performs categorization analysis on a selected field of a data view, creates categories based on the data, and displays them together with a chart that shows the distribution of each category and an example document that matches the category.\\n\\nAs we showed in the [last blog](https://www.elastic.co/blog/reduce-mttd-ml-machine-learning-observability), using machine learning-based features helps minimize the extremely tedious and time-consuming process of analyzing data using traditional methods, such as alerting and simple pattern matching (visual or simple searching, etc.). Trying to find the needle in the haystack requires the use of some level of artificial intelligence due to the increasing amounts of telemetry data (logs, metrics, and traces) being collected across ever-growing applications.\\n\\nIn this blog post, we’ll cover two capabilities found in Elastic’s AIOps Labs: log spike detector and log pattern analysis. We’ll use the same data from the previous blog and analyze it using these two capabilities.\\n\\n_**We will cover log spike detector and log pattern analysis against the popular Hipster Shop app developed by Google, and modified recently by OpenTelemetry.**_\\n\\nOverviews of high-latency capabilities can be found [here](https://www.elastic.co/blog/apm-correlations-elastic-observability-root-cause-transactions), and an overview of AIOps Labs can be found [here](https://www.youtube.com/watch?v=jgHxzUNzfhM&list=PLhLSfisesZItlRZKgd-DtYukNfpThDAv_&index=5).\\n\\nBelow, we will examine a scenario where we use anomaly detection and log categorization to help identify a root cause of an issue in Hipster Shop.\\n\\n## Prerequisites and config\\n\\nIf you plan on following this blog, here are some of the components and details we used to set up this demonstration:\\n\\n- Ensure you have an account on [Elastic Cloud](https://cloud.elastic.co) and a deployed stack ([see instructions here](https://www.elastic.co/guide/en/elastic-stack/current/installing-elastic-stack.html)) on AWS. Deploying this on AWS is required for Elastic Serverless Forwarder.\\n- Utilize a version of the popular [Hipster Shop](https://github.com/GoogleCloudPlatform/microservices-demo) demo application. It was originally written by Google to showcase Kubernetes and is available in a multitude of variants, such as the [OpenTelemetry Demo App](https://github.com/open-telemetry/opentelemetry-demo). The Elastic version is found [here](https://github.com/elastic/opentelemetry-demo).\\n- Ensure you have configured the app for either Elastic APM agents or OpenTelemetry agents. For more details, please refer to these two blogs: [Independence with OTel in Elastic](https://www.elastic.co/blog/opentelemetry-observability) and [Observability and Security with OTel in Elastic](https://www.elastic.co/blog/implementing-kubernetes-observability-security-opentelemetry). 
Additionally, review the [OTel documentation in Elastic](https://www.elastic.co/guide/en/apm/guide/current/open-telemetry.html).\\n- Look through an overview of [Elastic Observability APM capabilities](https://www.elastic.co/guide/en/observability/current/apm.html).\\n- Look through our [anomaly detection documentation](https://www.elastic.co/guide/en/observability/8.5/inspect-log-anomalies.html) for logs and [log categorization documentation](https://www.elastic.co/guide/en/observability/8.5/categorize-logs.html).\\n\\nOnce you’ve instrumented your application with APM (Elastic or OTel) agents and are ingesting metrics and logs into Elastic Observability, you should see a service map for the application as follows:\\n\\n![observability service map](/assets/images/observability-logs-machine-learning-aiops/blog-elastic-observability-service-map.png)\\n\\nIn our example, we’ve introduced issues to help walk you through the root cause analysis features. You might have a different set of issues depending on how you load the application and/or introduce specific feature flags.\\n\\nAs part of the walk-through, we’ll assume we are the DevOps or SRE team managing this application in production.\\n\\n## Root cause analysis\\n\\nWhile the application has been running normally for some time, you get a notification that some of the services are unhealthy. This can occur from the notification settings you’ve set up in Elastic or other external notification platforms (including customer-related issues). In this instance, we’re assuming that customer support has called in multiple customer complaints about the website.\\n\\nHow do you, as a DevOps engineer or SRE, investigate this? We will walk through two avenues in Elastic to investigate the issue:\\n\\n- Log spike analysis\\n- Log pattern analysis\\n\\nWhile we show these two paths separately, they can be used in conjunction and are complementary, as they are both tools Elastic Observability provides to help you troubleshoot and identify a root cause.\\n\\nStarting with the service map, you can see anomalies identified with red circles, and as we select them, Elastic will provide a score for the anomaly.\\n\\n![observability service map service details](/assets/images/observability-logs-machine-learning-aiops/blog-elastic-observability-service-map-service-details.png)\\n\\nIn this example, we can see that there is a score of 96 for a specific anomaly for the productCatalogService in the Hipster Shop application. An anomaly score indicates the significance of the anomaly compared to previously seen anomalies. Rather than jump into anomaly detection (see previous [blog](https://www.elastic.co/blog/reduce-mttd-ml-machine-learning-observability)), let’s look at some of the potential issues by reviewing the service details in APM.\\n\\n![observability product catalog service overview](/assets/images/observability-logs-machine-learning-aiops/blog-elastic-observability-product-catalog-service-overview.png)\\n\\nWhat we see for the productCatalogService is that there are latency issues, failed transactions, a large number of issues, and a dependency on PostgreSQL. 
When we look at the errors in more detail and drill down, we see they are all coming from [PQ, which is a PostgreSQL driver for Go](https://pkg.go.dev/github.com/lib/pq).\\n\\n![observability product catalog service errors](/assets/images/observability-logs-machine-learning-aiops/blog-elastic-observability-product-catalog-service-errors.png)\\n\\nAs we drill further, we still can’t tell why the productCatalogService is not able to pull information from the PostgreSQL database.\\n\\n![observability product catalog service error group](/assets/images/observability-logs-machine-learning-aiops/blog-elastic-observability-product-catalog-service-error-group.png)\\n\\nWe see that there is a spike in errors, so let's see if we can glean further insight using one of our two options:\\n\\n- Log rate spikes\\n- Log pattern analysis\\n\\n### Log rate spikes\\n\\nLet’s start with the **log rate spikes** detector capability from Elastic’s AIOps Labs section of Elastic’s machine learning capabilities. We also pre-select analyzing the spike against a baseline history.\\n\\n![explain log rate spikes postgres](/assets/images/observability-logs-machine-learning-aiops/blog-elastic-observability-explain-log-rate-spikes-postgres.png)\\n\\nThe log rate spikes detector has looked at all the logs from the spike and compared them to the baseline, and it's seeing higher-than-normal counts in specific log messages. From a visual inspection, we see that PostgreSQL log messages are high. We further filter this with postgres.\\n\\n![explain log rates spikes pgbench](/assets/images/observability-logs-machine-learning-aiops/blog-elastic-observability-explain-log-rate-spikes-pgbench.png)\\n\\nWe immediately notice that this issue is potentially caused by pgbench, a popular PostgreSQL tool to help benchmark the database. pgbench runs the same sequence of SQL commands over and over, possibly in multiple, concurrent database sessions. While pgbench is definitely a useful tool, it should not be used in a production environment as it causes a heavy load on the database host, likely causing higher latency issues on the site.\\n\\nWhile this may or may not be the ultimate root cause, we have rather quickly identified a potential issue that has a high probability of being the root cause. An engineer likely intended to run pgbench against a staging database to evaluate its performance, and not the production environment.\\n\\n### Log pattern analysis\\n\\nInstead of log rate spikes, let’s use log pattern analysis to investigate the spike in errors we saw in productCatalogService. In AIOps Labs, we simply select Log Pattern Analysis, use Logs data, filter the results with postgres (since we know it's related to PostgreSQL), and look at information from the message field of the logs we are processing. We see the following:\\n\\n![observability explain log pattern analysis](/assets/images/observability-logs-machine-learning-aiops/blog-elastic-observability-explain-log-pattern-analysis.png)\\n\\nAlmost immediately we see the biggest pattern it finds is a log message where pgbench is updating the database. We can further directly drill into this log message from log pattern analysis into Discover and review the details and further analyze the messages.\\n\\n![expanded document](/assets/images/observability-logs-machine-learning-aiops/blog-elastic-observability-expanded-document.png)\\n\\nAs we mentioned in the previous section, while it may or may not be the root cause, it quickly gives us a place to start and a potential root cause. 
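\\n\\nAs an illustrative aside (our own sketch, not part of the original walkthrough): a similar grouping can be approximated directly in Dev Tools with the categorize_text aggregation on the message field.\\n\\n```json\\n{\\n \\"size\\": 0,\\n \\"aggs\\": {\\n \\"patterns\\": {\\n \\"categorize_text\\": {\\n \\"field\\": \\"message\\"\\n }\\n }\\n }\\n}\\n```\\n\\n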
A developer likely intended to run pgbench against a staging database to evaluate its performance, and not the production environment.\\n\\n## Conclusion\\n\\nBetween the [first blog](https://www.elastic.co/blog/reduce-mttd-ml-machine-learning-observability) and this one, we’ve shown how Elastic Observability can help you further identify and get closer to pinpointing the root cause of issues without having to look for a “needle in a haystack.” Here’s a quick recap of what you learned in this blog.\\n\\n- Elastic Observability has numerous capabilities to help you reduce your time to find the root cause and improve your MTTR (even MTTD). In particular, we reviewed the following two main capabilities (found in AIOps Labs in Elastic) in this blog:\\n\\n 1. **Log rate spikes** detector helps identify reasons for increases in log rates. It makes it easy to find and investigate the causes of unusual spikes by using the analysis workflow view. Examine the histogram chart of the log rates for a given data view, and find the reason behind a particular change possibly in millions of log events across multiple fields and values.\\n 2. **Log pattern analysis** helps you find patterns in unstructured log messages and makes it easier to examine your data. It performs categorization analysis on a selected field of a data view, creates categories based on the data, and displays them together with a chart that shows the distribution of each category and an example document that matches the category.\\n\\n- You learned how easy and simple it is to use Elastic Observability’s log categorization and anomaly detection capabilities without having to understand machine learning (which helps drive these features) or having to do any lengthy setups.\\n\\nReady to get started? [Register for Elastic Cloud](https://cloud.elastic.co/registration) and try out the features and capabilities outlined above.\\n\\n### Additional logging resources:\\n\\n- [Getting started with logging on Elastic (quickstart)](https://www.elastic.co/getting-started/observability/collect-and-analyze-logs)\\n- [Ingesting common known logs via integrations (compute node example)](https://www.elastic.co/guide/en/observability/current/logs-metrics-get-started.html)\\n- [List of integrations](https://docs.elastic.co/integrations)\\n- [Ingesting custom application logs into Elastic](https://www.elastic.co/blog/log-monitoring-management-enterprise)\\n- [Enriching logs in Elastic](https://www.elastic.co/blog/observability-logs-parsing-schema-read-write)\\n- Analyzing Logs with [Anomaly Detection (ML)](https://www.elastic.co/blog/reduce-mttd-ml-machine-learning-observability) and [AIOps](https://www.elastic.co/blog/observability-logs-machine-learning-aiops)\\n\\n### Common use case examples with logs:\\n\\n- [Nginx log management](https://youtu.be/ax04ZFWqVCg)\\n- [AWS VPC Flow log management](https://www.elastic.co/blog/vpc-flow-logs-monitoring-analytics-observability)\\n- [Using OpenAI to analyze Kubernetes errors](https://www.elastic.co/blog/kubernetes-errors-elastic-observability-logs-openai)\\n- [PostgreSQL issue analysis with AIOps](https://youtu.be/Li5TJAWbz8Q)\\n\\n_Elastic and Elasticsearch are trademarks, logos or registered trademarks of Elasticsearch B.V. 
in the United States and other countries._\\n","code":"var Component=(()=>{var g=Object.create;var s=Object.defineProperty;var p=Object.getOwnPropertyDescriptor;var u=Object.getOwnPropertyNames;var m=Object.getPrototypeOf,b=Object.prototype.hasOwnProperty;var y=(t,e)=>()=>(e||t((e={exports:{}}).exports,e),e.exports),w=(t,e)=>{for(var a in e)s(t,a,{get:e[a],enumerable:!0})},l=(t,e,a,o)=>{if(e&&typeof e==\\"object\\"||typeof e==\\"function\\")for(let n of u(e))!b.call(t,n)&&n!==a&&s(t,n,{get:()=>e[n],enumerable:!(o=p(e,n))||o.enumerable});return t};var f=(t,e,a)=>(a=t!=null?g(m(t)):{},l(e||!t||!t.__esModule?s(a,\\"default\\",{value:t,enumerable:!0}):a,t)),v=t=>l(s({},\\"__esModule\\",{value:!0}),t);var c=y((L,r)=>{r.exports=_jsx_runtime});var E={};w(E,{default:()=>d,frontmatter:()=>k});var i=f(c()),k={title:\\"Root cause analysis with logs: Elastic Observability\'s AIOps Labs\\",slug:\\"observability-logs-machine-learning-aiops\\",date:\\"2023-04-27\\",description:\\"Elastic Observability provides more than just log aggregation, metrics analysis, APM, and distributed tracing. Our machine learning-based AIOps capabilities help you analyze the root cause of issues allowing you to focus on the most important tasks.\\",author:[{slug:\\"bahubali-shetti\\"}],image:\\"illustration-machine-learning-anomaly-1680x980.png\\",tags:[{slug:\\"aiops\\"},{slug:\\"log-analytics\\"}]};function h(t){let e={a:\\"a\\",em:\\"em\\",h2:\\"h2\\",h3:\\"h3\\",img:\\"img\\",li:\\"li\\",ol:\\"ol\\",p:\\"p\\",strong:\\"strong\\",ul:\\"ul\\",...t.components};return(0,i.jsxs)(i.Fragment,{children:[(0,i.jsxs)(e.p,{children:[\\"In the \\",(0,i.jsx)(e.a,{href:\\"https://www.elastic.co/blog/reduce-mttd-ml-machine-learning-observability\\",rel:\\"nofollow\\",children:\\"previous blog\\"}),\\" in our root cause analysis with logs series, we explored how to analyze logs in Elastic Observability with Elastic\\\\u2019s anomaly detection and log categorization capabilities. Elastic\\\\u2019s platform enables you to get started on machine learning (ML) quickly. You don\\\\u2019t need to have a data science team or design a system architecture. Additionally, there\\\\u2019s no need to move data to a third-party framework for model training.\\"]}),`\\n`,(0,i.jsxs)(e.p,{children:[\\"Preconfigured \\",(0,i.jsx)(e.a,{href:\\"https://www.elastic.co/blog/may-2023-launch-machine-learning-models\\",rel:\\"nofollow\\",children:\\"machine learning models\\"}),\\" for observability and security are available. If those don\'t work well enough on your data, in-tool wizards guide you through the few steps needed to configure custom anomaly detection and train your model with supervised learning. To get you started, there are several key features built into Elastic Observability to aid in analysis, bypassing the need to run specific ML models. 
These features help minimize the time and analysis of logs.\\"]}),`\\n`,(0,i.jsx)(e.p,{children:\\"Let\\\\u2019s review the set of machine learning-based observability features in Elastic:\\"}),`\\n`,(0,i.jsxs)(e.p,{children:[(0,i.jsx)(e.strong,{children:\\"Anomaly detection:\\"}),\\" Elastic Observability, when turned on (\\",(0,i.jsx)(e.a,{href:\\"https://www.elastic.co/guide/en/kibana/current/xpack-ml-anomalies.html\\",rel:\\"nofollow\\",children:\\"see documentation\\"}),\\"), automatically detects anomalies by continuously modeling the normal behavior of your time series data \\\\u2014 learning trends, periodicity, and more \\\\u2014 in real time to identify anomalies, streamline root cause analysis, and reduce false positives. Anomaly detection runs in and scales with Elasticsearch and includes an intuitive UI.\\"]}),`\\n`,(0,i.jsxs)(e.p,{children:[(0,i.jsx)(e.strong,{children:\\"Log categorization:\\"}),\\" Using anomaly detection, Elastic also identifies patterns in your log events quickly. Instead of manually identifying similar logs, the logs categorization view lists log events that have been grouped, based on their messages and formats, so that you can take action more quickly.\\"]}),`\\n`,(0,i.jsxs)(e.p,{children:[(0,i.jsx)(e.strong,{children:\\"High-latency or erroneous transactions:\\"}),\\" Elastic Observability\\\\u2019s APM capability helps you discover which attributes are contributing to increased transaction latency and identifies which attributes are most influential in distinguishing between transaction failures and successes. Read \\",(0,i.jsx)(e.a,{href:\\"https://www.elastic.co/blog/apm-correlations-elastic-observability-root-cause-transactions\\",rel:\\"nofollow\\",children:\\"APM correlations in Elastic Observability: Automatically identifying probable causes of slow or failed transactions\\"}),\\" for an overview of this capability.\\"]}),`\\n`,(0,i.jsxs)(e.p,{children:[(0,i.jsx)(e.strong,{children:\\"AIOps Labs:\\"}),\\" AIOps Labs provides two main capabilities using advanced statistical methods:\\"]}),`\\n`,(0,i.jsxs)(e.ul,{children:[`\\n`,(0,i.jsxs)(e.li,{children:[(0,i.jsx)(e.strong,{children:\\"Log spike detector\\"}),\\" helps identify reasons for increases in log rates. It makes it easy to find and investigate the causes of unusual spikes by using the analysis workflow view. Examine the histogram chart of the log rates for a given data view, and find the reason behind a particular change possibly in millions of log events across multiple fields and values.\\"]}),`\\n`,(0,i.jsxs)(e.li,{children:[(0,i.jsx)(e.strong,{children:\\"Log pattern analysis\\"}),\\" helps you find patterns in unstructured log messages and makes it easier to examine your data. It performs categorization analysis on a selected field of a data view, creates categories based on the data, and displays them together with a chart that shows the distribution of each category and an example document that matches the category.\\"]}),`\\n`]}),`\\n`,(0,i.jsxs)(e.p,{children:[\\"As we showed in the \\",(0,i.jsx)(e.a,{href:\\"https://www.elastic.co/blog/reduce-mttd-ml-machine-learning-observability\\",rel:\\"nofollow\\",children:\\"last blog\\"}),\\", using machine learning-based features helps minimize the extremely tedious and time-consuming process of analyzing data using traditional methods, such as alerting and simple pattern matching (visual or simple searching, etc.). 
Trying to find the needle in the haystack requires the use of some level of artificial intelligence due to the increasing amounts of telemetry data (logs, metrics, and traces) being collected across ever-growing applications.\\"]}),`\\n`,(0,i.jsx)(e.p,{children:\\"In this blog post, we\\\\u2019ll cover two capabilities found in Elastic\\\\u2019s AIOps Labs: log spike detector and log pattern analysis. We\\\\u2019ll use the same data from the previous blog and analyze it using these two capabilities.\\"}),`\\n`,(0,i.jsxs)(e.p,{children:[\\"_ \\",(0,i.jsx)(e.strong,{children:\\"We will cover log spike detector and log pattern analysis against the popular Hipster Shop app developed by Google, and modified recently by OpenTelemetry.\\"}),\\" _\\"]}),`\\n`,(0,i.jsxs)(e.p,{children:[\\"Overviews of high-latency capabilities can be found \\",(0,i.jsx)(e.a,{href:\\"https://www.elastic.co/blog/apm-correlations-elastic-observability-root-cause-transactions\\",rel:\\"nofollow\\",children:\\"here\\"}),\\", and an overview of AIOps labs can be found \\",(0,i.jsx)(e.a,{href:\\"https://www.youtube.com/watch?v=jgHxzUNzfhM&list=PLhLSfisesZItlRZKgd-DtYukNfpThDAv_&index=5\\",rel:\\"nofollow\\",children:\\"here\\"}),\\".\\"]}),`\\n`,(0,i.jsx)(e.p,{children:\\"Below, we will examine a scenario where we use anomaly detection and log categorization to help identify a root cause of an issue in Hipster Shop.\\"}),`\\n`,(0,i.jsx)(e.h2,{id:\\"prerequisites-and-config\\",children:\\"Prerequisites and config\\"}),`\\n`,(0,i.jsx)(e.p,{children:\\"If you plan on following this blog, here are some of the components and details we used to set up this demonstration:\\"}),`\\n`,(0,i.jsxs)(e.ul,{children:[`\\n`,(0,i.jsxs)(e.li,{children:[\\"Ensure you have an account on \\",(0,i.jsx)(e.a,{href:\\"https://cloud.elastic.co\\",rel:\\"nofollow\\",children:\\"Elastic Cloud\\"}),\\" and a deployed stack (\\",(0,i.jsx)(e.a,{href:\\"https://www.elastic.co/guide/en/elastic-stack/current/installing-elastic-stack.html\\",rel:\\"nofollow\\",children:\\"see instructions here\\"}),\\") on AWS. Deploying this on AWS is required for Elastic Serverless Forwarder.\\"]}),`\\n`,(0,i.jsxs)(e.li,{children:[\\"Utilize a version of the popular \\",(0,i.jsx)(e.a,{href:\\"https://github.com/GoogleCloudPlatform/microservices-demo\\",rel:\\"nofollow\\",children:\\"Hipster Shop\\"}),\\" demo application. It was originally written by Google to showcase Kubernetes across a multitude of variants available, such as the \\",(0,i.jsx)(e.a,{href:\\"https://github.com/open-telemetry/opentelemetry-demo\\",rel:\\"nofollow\\",children:\\"OpenTelemetry Demo App\\"}),\\". The Elastic version is found \\",(0,i.jsx)(e.a,{href:\\"https://github.com/elastic/opentelemetry-demo\\",rel:\\"nofollow\\",children:\\"here\\"}),\\".\\"]}),`\\n`,(0,i.jsxs)(e.li,{children:[\\"Ensure you have configured the app for either Elastic APM agents or OpenTelemetry agents. For more details, please refer to these two blogs: \\",(0,i.jsx)(e.a,{href:\\"https://www.elastic.co/blog/opentelemetry-observability\\",rel:\\"nofollow\\",children:\\"Independence with OTel in Elastic\\"}),\\" and \\",(0,i.jsx)(e.a,{href:\\"https://www.elastic.co/blog/implementing-kubernetes-observability-security-opentelemetry\\",rel:\\"nofollow\\",children:\\"Observability and Security with OTel in Elastic\\"}),\\". 
Additionally, review the \\",(0,i.jsx)(e.a,{href:\\"https://www.elastic.co/guide/en/apm/guide/current/open-telemetry.html\\",rel:\\"nofollow\\",children:\\"OTel documentation in Elastic\\"}),\\".\\"]}),`\\n`,(0,i.jsxs)(e.li,{children:[\\"Look through an overview of \\",(0,i.jsx)(e.a,{href:\\"https://www.elastic.co/guide/en/observability/current/apm.html\\",rel:\\"nofollow\\",children:\\"Elastic Observability APM capabilities\\"}),\\".\\"]}),`\\n`,(0,i.jsxs)(e.li,{children:[\\"Look through our \\",(0,i.jsx)(e.a,{href:\\"https://www.elastic.co/guide/en/observability/8.5/inspect-log-anomalies.html\\",rel:\\"nofollow\\",children:\\"anomaly detection documentation\\"}),\\" for logs and \\",(0,i.jsx)(e.a,{href:\\"https://www.elastic.co/guide/en/observability/8.5/categorize-logs.html\\",rel:\\"nofollow\\",children:\\"log categorization documentation\\"}),\\".\\"]}),`\\n`]}),`\\n`,(0,i.jsx)(e.p,{children:\\"Once you\\\\u2019ve instrumented your application with APM (Elastic or OTel) agents and are ingesting metrics and logs into Elastic Observability, you should see a service map for the application as follows:\\"}),`\\n`,(0,i.jsx)(e.p,{children:(0,i.jsx)(e.img,{src:\\"/assets/images/observability-logs-machine-learning-aiops/blog-elastic-observability-service-map.png\\",alt:\\"observability service map\\",width:\\"1713\\",height:\\"1239\\"})}),`\\n`,(0,i.jsx)(e.p,{children:\\"In our example, we\\\\u2019ve introduced issues to help walk you through the root cause analysis features. You might have a different set of issues depending on how you load the application and/or introduce specific feature flags.\\"}),`\\n`,(0,i.jsx)(e.p,{children:\\"As part of the walk-through, we\\\\u2019ll assume we are DevOps or SRE managing this application in production.\\"}),`\\n`,(0,i.jsx)(e.h2,{id:\\"root-cause-analysis\\",children:\\"Root cause analysis\\"}),`\\n`,(0,i.jsx)(e.p,{children:\\"While the application has been running normally for some time, you get a notification that some of the services are unhealthy. This can occur from the notification setting you\\\\u2019ve set up in Elastic or other external notification platforms (including customer-related issues). In this instance, we\\\\u2019re assuming that customer support has called in multiple customer complaints about the website.\\"}),`\\n`,(0,i.jsx)(e.p,{children:\\"How do you as a DevOps or SRE investigate this? We will walk through two avenues in Elastic to investigate the issue:\\"}),`\\n`,(0,i.jsxs)(e.ul,{children:[`\\n`,(0,i.jsx)(e.li,{children:\\"Log spike analysis\\"}),`\\n`,(0,i.jsx)(e.li,{children:\\"Log pattern analysis\\"}),`\\n`]}),`\\n`,(0,i.jsx)(e.p,{children:\\"While we show these two paths separately, they can be used in conjunction and are complementary, as they are both tools Elastic Observability provides to help you troubleshoot and identify a root cause.\\"}),`\\n`,(0,i.jsx)(e.p,{children:\\"Starting with the service map, you can see anomalies identified with red circles and as we select them, Elastic will provide a score for the anomaly.\\"}),`\\n`,(0,i.jsx)(e.p,{children:(0,i.jsx)(e.img,{src:\\"/assets/images/observability-logs-machine-learning-aiops/blog-elastic-observability-service-map-service-details.png\\",alt:\\"observability service map service details\\",width:\\"1629\\",height:\\"1211\\"})}),`\\n`,(0,i.jsxs)(e.p,{children:[\\"In this example, we can see that there is a score of 96 for a specific anomaly for the productCatalogService in the Hipster Shop application. 
An anomaly score indicates the significance of the anomaly compared to previously seen anomalies. Rather than jump into anomaly detection (see previous \\",(0,i.jsx)(e.a,{href:\\"https://www.elastic.co/blog/reduce-mttd-ml-machine-learning-observability\\",rel:\\"nofollow\\",children:\\"blog\\"}),\\"), let\\\\u2019s look at some of the potential issues by reviewing the service details in APM.\\"]}),`\\n`,(0,i.jsx)(e.p,{children:(0,i.jsx)(e.img,{src:\\"/assets/images/observability-logs-machine-learning-aiops/blog-elastic-observability-product-catalog-service-overview.png\\",alt:\\"observability product catalog service overview\\",width:\\"1703\\",height:\\"1158\\"})}),`\\n`,(0,i.jsxs)(e.p,{children:[\\"What we see for the productCatalogService is that there are latency issues, failed transactions, a large number of issues, and a dependency on PostgreSQL. When we look at the errors in more detail and drill down, we see they are all coming from \\",(0,i.jsx)(e.a,{href:\\"https://pkg.go.dev/github.com/lib/pq\\",rel:\\"nofollow\\",children:\\"PQ, which is a PostgreSQL driver for Go\\"}),\\".\\"]}),`\\n`,(0,i.jsx)(e.p,{children:(0,i.jsx)(e.img,{src:\\"/assets/images/observability-logs-machine-learning-aiops/blog-elastic-observability-product-catalog-service-errors.png\\",alt:\\"observability product catalog service errors\\",width:\\"1710\\",height:\\"1167\\"})}),`\\n`,(0,i.jsx)(e.p,{children:\\"As we drill further, we still can\\\\u2019t tell why the productCatalogService is not able to pull information from the PostgreSQL database.\\"}),`\\n`,(0,i.jsx)(e.p,{children:(0,i.jsx)(e.img,{src:\\"/assets/images/observability-logs-machine-learning-aiops/blog-elastic-observability-product-catalog-service-error-group.png\\",alt:\\"observability product catalog service error group\\",width:\\"1710\\",height:\\"1128\\"})}),`\\n`,(0,i.jsx)(e.p,{children:\\"We see that there is a spike in errors, so let\'s see if we can glean further insight using one of our two options:\\"}),`\\n`,(0,i.jsxs)(e.ul,{children:[`\\n`,(0,i.jsx)(e.li,{children:\\"Log rate spikes\\"}),`\\n`,(0,i.jsx)(e.li,{children:\\"Log pattern analysis\\"}),`\\n`]}),`\\n`,(0,i.jsx)(e.h3,{id:\\"log-rate-spikes\\",children:\\"Log rate spikes\\"}),`\\n`,(0,i.jsxs)(e.p,{children:[\\"Let\\\\u2019s start with the \\",(0,i.jsx)(e.strong,{children:\\"log rate spikes\\"}),\\" detector capability from Elastic\\\\u2019s AIOps Labs section of Elastic\\\\u2019s machine learning capabilities. We also pre-select analyzing the spike against a baseline history.\\"]}),`\\n`,(0,i.jsx)(e.p,{children:(0,i.jsx)(e.img,{src:\\"/assets/images/observability-logs-machine-learning-aiops/blog-elastic-observability-explain-log-rate-spikes-postgres.png\\",alt:\\"explain log rate spikes postgres\\",width:\\"1704\\",height:\\"1078\\"})}),`\\n`,(0,i.jsx)(e.p,{children:\\"The log rate spikes detector has looked at all the logs from the spike and compared them to the baseline, and it\'s seeing higher-than-normal counts in specific log messages. From a visual inspection, we see that PostgreSQL log messages are high. We further filter this with postgres.\\"}),`\\n`,(0,i.jsx)(e.p,{children:(0,i.jsx)(e.img,{src:\\"/assets/images/observability-logs-machine-learning-aiops/blog-elastic-observability-explain-log-rate-spikes-pgbench.png\\",alt:\\"explain log rates spikes pgbench\\",width:\\"1698\\",height:\\"921\\"})}),`\\n`,(0,i.jsx)(e.p,{children:\\"We immediately notice that this issue is potentially caused by pgbench, a popular PostgreSQL tool to help benchmark the database. 
pgbench runs the same sequence of SQL commands over and over, possibly in multiple, concurrent database sessions. While pgbench is definitely a useful tool, it should not be used in a production environment as it causes a heavy load on the database host, likely causing higher latency issues on the site.\\"}),`\\n`,(0,i.jsx)(e.p,{children:\\"While this may or may not be the ultimate root cause, we have rather quickly identified a potential issue that has a high probability of being the root cause. An engineer likely intended to run pgbench against a staging database to evaluate its performance, and not the production environment.\\"}),`\\n`,(0,i.jsx)(e.h3,{id:\\"log-pattern-analysis\\",children:\\"Log pattern analysis\\"}),`\\n`,(0,i.jsx)(e.p,{children:\\"Instead of log rate spikes, let\\\\u2019s use log pattern analysis to investigate the spike in errors we saw in productCatalogService. In AIOps Labs, we simply select Log Pattern Analysis, use Logs data, filter the results with postgres (since we know it\'s related to PostgreSQL), and look at information from the message field of the logs we are processing. We see the following:\\"}),`\\n`,(0,i.jsx)(e.p,{children:(0,i.jsx)(e.img,{src:\\"/assets/images/observability-logs-machine-learning-aiops/blog-elastic-observability-explain-log-pattern-analysis.png\\",alt:\\"observability explain log pattern analysis\\",width:\\"1711\\",height:\\"1025\\"})}),`\\n`,(0,i.jsx)(e.p,{children:\\"Almost immediately we see the biggest pattern it finds is a log message where pgbench is updating the database. We can further directly drill into this log message from log pattern analysis into Discover and review the details and further analyze the messages.\\"}),`\\n`,(0,i.jsx)(e.p,{children:(0,i.jsx)(e.img,{src:\\"/assets/images/observability-logs-machine-learning-aiops/blog-elastic-observability-expanded-document.png\\",alt:\\"expanded document\\",width:\\"1714\\",height:\\"1217\\"})}),`\\n`,(0,i.jsx)(e.p,{children:\\"As we mentioned in the previous section, while it may or may not be the root cause, it quickly gives us a place to start and a potential root cause. A developer likely intended to run pgbench against a staging database to evaluate its performance, and not the production environment.\\"}),`\\n`,(0,i.jsx)(e.h2,{id:\\"conclusion\\",children:\\"Conclusion\\"}),`\\n`,(0,i.jsxs)(e.p,{children:[\\"Between the \\",(0,i.jsx)(e.a,{href:\\"https://www.elastic.co/blog/reduce-mttd-ml-machine-learning-observability\\",rel:\\"nofollow\\",children:\\"first blog\\"}),\\" and this one, we\\\\u2019ve shown how Elastic Observability can help you further identify and get closer to pinpointing the root cause of issues without having to look for a \\\\u201Cneedle in a haystack.\\\\u201D Here\\\\u2019s a quick recap of what you learned in this blog.\\"]}),`\\n`,(0,i.jsxs)(e.ul,{children:[`\\n`,(0,i.jsxs)(e.li,{children:[`\\n`,(0,i.jsx)(e.p,{children:\\"Elastic Observability has numerous capabilities to help you reduce your time to find the root cause and improve your MTTR (even MTTD). In particular, we reviewed the following two main capabilities (found in AIOps Labs in Elastic) in this blog:\\"}),`\\n`,(0,i.jsxs)(e.ol,{children:[`\\n`,(0,i.jsxs)(e.li,{children:[(0,i.jsx)(e.strong,{children:\\"Log rate spikes\\"}),\\" detector helps identify reasons for increases in log rates. It makes it easy to find and investigate the causes of unusual spikes by using the analysis workflow view. 
Examine the histogram chart of the log rates for a given data view, and find the reason behind a particular change possibly in millions of log events across multiple fields and values.\\"]}),`\\n`,(0,i.jsxs)(e.li,{children:[(0,i.jsx)(e.strong,{children:\\"Log pattern analysis\\"}),\\" helps you find patterns in unstructured log messages and makes it easier to examine your data. It performs categorization analysis on a selected field of a data view, creates categories based on the data, and displays them together with a chart that shows the distribution of each category and an example document that matches the category.\\"]}),`\\n`]}),`\\n`]}),`\\n`,(0,i.jsxs)(e.li,{children:[`\\n`,(0,i.jsx)(e.p,{children:\\"You learned how easy and simple it is to use Elastic Observability\\\\u2019s log categorization and anomaly detection capabilities without having to understand machine learning (which helps drive these features) or having to do any lengthy setups.\\"}),`\\n`]}),`\\n`]}),`\\n`,(0,i.jsxs)(e.p,{children:[\\"Ready to get started? \\",(0,i.jsx)(e.a,{href:\\"https://cloud.elastic.co/registration\\",rel:\\"nofollow\\",children:\\"Register for Elastic Cloud\\"}),\\" and try out the features and capabilities outlined above.\\"]}),`\\n`,(0,i.jsx)(e.h3,{id:\\"additional-logging-resources\\",children:\\"Additional logging resources:\\"}),`\\n`,(0,i.jsxs)(e.ul,{children:[`\\n`,(0,i.jsx)(e.li,{children:(0,i.jsx)(e.a,{href:\\"https://www.elastic.co/getting-started/observability/collect-and-analyze-logs\\",rel:\\"nofollow\\",children:\\"Getting started with logging on Elastic (quickstart)\\"})}),`\\n`,(0,i.jsx)(e.li,{children:(0,i.jsx)(e.a,{href:\\"https://www.elastic.co/guide/en/observability/current/logs-metrics-get-started.html\\",rel:\\"nofollow\\",children:\\"Ingesting common known logs via integrations (compute node example)\\"})}),`\\n`,(0,i.jsx)(e.li,{children:(0,i.jsx)(e.a,{href:\\"https://docs.elastic.co/integrations\\",rel:\\"nofollow\\",children:\\"List of integrations\\"})}),`\\n`,(0,i.jsx)(e.li,{children:(0,i.jsx)(e.a,{href:\\"https://www.elastic.co/blog/log-monitoring-management-enterprise\\",rel:\\"nofollow\\",children:\\"Ingesting custom application logs into Elastic\\"})}),`\\n`,(0,i.jsx)(e.li,{children:(0,i.jsx)(e.a,{href:\\"https://www.elastic.co/blog/observability-logs-parsing-schema-read-write\\",rel:\\"nofollow\\",children:\\"Enriching logs in Elastic\\"})}),`\\n`,(0,i.jsxs)(e.li,{children:[\\"Analyzing Logs with \\",(0,i.jsx)(e.a,{href:\\"https://www.elastic.co/blog/reduce-mttd-ml-machine-learning-observability\\",rel:\\"nofollow\\",children:\\"Anomaly Detection (ML)\\"}),\\" and \\",(0,i.jsx)(e.a,{href:\\"https://www.elastic.co/blog/observability-logs-machine-learning-aiops\\",rel:\\"nofollow\\",children:\\"AIOps\\"})]}),`\\n`]}),`\\n`,(0,i.jsx)(e.h3,{id:\\"common-use-case-examples-with-logs\\",children:\\"Common use case examples with logs:\\"}),`\\n`,(0,i.jsxs)(e.ul,{children:[`\\n`,(0,i.jsx)(e.li,{children:(0,i.jsx)(e.a,{href:\\"https://youtu.be/ax04ZFWqVCg\\",rel:\\"nofollow\\",children:\\"Nginx log management\\"})}),`\\n`,(0,i.jsx)(e.li,{children:(0,i.jsx)(e.a,{href:\\"https://www.elastic.co/blog/vpc-flow-logs-monitoring-analytics-observability\\",rel:\\"nofollow\\",children:\\"AWS VPC Flow log management\\"})}),`\\n`,(0,i.jsx)(e.li,{children:(0,i.jsx)(e.a,{href:\\"https://www.elastic.co/blog/kubernetes-errors-elastic-observability-logs-openai\\",rel:\\"nofollow\\",children:\\"Using OpenAI to analyze Kubernetes 
errors\\"})}),`\\n`,(0,i.jsx)(e.li,{children:(0,i.jsx)(e.a,{href:\\"https://youtu.be/Li5TJAWbz8Q\\",rel:\\"nofollow\\",children:\\"PostgreSQL issue analysis with AIOps\\"})}),`\\n`]}),`\\n`,(0,i.jsx)(e.p,{children:(0,i.jsx)(e.em,{children:\\"Elastic and Elasticsearch are trademarks, logos or registered trademarks of Elasticsearch B.V. in the United States and other countries.\\"})})]})}function d(t={}){let{wrapper:e}=t.components||{};return e?(0,i.jsx)(e,{...t,children:(0,i.jsx)(h,{...t})}):h(t)}return v(E);})();\\n;return Component;"},"_id":"articles/observability-logs-machine-learning-aiops.mdx","_raw":{"sourceFilePath":"articles/observability-logs-machine-learning-aiops.mdx","sourceFileName":"observability-logs-machine-learning-aiops.mdx","sourceFileDir":"articles","contentType":"mdx","flattenedPath":"articles/observability-logs-machine-learning-aiops"},"type":"Article","imageUrl":"/assets/images/observability-logs-machine-learning-aiops/illustration-machine-learning-anomaly-1680x980.png","readingTime":"11 min read","url":"/observability-logs-machine-learning-aiops","headings":[{"level":2,"title":"Prerequisites and config","href":"#prerequisites-and-config"},{"level":2,"title":"Root cause analysis","href":"#root-cause-analysis"},{"level":3,"title":"Log rate spikes","href":"#log-rate-spikes"},{"level":3,"title":"Log pattern analysis","href":"#log-pattern-analysis"},{"level":2,"title":"Conclusion","href":"#conclusion"},{"level":3,"title":"Additional logging resources:","href":"#additional-logging-resources"},{"level":3,"title":"Common use case examples with logs:","href":"#common-use-case-examples-with-logs"}]},{"title":"Monitoring service performance: An overview of SLA calculation for Elastic Observability","slug":"observability-sla-calculations-transforms","date":"2023-04-24","description":"Elastic Stack provides many valuable insights for different users, such as reports on service performance and if the service level agreement (SLA) is met. In this post, we’ll provide an overview of calculating an SLA for Elastic Observability.","image":"illustration-analytics-report-1680x980.png","author":[{"slug":"philipp-kahr","type":"Author","_raw":{}}],"tags":[{"slug":"synthetics","type":"Tag","_raw":{}},{"slug":"log-analytics","type":"Tag","_raw":{}},{"slug":"slo","type":"Tag","_raw":{}}],"body":{"raw":"\\nElastic Stack provides many valuable insights for different users. Developers are interested in low-level metrics and debugging information. [SREs](https://www.elastic.co/blog/elastic-observability-sre-incident-response) are interested in seeing everything at once and identifying where the root cause is. Managers want reports that tell them how good service performance is and if the service level agreement (SLA) is met. In this post, we’ll focus on the service perspective and provide an overview of calculating an SLA.\\n\\n_Since version 8.8, we have a built in functionality to calculate SLOs —_ [_check out our guide_](https://www.elastic.co/guide/en/observability/current/slo.html)_!_\\n\\n## Foundations of calculating an SLA\\n\\nThere are many ways to calculate and measure an SLA. The most important part is the definition of the SLA, and as a consultant, I’ve seen many different ways. 
Some examples include:\\n\\n- Count of HTTP 2xx must be above 98% of all HTTP status codes\\n- Response time of successful HTTP 2xx requests must be below x milliseconds\\n- Synthetic monitor must be up at least 99%\\n- 95% of all batch transactions from the billing service need to complete within 4 seconds\\n\\nDepending on the origin of the data, calculating the SLA can be easier or more difficult. For uptime (Synthetic Monitoring), we automatically provide SLA values and offer out-of-the-box alerts, letting you simply define an alert when availability is below 98% for the last hour.\\n\\n![overview monitor details](/assets/images/observability-sla-calculations-transforms/blog-elastic-overview-monitor-details.png)\\n\\nI personally recommend using [Elastic Synthetic Monitoring](https://www.elastic.co/blog/new-synthetic-monitoring-observability) whenever possible to monitor service performance. Running HTTP requests and verifying the answers from the service, or doing fully fledged browser monitors and clicking through the website as a real user does, ensures a better understanding of the health of your service.\\n\\nSometimes this is impossible because you want to calculate the uptime of a specific Windows Service that does not offer any TCP port or HTTP interaction. Here the caveat applies that just because the service is running, it does not necessarily imply that the service is working fine.\\n\\n## Transforms to the rescue\\n\\nWe have identified our important service. In our case, it is the Steam Client Helper. There are two ways to solve this.\\n\\n### Lens formula\\n\\nYou can use Lens and formula (for a deep dive into formulas, [check out this blog](https://www.elastic.co/blog/how-tough-was-your-workout-take-a-closer-look-at-strava-data-through-kibana-lens)). Use the Search bar to filter down the data you want. Then use the formula option in Lens. We count all records with Running as the state and divide that by the overall count of records. This is a nice solution when there is a need to calculate quickly and on the fly.\\n\\n```sql\\ncount(kql=\'windows.service.state: \\"Running\\" \')/count()\\n```\\n\\nUsing the formula posted above as the bar chart\'s vertical axis calculates the uptime percentage. We use an annotation to mark why there is a dip and why this service was below the threshold. The annotation is set to reboot, which indicates a reboot happening, and thus, the service was down for a moment. Lastly, we add a reference line and set this to our defined threshold at 98%. This ensures that a quick look at the visualization allows our eyes to gauge if we are above or below the threshold.\\n\\n![visualization](/assets/images/observability-sla-calculations-transforms/blog-elastic-visualization.png)\\n\\n### Transform\\n\\nWhat if you are not interested in just one service, but multiple services are needed for your SLA? That is a problem Transforms can solve. The second issue is that the data from a Lens formula is only available inside that visualization. Therefore, we cannot create any alerts on this.\\n\\nGo to Transforms and create a pivot transform.\\n\\n1. Add the following filter to narrow it to only services data sets: data_stream.dataset: \\"windows.service\\". If you are interested in a specific service, you can always add it to the search bar if you want to know if a specific remote management service is up in your entire fleet!\\n\\n2. Select date histogram(@timestamp) and set it to your chosen unit. 
By default, the Elastic Agent only collects service states every 60 seconds. I am going with 1 hour.\\n\\n3. Select agent.name and windows.service.name as well.\\n\\n![transform configuration](/assets/images/observability-sla-calculations-transforms/blog-elastic-transform-configuration.png)\\n\\n4. Now we need to define an aggregation type. We will use a value_count of windows.service.state. That just counts how many records have this value.\\n\\n![aggregations](/assets/images/observability-sla-calculations-transforms/blog-elastic-aggregations.png)\\n\\n5. Rename the value_count to total_count.\\n\\n6. Add value_count for windows.service.state a second time and use the pencil icon to edit it to terms, which aggregates for running.\\n\\n![aggregations apply](/assets/images/observability-sla-calculations-transforms/blog-elastic-aggregations-apply.png)\\n\\n7. This opens up a sub-aggregation. Once again, select value_count(windows.service.state) and rename it to values.\\n\\n8. Now, the preview shows us the count of records with any states and the count of running.\\n\\n![transform configuration](/assets/images/observability-sla-calculations-transforms/blog-elastic-transform-configuration-next.png)\\n\\n9. Here comes the tricky part. We need to write some custom aggregations to calculate the percentage of uptime. Click on the copy icon next to the edit JSON config.\\n\\n10. In a new tab, go to Dev Tools. Paste what you have in the clipboard.\\n\\n11. Press the play button or use the keyboard shortcut ctrl+enter/cmd+enter and run it. This will create a preview of what the data looks like. It should give you the same information as in the table preview.\\n\\n12. Now, we need to calculate the percentage of up, which means doing a bucket script where we divide running.values by total_count, just like we did in the Lens visualization. Suppose you name the columns differently or use more than a single value. In that case, you will need to adapt accordingly.\\n\\n```json\\n\\"availability\\": {\\n \\"bucket_script\\": {\\n \\"buckets_path\\": {\\n \\"up\\": \\"running>values\\",\\n \\"total\\": \\"total_count\\"\\n },\\n \\"script\\": \\"params.up/params.total\\"\\n }\\n }\\n```\\n\\n13. This is the entire transform for me:\\n\\n```bash\\nPOST _transform/_preview\\n{\\n \\"source\\": {\\n \\"index\\": [\\n \\"metrics-*\\"\\n ]\\n },\\n \\"pivot\\": {\\n \\"group_by\\": {\\n \\"@timestamp\\": {\\n \\"date_histogram\\": {\\n \\"field\\": \\"@timestamp\\",\\n \\"calendar_interval\\": \\"1h\\"\\n }\\n },\\n \\"agent.name\\": {\\n \\"terms\\": {\\n \\"field\\": \\"agent.name\\"\\n }\\n },\\n \\"windows.service.name\\": {\\n \\"terms\\": {\\n \\"field\\": \\"windows.service.name\\"\\n }\\n }\\n },\\n \\"aggregations\\": {\\n \\"total_count\\": {\\n \\"value_count\\": {\\n \\"field\\": \\"windows.service.state\\"\\n }\\n },\\n \\"running\\": {\\n \\"filter\\": {\\n \\"term\\": {\\n \\"windows.service.state\\": \\"Running\\"\\n }\\n },\\n \\"aggs\\": {\\n \\"values\\": {\\n \\"value_count\\": {\\n \\"field\\": \\"windows.service.state\\"\\n }\\n }\\n }\\n },\\n \\"availability\\": {\\n \\"bucket_script\\": {\\n \\"buckets_path\\": {\\n \\"up\\": \\"running>values\\",\\n \\"total\\": \\"total_count\\"\\n },\\n \\"script\\": \\"params.up/params.total\\"\\n }\\n }\\n }\\n }\\n}\\n```\\n\\n14. The preview in Dev Tools should work and be complete. Otherwise, you must debug any errors. Most of the time, it is the bucket script and the path to the values. You might have called it up instead of running. 
14. The preview in Dev Tools should now run and return a complete result. If it does not, you must debug the errors. Most of the time, the culprit is the bucket script and the path to the values; you might, for example, have called the aggregation up instead of running. This is what the preview looks like for me.\\n\\n```json\\n{\\n \\"running\\": {\\n \\"values\\": 1\\n },\\n \\"agent\\": {\\n \\"name\\": \\"AnnalenasMac\\"\\n },\\n \\"@timestamp\\": \\"2021-12-07T19:00:00.000Z\\",\\n \\"total_count\\": 1,\\n \\"availability\\": 1,\\n \\"windows\\": {\\n \\"service\\": {\\n \\"name\\": \\"InstallService\\"\\n }\\n }\\n},\\n```\\n\\n15. Now we paste just the bucket script into the transform creation UI after selecting Edit JSON. It looks like this:\\n\\n![transform configuration pivot configuration object](/assets/images/observability-sla-calculations-transforms/blog-elastic-transform-configuration-pivot-configuration-object.png)\\n\\n16. Give your transform a name, set the destination index, and run it continuously. When selecting this, please also make sure not to use @timestamp; instead, opt for event.ingested. [Our documentation explains this in detail](https://www.elastic.co/guide/en/elasticsearch/reference/current/transform-checkpoints.html).\\n\\n![transform details](/assets/images/observability-sla-calculations-transforms/blog-elastic-transform-details.png)\\n\\n17. Click Next, then Create and start. This can take a bit, so don’t worry.\\n\\nTo summarize, we have now created a pivot transform that uses a bucket script aggregation to calculate the percentage of time a service was running. There is a caveat: by default, Elastic Agent only collects the service state every 60 seconds. A service can be up at the exact moment the state is collected and down a few seconds later. If the service is that important and no other monitoring options, such as [Elastic Synthetics](https://www.elastic.co/blog/what-can-elastic-synthetics-tell-us-about-kibana-dashboards), are possible, you might want to reduce the collection interval on the Agent side to capture the service state every 30 or 45 seconds. Depending on how important your thresholds are, you can create multiple policies with different collection intervals. A super important server might collect the service state every 10 seconds because you need as much granularity and confidence in the correctness of the metric as possible, while for normal workstations, where you just want to know whether your remote access solution is up the majority of the time, you might not mind having a single metric every 60 seconds.\\n\\nAfter you have created the transform, one additional feature you get is that the data is stored in an index in Elasticsearch. When you just build the visualization, the metric is calculated for that visualization only and is not available anywhere else. Since this is now regular data, you can create a threshold alert that notifies your favorite connector (Slack, Teams, ServiceNow, mail, and so [many more to choose from](https://www.elastic.co/guide/en/kibana/current/action-types.html)).\\n\\n## Visualizing the transformed data\\n\\nThe transform created a data view called windows-service. The first thing we want to do is change the format of the availability field to a percentage. This tells Lens that the field needs to be formatted as a percentage, so you don’t need to select it manually or do any calculations. Furthermore, in Discover, instead of seeing 0.5 you see 50%. Isn’t that cool? This is also possible for durations, like event.duration if you have it as nanoseconds! 
No more calculations on the fly and thinking if you need to divide by 1,000 or 1,000,000.\\n\\n![edit field availability](/assets/images/observability-sla-calculations-transforms/blog-elastic-edit-field-availability.png)\\n\\nWe get this view by using a simple Lens visualization with a timestamp on the vertical axis with the minimum interval for 1 day and an average of availability. Don’t worry — the other data will be populated once the transformation finishes. We can add a reference line using the value 0.98 because our target is 98% uptime of the service.\\n\\n![line](/assets/images/observability-sla-calculations-transforms/blog-elastic-line.png)\\n\\n## Summary\\n\\nThis blog post covered the steps needed to calculate the SLA for a specific data set in Elastic Observability, as well as how to visualize it. Using this calculation method opens the door to a lot of interesting use cases. You can change the bucket script and start calculating the number of sales, and the average basket size. Interested in learning more about Elastic Synthetics? Read [our documentation](https://www.elastic.co/guide/en/observability/current/monitor-uptime-synthetics.html) or check out our free [Synthetic Monitoring Quick Start training](https://www.elastic.co/training/synthetics-quick-start).\\n","code":"var Component=(()=>{var u=Object.create;var s=Object.defineProperty;var m=Object.getOwnPropertyDescriptor;var g=Object.getOwnPropertyNames;var f=Object.getPrototypeOf,p=Object.prototype.hasOwnProperty;var w=(n,e)=>()=>(e||n((e={exports:{}}).exports,e),e.exports),v=(n,e)=>{for(var i in e)s(n,i,{get:e[i],enumerable:!0})},r=(n,e,i,o)=>{if(e&&typeof e==\\"object\\"||typeof e==\\"function\\")for(let a of g(e))!p.call(n,a)&&a!==i&&s(n,a,{get:()=>e[a],enumerable:!(o=m(e,a))||o.enumerable});return n};var y=(n,e,i)=>(i=n!=null?u(f(n)):{},r(e||!n||!n.__esModule?s(i,\\"default\\",{value:n,enumerable:!0}):i,n)),b=n=>r(s({},\\"__esModule\\",{value:!0}),n);var c=w((_,l)=>{l.exports=_jsx_runtime});var T={};v(T,{default:()=>d,frontmatter:()=>k});var t=y(c()),k={title:\\"Monitoring service performance: An overview of SLA calculation for Elastic Observability\\",slug:\\"observability-sla-calculations-transforms\\",date:\\"2023-04-24\\",description:\\"Elastic Stack provides many valuable insights for different users, such as reports on service performance and if the service level agreement (SLA) is met. In this post, we\\\\u2019ll provide an overview of calculating an SLA for Elastic Observability.\\",author:[{slug:\\"philipp-kahr\\"}],image:\\"illustration-analytics-report-1680x980.png\\",tags:[{slug:\\"synthetics\\"},{slug:\\"log-analytics\\"},{slug:\\"slo\\"}]};function h(n){let e={a:\\"a\\",code:\\"code\\",em:\\"em\\",h2:\\"h2\\",h3:\\"h3\\",img:\\"img\\",li:\\"li\\",ol:\\"ol\\",p:\\"p\\",pre:\\"pre\\",ul:\\"ul\\",...n.components};return(0,t.jsxs)(t.Fragment,{children:[(0,t.jsxs)(e.p,{children:[\\"Elastic Stack provides many valuable insights for different users. Developers are interested in low-level metrics and debugging information. \\",(0,t.jsx)(e.a,{href:\\"https://www.elastic.co/blog/elastic-observability-sre-incident-response\\",rel:\\"nofollow\\",children:\\"SREs\\"}),\\" are interested in seeing everything at once and identifying where the root cause is. Managers want reports that tell them how good service performance is and if the service level agreement (SLA) is met. 
In this post, we\\\\u2019ll focus on the service perspective and provide an overview of calculating an SLA.\\"]}),`\\n`,(0,t.jsxs)(e.p,{children:[(0,t.jsx)(e.em,{children:\\"Since version 8.8, we have a built in functionality to calculate SLOs \\\\u2014\\"}),\\" \\",(0,t.jsx)(e.a,{href:\\"https://www.elastic.co/guide/en/observability/current/slo.html\\",rel:\\"nofollow\\",children:(0,t.jsx)(e.em,{children:\\"check out our guide\\"})}),(0,t.jsx)(e.em,{children:\\"!\\"})]}),`\\n`,(0,t.jsx)(e.h2,{id:\\"foundations-of-calculating-an-sla\\",children:\\"Foundations of calculating an SLA\\"}),`\\n`,(0,t.jsx)(e.p,{children:\\"There are many ways to calculate and measure an SLA. The most important part is the definition of the SLA, and as a consultant, I\\\\u2019ve seen many different ways. Some examples include:\\"}),`\\n`,(0,t.jsxs)(e.ul,{children:[`\\n`,(0,t.jsx)(e.li,{children:\\"Count of HTTP 2xx must be above 98% of all HTTP status\\"}),`\\n`,(0,t.jsx)(e.li,{children:\\"Response time of successful HTTP 2xx requests must be below x milliseconds\\"}),`\\n`,(0,t.jsx)(e.li,{children:\\"Synthetic monitor must be up at least 99%\\"}),`\\n`,(0,t.jsx)(e.li,{children:\\"95% of all batch transactions from the billing service need to complete within 4 seconds\\"}),`\\n`]}),`\\n`,(0,t.jsx)(e.p,{children:\\"Depending on the origin of the data, calculating the SLA can be easier or more difficult. For uptime (Synthetic Monitoring), we automatically provide SLA values and offer out-of-the-box alerts to simply define alert when availability below 98% for the last 1 hour.\\"}),`\\n`,(0,t.jsx)(e.p,{children:(0,t.jsx)(e.img,{src:\\"/assets/images/observability-sla-calculations-transforms/blog-elastic-overview-monitor-details.png\\",alt:\\"overview monitor details\\",width:\\"1999\\",height:\\"785\\"})}),`\\n`,(0,t.jsxs)(e.p,{children:[\\"I personally recommend using \\",(0,t.jsx)(e.a,{href:\\"https://www.elastic.co/blog/new-synthetic-monitoring-observability\\",rel:\\"nofollow\\",children:\\"Elastic Synthetic Monitoring\\"}),\\" whenever possible to monitor service performance. Running HTTP requests and verifying the answers from the service, or doing fully fledged browser monitors and clicking through the website as a real user does, ensures a better understanding of the health of your service.\\"]}),`\\n`,(0,t.jsx)(e.p,{children:\\"Sometimes this is impossible because you want to calculate the uptime of a specific Windows Service that does not offer any TCP port or HTTP interaction. Here the caveat applies that just because the service is running, it does not necessarily imply that the service is working fine.\\"}),`\\n`,(0,t.jsx)(e.h2,{id:\\"transforms-to-the-rescue\\",children:\\"Transforms to the rescue\\"}),`\\n`,(0,t.jsx)(e.p,{children:\\"We have identified our important service. In our case, it is the Steam Client Helper. There are two ways to solve this.\\"}),`\\n`,(0,t.jsx)(e.h3,{id:\\"lens-formula\\",children:\\"Lens formula\\"}),`\\n`,(0,t.jsxs)(e.p,{children:[\\"You can use Lens and formula (for a deep dive into formulas, \\",(0,t.jsx)(e.a,{href:\\"https://www.elastic.co/blog/how-tough-was-your-workout-take-a-closer-look-at-strava-data-through-kibana-lens\\",rel:\\"nofollow\\",children:\\"check out this blog\\"}),\\"). Use the Search bar to filter down the data you want. Then use the formula option in Lens. We are dividing all counts of records with Running as a state and dividing it by the overall count of records. 
This is a nice solution when there is a need to calculate quickly and on the fly.\\"]}),`\\n`,(0,t.jsx)(e.pre,{children:(0,t.jsx)(e.code,{className:\\"language-sql\\",children:`count(kql=\'windows.service.state: \\"Running\\" \')/count()\\n`})}),`\\n`,(0,t.jsx)(e.p,{children:\\"Using the formula posted above as the bar chart\'s vertical axis calculates the uptime percentage. We use an annotation to mark why there is a dip and why this service was below the threshold. The annotation is set to reboot, which indicates a reboot happening, and thus, the service was down for a moment. Lastly, we add a reference line and set this to our defined threshold at 98%. This ensures that a quick look at the visualization allows our eyes to gauge if we are above or below the threshold.\\"}),`\\n`,(0,t.jsx)(e.p,{children:(0,t.jsx)(e.img,{src:\\"/assets/images/observability-sla-calculations-transforms/blog-elastic-visualization.png\\",alt:\\"visualization\\",width:\\"1999\\",height:\\"1283\\"})}),`\\n`,(0,t.jsx)(e.h3,{id:\\"transform\\",children:\\"Transform\\"}),`\\n`,(0,t.jsx)(e.p,{children:\\"What if I am not interested in just one service, but there are multiple services needed for your SLA? That is where Transforms can solve this problem. Furthermore, the second issue is that this data is only available inside the Lens. Therefore, we cannot create any alerts on this.\\"}),`\\n`,(0,t.jsx)(e.p,{children:\\"Go to Transforms and create a pivot transform.\\"}),`\\n`,(0,t.jsxs)(e.ol,{children:[`\\n`,(0,t.jsxs)(e.li,{children:[`\\n`,(0,t.jsx)(e.p,{children:\'Add the following filter to narrow it to only services data sets: data_stream.dataset: \\"windows.service\\". If you are interested in a specific service, you can always add it to the search bar if you want to know if a specific remote management service is up in your entire fleet!\'}),`\\n`]}),`\\n`,(0,t.jsxs)(e.li,{children:[`\\n`,(0,t.jsx)(e.p,{children:\\"Select data histogram(@timestamp) and set it to your chosen unit. By default, the Elastic Agent only collects service states every 60 seconds. I am going with 1 hour.\\"}),`\\n`]}),`\\n`,(0,t.jsxs)(e.li,{children:[`\\n`,(0,t.jsx)(e.p,{children:\\"Select agent.name and windows.service.name as well.\\"}),`\\n`]}),`\\n`]}),`\\n`,(0,t.jsx)(e.p,{children:(0,t.jsx)(e.img,{src:\\"/assets/images/observability-sla-calculations-transforms/blog-elastic-transform-configuration.png\\",alt:\\"transform configuration\\",width:\\"1999\\",height:\\"750\\"})}),`\\n`,(0,t.jsxs)(e.ol,{start:\\"4\\",children:[`\\n`,(0,t.jsx)(e.li,{children:\\"Now we need to define an aggregation type. We will use a value_count of windows.service.state. 
That just counts how many records have this value.\\"}),`\\n`]}),`\\n`,(0,t.jsx)(e.p,{children:(0,t.jsx)(e.img,{src:\\"/assets/images/observability-sla-calculations-transforms/blog-elastic-aggregations.png\\",alt:\\"aggregations\\",width:\\"1999\\",height:\\"826\\"})}),`\\n`,(0,t.jsxs)(e.ol,{start:\\"5\\",children:[`\\n`,(0,t.jsxs)(e.li,{children:[`\\n`,(0,t.jsx)(e.p,{children:\\"Rename the value_count to total_count.\\"}),`\\n`]}),`\\n`,(0,t.jsxs)(e.li,{children:[`\\n`,(0,t.jsx)(e.p,{children:\\"Add value_count for windows.service.state a second time and use the pencil icon to edit it to terms, which aggregates for running.\\"}),`\\n`]}),`\\n`]}),`\\n`,(0,t.jsx)(e.p,{children:(0,t.jsx)(e.img,{src:\\"/assets/images/observability-sla-calculations-transforms/blog-elastic-aggregations-apply.png\\",alt:\\"aggregations apply\\",width:\\"1999\\",height:\\"1068\\"})}),`\\n`,(0,t.jsxs)(e.ol,{start:\\"7\\",children:[`\\n`,(0,t.jsxs)(e.li,{children:[`\\n`,(0,t.jsx)(e.p,{children:\\"This opens up a sub-aggregation. Once again, select value_count(windows.service.state) and rename it to values.\\"}),`\\n`]}),`\\n`,(0,t.jsxs)(e.li,{children:[`\\n`,(0,t.jsx)(e.p,{children:\\"Now, the preview shows us the count of records with any states and the count of running.\\"}),`\\n`]}),`\\n`]}),`\\n`,(0,t.jsx)(e.p,{children:(0,t.jsx)(e.img,{src:\\"/assets/images/observability-sla-calculations-transforms/blog-elastic-transform-configuration-next.png\\",alt:\\"transform configuration\\",width:\\"1999\\",height:\\"1406\\"})}),`\\n`,(0,t.jsxs)(e.ol,{start:\\"9\\",children:[`\\n`,(0,t.jsxs)(e.li,{children:[`\\n`,(0,t.jsx)(e.p,{children:\\"Here comes the tricky part. We need to write some custom aggregations to calculate the percentage of uptime. Click on the copy icon next to the edit JSON config.\\"}),`\\n`]}),`\\n`,(0,t.jsxs)(e.li,{children:[`\\n`,(0,t.jsx)(e.p,{children:\\"In a new tab, go to Dev Tools. Paste what you have in the clipboard.\\"}),`\\n`]}),`\\n`,(0,t.jsxs)(e.li,{children:[`\\n`,(0,t.jsx)(e.p,{children:\\"Press the play button or use the keyboard shortcut ctrl+enter/cmd+enter and run it. This will create a preview of what the data looks like. It should give you the same information as in the table preview.\\"}),`\\n`]}),`\\n`,(0,t.jsxs)(e.li,{children:[`\\n`,(0,t.jsx)(e.p,{children:\\"Now, we need to calculate the percentage of up, which means doing a bucket script where we divide running.values by total_count, just like we did in the Lens visualization. Suppose you name the columns differently or use more than a single value. 
In that case, you will need to adapt accordingly.\\"}),`\\n`]}),`\\n`]}),`\\n`,(0,t.jsx)(e.pre,{children:(0,t.jsx)(e.code,{className:\\"language-json\\",children:`\\"availability\\": {\\n \\"bucket_script\\": {\\n \\"buckets_path\\": {\\n \\"up\\": \\"running>values\\",\\n \\"total\\": \\"total_count\\"\\n },\\n \\"script\\": \\"params.up/params.total\\"\\n }\\n }\\n`})}),`\\n`,(0,t.jsxs)(e.ol,{start:\\"13\\",children:[`\\n`,(0,t.jsx)(e.li,{children:\\"This is the entire transform for me:\\"}),`\\n`]}),`\\n`,(0,t.jsx)(e.pre,{children:(0,t.jsx)(e.code,{className:\\"language-bash\\",children:`POST _transform/_preview\\n{\\n \\"source\\": {\\n \\"index\\": [\\n \\"metrics-*\\"\\n ]\\n },\\n \\"pivot\\": {\\n \\"group_by\\": {\\n \\"@timestamp\\": {\\n \\"date_histogram\\": {\\n \\"field\\": \\"@timestamp\\",\\n \\"calendar_interval\\": \\"1h\\"\\n }\\n },\\n \\"agent.name\\": {\\n \\"terms\\": {\\n \\"field\\": \\"agent.name\\"\\n }\\n },\\n \\"windows.service.name\\": {\\n \\"terms\\": {\\n \\"field\\": \\"windows.service.name\\"\\n }\\n }\\n },\\n \\"aggregations\\": {\\n \\"total_count\\": {\\n \\"value_count\\": {\\n \\"field\\": \\"windows.service.state\\"\\n }\\n },\\n \\"running\\": {\\n \\"filter\\": {\\n \\"term\\": {\\n \\"windows.service.state\\": \\"Running\\"\\n }\\n },\\n \\"aggs\\": {\\n \\"values\\": {\\n \\"value_count\\": {\\n \\"field\\": \\"windows.service.state\\"\\n }\\n }\\n }\\n },\\n \\"availability\\": {\\n \\"bucket_script\\": {\\n \\"buckets_path\\": {\\n \\"up\\": \\"running>values\\",\\n \\"total\\": \\"total_count\\"\\n },\\n \\"script\\": \\"params.up/params.total\\"\\n }\\n }\\n }\\n }\\n}\\n`})}),`\\n`,(0,t.jsxs)(e.ol,{start:\\"14\\",children:[`\\n`,(0,t.jsx)(e.li,{children:\\"The preview in Dev Tools should work and be complete. Otherwise, you must debug any errors. Most of the time, it is the bucket script and the path to the values. You might have called it up instead of running. This is what the preview looks like for me.\\"}),`\\n`]}),`\\n`,(0,t.jsx)(e.pre,{children:(0,t.jsx)(e.code,{className:\\"language-json\\",children:`{\\n \\"running\\": {\\n \\"values\\": 1\\n },\\n \\"agent\\": {\\n \\"name\\": \\"AnnalenasMac\\"\\n },\\n \\"@timestamp\\": \\"2021-12-07T19:00:00.000Z\\",\\n \\"total_count\\": 1,\\n \\"availability\\": 1,\\n \\"windows\\": {\\n \\"service\\": {\\n \\"name\\": \\"InstallService\\"\\n }\\n }\\n},\\n`})}),`\\n`,(0,t.jsxs)(e.ol,{start:\\"15\\",children:[`\\n`,(0,t.jsx)(e.li,{children:\\"Now we only paste the bucket script into the transform creation UI after selecting Edit JSON. It looks like this:\\"}),`\\n`]}),`\\n`,(0,t.jsx)(e.p,{children:(0,t.jsx)(e.img,{src:\\"/assets/images/observability-sla-calculations-transforms/blog-elastic-transform-configuration-pivot-configuration-object.png\\",alt:\\"transform configuration pivot configuration object\\",width:\\"1999\\",height:\\"1002\\"})}),`\\n`,(0,t.jsxs)(e.ol,{start:\\"16\\",children:[`\\n`,(0,t.jsxs)(e.li,{children:[\\"Give your transform a name, set the destination index, and run it continuously. When selecting this, please also make sure not to use @timestamp. Instead, opt for event.ingested. 
\\",(0,t.jsx)(e.a,{href:\\"https://www.elastic.co/guide/en/elasticsearch/reference/current/transform-checkpoints.html\\",rel:\\"nofollow\\",children:\\"Our documentation explains this in detail\\"}),\\".\\"]}),`\\n`]}),`\\n`,(0,t.jsx)(e.p,{children:(0,t.jsx)(e.img,{src:\\"/assets/images/observability-sla-calculations-transforms/blog-elastic-transform-details.png\\",alt:\\"transform details\\",width:\\"1228\\",height:\\"1674\\"})}),`\\n`,(0,t.jsxs)(e.ol,{start:\\"17\\",children:[`\\n`,(0,t.jsx)(e.li,{children:\\"Click next and create and start. This can take a bit, so don\\\\u2019t worry.\\"}),`\\n`]}),`\\n`,(0,t.jsxs)(e.p,{children:[\\"To summarize, we have now created a pivot transform using a bucket script aggregation to calculate the running time of a service in percentage. There is a caveat because Elastic Agent, per default, only collects the every 60 seconds the services state. It can be that a service is up exactly when collected and down a few seconds later. If it is that important and no other monitoring possibilities, such as \\",(0,t.jsx)(e.a,{href:\\"https://www.elastic.co/blog/what-can-elastic-synthetics-tell-us-about-kibana-dashboards\\",rel:\\"nofollow\\",children:\\"Elastic Synthetics\\"}),\\" are possible, you might want to reduce the collection time on the Agent side to get the services state every 30 seconds, 45 seconds. Depending on how important your thresholds are, you can create multiple policies having different collection times. This ensures that a super important server might collect the services state every 10 seconds because you need as much granularity and insurance for the correctness of the metric. For normal workstations where you just want to know if your remote access solution is up the majority of the time, you might not mind having a single metric every 60 seconds.\\"]}),`\\n`,(0,t.jsxs)(e.p,{children:[\\"After you have created the transform, one additional feature you get is that the data is stored in an index, similar to in Elasticsearch. When you just do the visualization, the metric is calculated for this visualization only and not available anywhere else. Since this is now data, you can create a threshold alert to your favorite connection (Slack, Teams, Service Now, Mail, and so \\",(0,t.jsx)(e.a,{href:\\"https://www.elastic.co/guide/en/kibana/current/action-types.html\\",rel:\\"nofollow\\",children:\\"many more to choose from\\"}),\\").\\"]}),`\\n`,(0,t.jsx)(e.h2,{id:\\"visualizing-the-transformed-data\\",children:\\"Visualizing the transformed data\\"}),`\\n`,(0,t.jsx)(e.p,{children:\\"The transform created a data view called windows-service. The first thing we want to do is change the format of the availability field to a percentage. This automatically tells Lens that this needs to be formatted as a percentage field, so you don\\\\u2019t need to select it manually as well as do calculations. Furthermore, in Discover, instead of seeing 0.5 you see 50%. Isn\\\\u2019t that cool? This is also possible for durations, like event.duration if you have it as nanoseconds! 
No more calculations on the fly and thinking if you need to divide by 1,000 or 1,000,000.\\"}),`\\n`,(0,t.jsx)(e.p,{children:(0,t.jsx)(e.img,{src:\\"/assets/images/observability-sla-calculations-transforms/blog-elastic-edit-field-availability.png\\",alt:\\"edit field availability\\",width:\\"1999\\",height:\\"1601\\"})}),`\\n`,(0,t.jsx)(e.p,{children:\\"We get this view by using a simple Lens visualization with a timestamp on the vertical axis with the minimum interval for 1 day and an average of availability. Don\\\\u2019t worry \\\\u2014 the other data will be populated once the transformation finishes. We can add a reference line using the value 0.98 because our target is 98% uptime of the service.\\"}),`\\n`,(0,t.jsx)(e.p,{children:(0,t.jsx)(e.img,{src:\\"/assets/images/observability-sla-calculations-transforms/blog-elastic-line.png\\",alt:\\"line\\",width:\\"1999\\",height:\\"1751\\"})}),`\\n`,(0,t.jsx)(e.h2,{id:\\"summary\\",children:\\"Summary\\"}),`\\n`,(0,t.jsxs)(e.p,{children:[\\"This blog post covered the steps needed to calculate the SLA for a specific data set in Elastic Observability, as well as how to visualize it. Using this calculation method opens the door to a lot of interesting use cases. You can change the bucket script and start calculating the number of sales, and the average basket size. Interested in learning more about Elastic Synthetics? Read \\",(0,t.jsx)(e.a,{href:\\"https://www.elastic.co/guide/en/observability/current/monitor-uptime-synthetics.html\\",rel:\\"nofollow\\",children:\\"our documentation\\"}),\\" or check out our free \\",(0,t.jsx)(e.a,{href:\\"https://www.elastic.co/training/synthetics-quick-start\\",rel:\\"nofollow\\",children:\\"Synthetic Monitoring Quick Start training\\"}),\\".\\"]})]})}function d(n={}){let{wrapper:e}=n.components||{};return e?(0,t.jsx)(e,{...n,children:(0,t.jsx)(h,{...n})}):h(n)}return b(T);})();\\n;return Component;"},"_id":"articles/observability-sla-calculations-transforms.mdx","_raw":{"sourceFilePath":"articles/observability-sla-calculations-transforms.mdx","sourceFileName":"observability-sla-calculations-transforms.mdx","sourceFileDir":"articles","contentType":"mdx","flattenedPath":"articles/observability-sla-calculations-transforms"},"type":"Article","imageUrl":"/assets/images/observability-sla-calculations-transforms/illustration-analytics-report-1680x980.png","readingTime":"12 min read","url":"/observability-sla-calculations-transforms","headings":[{"level":2,"title":"Foundations of calculating an SLA","href":"#foundations-of-calculating-an-sla"},{"level":2,"title":"Transforms to the rescue","href":"#transforms-to-the-rescue"},{"level":3,"title":"Lens formula","href":"#lens-formula"},{"level":3,"title":"Transform","href":"#transform"},{"level":2,"title":"Visualizing the transformed data","href":"#visualizing-the-transformed-data"},{"level":2,"title":"Summary","href":"#summary"}]},{"title":"Collecting OpenShift container logs using Red Hat’s OpenShift Logging Operator","slug":"openshift-container-logs-red-hat-logging-operator","date":"2024-01-16","description":"Learn how to optimize OpenShift logs collected with Red Hat OpenShift Logging Operator, as well as format and route them efficiently in 
Elasticsearch.","image":"139687_-_Blog_Header_Banner_V1.jpg","author":[{"slug":"mirko-bez","type":"Author","_raw":{}},{"slug":"david-ricordel","type":"Author","_raw":{}},{"slug":"philipp-kahr","type":"Author","_raw":{}}],"tags":[{"slug":"kubernetes","type":"Tag","_raw":{}},{"slug":"redhat","type":"Tag","_raw":{}},{"slug":"log-analytics","type":"Tag","_raw":{}}],"body":{"raw":"\\nThis blog explores a possible approach to collecting and formatting OpenShift Container Platform logs and audit logs with Red Hat OpenShift Logging Operator. We recommend using Elastic\xae Agent for the best possible experience! We will also show how to format the logs to Elastic Common Schema ([ECS](https://www.elastic.co/guide/en/ecs/current/index.html)) for the best experience viewing, searching, and visualizing your logs. All examples in this blog are based on OpenShift 4.14.\\n\\n## Why use OpenShift Logging Operator?\\n\\nA lot of enterprise customers use OpenShift as their orchestration solution. The advantages of this approach are:\\n\\n- It is developed and supported by Red Hat\\n\\n- It can automatically update the OpenShift cluster along with the operating system to make sure that they are and remain compatible\\n\\n- It can speed up development life cycles with features like source to image\\n\\n- It provides enhanced security\\n\\nIn our consulting experience, this latter aspect poses challenges and creates friction with OpenShift administrators when we try to install an Elastic Agent to collect the logs of the pods. Indeed, Elastic Agent requires the host file system to be mounted in the pod, and it also needs to run in privileged mode. (Read more about the permissions required by Elastic Agent in the [official Elasticsearch\xae Documentation](https://www.elastic.co/guide/en/fleet/current/running-on-kubernetes-standalone.html#_red_hat_openshift_configuration)). While the solution we explore in this post requires similar privileges under the hood, it is managed by the OpenShift Logging Operator, which is developed and supported by Red Hat.\\n\\n## Which logs are we going to collect?\\n\\nIn OpenShift Container Platform, we distinguish [three broad categories of logs](https://docs.openshift.com/container-platform/4.14/logging/cluster-logging.html#logging-architecture-overview_cluster-logging): audit, application, and infrastructure logs:\\n\\n- **Audit logs** record the activities performed on the system by users, administrators, and other components.\\n\\n- **Application logs** are composed of the container logs of the pods running in non-reserved namespaces.\\n\\n- **Infrastructure logs** are composed of container logs of the pods running in reserved namespaces like openshift\\\\*, kube\\\\*, and default, along with journald messages from the nodes.\\n\\nFor the sake of simplicity, we will consider only audit and application logs in the following, and we will describe how to format them in the way the Kubernetes integration expects so you can get the most out of Elastic Observability.\\n\\n## Getting started\\n\\nTo collect the logs from OpenShift, we must perform some preparation steps in Elasticsearch and OpenShift.\\n\\n### Inside Elasticsearch\\n\\nWe first [install the Kubernetes integration assets](https://www.elastic.co/guide/en/fleet/8.11/install-uninstall-integration-assets.html#install-integration-assets). 
We are mainly interested in the index templates and ingest pipelines for the logs-kubernetes.container_logs and logs-kubernetes.audit_logs.\\n\\nTo format the logs received from the ClusterLogForwarder in [ECS](https://www.elastic.co/guide/en/ecs/current/index.html) format, we will define a pipeline to normalize the container logs. The field naming convention used by OpenShift is slightly different from that used by ECS. To get a list of exported fields from OpenShift, refer to [Exported fields | Logging | OpenShift Container Platform 4.14](https://docs.openshift.com/container-platform/4.14/logging/cluster-logging-exported-fields.html). To get a list of exported fields of the Kubernetes integration, you can refer to [Kubernetes fields | Filebeat Reference [8.11] | Elastic](https://www.elastic.co/guide/en/beats/filebeat/current/exported-fields-kubernetes-processor.html) and [Logs app fields | Elastic Observability [8.11]](https://www.elastic.co/guide/en/observability/current/logs-app-fields.html). Further, specific fields like kubernetes.annotations must be normalized by replacing dots with underscores. This operation is usually done automatically by Elastic Agent.\\n\\n```bash\\nPUT _ingest/pipeline/openshift-2-ecs\\n{\\n \\"processors\\": [\\n {\\n \\"rename\\": {\\n \\"field\\": \\"kubernetes.pod_id\\",\\n \\"target_field\\": \\"kubernetes.pod.uid\\",\\n \\"ignore_missing\\": true\\n }\\n },\\n {\\n \\"rename\\": {\\n \\"field\\": \\"kubernetes.pod_ip\\",\\n \\"target_field\\": \\"kubernetes.pod.ip\\",\\n \\"ignore_missing\\": true\\n }\\n },\\n {\\n \\"rename\\": {\\n \\"field\\": \\"kubernetes.pod_name\\",\\n \\"target_field\\": \\"kubernetes.pod.name\\",\\n \\"ignore_missing\\": true\\n }\\n },\\n {\\n \\"rename\\": {\\n \\"field\\": \\"kubernetes.namespace_name\\",\\n \\"target_field\\": \\"kubernetes.namespace\\",\\n \\"ignore_missing\\": true\\n }\\n },\\n {\\n \\"rename\\": {\\n \\"field\\": \\"kubernetes.namespace_id\\",\\n \\"target_field\\": \\"kubernetes.namespace_uid\\",\\n \\"ignore_missing\\": true\\n }\\n },\\n {\\n \\"rename\\": {\\n \\"field\\": \\"kubernetes.container_id\\",\\n \\"target_field\\": \\"container.id\\",\\n \\"ignore_missing\\": true\\n }\\n },\\n {\\n \\"dissect\\": {\\n \\"field\\": \\"container.id\\",\\n \\"pattern\\": \\"%{container.runtime}://%{container.id}\\",\\n \\"ignore_failure\\": true\\n }\\n },\\n {\\n \\"rename\\": {\\n \\"field\\": \\"kubernetes.container_image\\",\\n \\"target_field\\": \\"container.image.name\\",\\n \\"ignore_missing\\": true\\n }\\n },\\n {\\n \\"set\\": {\\n \\"field\\": \\"kubernetes.container.image\\",\\n \\"copy_from\\": \\"container.image.name\\",\\n \\"ignore_failure\\": true\\n }\\n },\\n {\\n \\"set\\": {\\n \\"copy_from\\": \\"kubernetes.container_name\\",\\n \\"field\\": \\"container.name\\",\\n \\"ignore_failure\\": true\\n }\\n },\\n {\\n \\"rename\\": {\\n \\"field\\": \\"kubernetes.container_name\\",\\n \\"target_field\\": \\"kubernetes.container.name\\",\\n \\"ignore_missing\\": true\\n }\\n },\\n {\\n \\"set\\": {\\n \\"field\\": \\"kubernetes.node.name\\",\\n \\"copy_from\\": \\"hostname\\",\\n \\"ignore_failure\\": true\\n }\\n },\\n {\\n \\"rename\\": {\\n \\"field\\": \\"hostname\\",\\n \\"target_field\\": \\"host.name\\",\\n \\"ignore_missing\\": true\\n }\\n },\\n {\\n \\"rename\\": {\\n \\"field\\": \\"level\\",\\n \\"target_field\\": \\"log.level\\",\\n \\"ignore_missing\\": true\\n }\\n },\\n {\\n \\"rename\\": {\\n \\"field\\": \\"file\\",\\n \\"target_field\\": \\"log.file.path\\",\\n 
\\"ignore_missing\\": true\\n }\\n },\\n {\\n \\"set\\": {\\n \\"copy_from\\": \\"openshift.cluster_id\\",\\n \\"field\\": \\"orchestrator.cluster.name\\",\\n \\"ignore_failure\\": true\\n }\\n },\\n {\\n \\"dissect\\": {\\n \\"field\\": \\"kubernetes.pod_owner\\",\\n \\"pattern\\": \\"%{_tmp.parent_type}/%{_tmp.parent_name}\\",\\n \\"ignore_missing\\": true\\n }\\n },\\n {\\n \\"lowercase\\": {\\n \\"field\\": \\"_tmp.parent_type\\",\\n \\"ignore_missing\\": true\\n }\\n },\\n {\\n \\"set\\": {\\n \\"field\\": \\"kubernetes.pod.{{_tmp.parent_type}}.name\\",\\n \\"value\\": \\"{{_tmp.parent_name}}\\",\\n \\"if\\": \\"ctx?._tmp?.parent_type != null\\",\\n \\"ignore_failure\\": true\\n }\\n },\\n {\\n \\"remove\\": {\\n \\"field\\": [\\n \\"_tmp\\",\\n \\"kubernetes.pod_owner\\"\\n ],\\n \\"ignore_missing\\": true\\n }\\n },\\n {\\n \\"script\\": {\\n \\"description\\": \\"Normalize kubernetes annotations\\",\\n \\"if\\": \\"ctx?.kubernetes?.annotations != null\\",\\n \\"source\\": \\"\\"\\"\\n def keys = new ArrayList(ctx.kubernetes.annotations.keySet());\\n for(k in keys) {\\n if (k.indexOf(\\".\\") >= 0) {\\n def sanitizedKey = k.replace(\\".\\", \\"_\\");\\n ctx.kubernetes.annotations[sanitizedKey] = ctx.kubernetes.annotations[k];\\n ctx.kubernetes.annotations.remove(k);\\n }\\n }\\n \\"\\"\\"\\n }\\n },\\n {\\n \\"script\\": {\\n \\"description\\": \\"Normalize kubernetes namespace_labels\\",\\n \\"if\\": \\"ctx?.kubernetes?.namespace_labels != null\\",\\n \\"source\\": \\"\\"\\"\\n def keys = new ArrayList(ctx.kubernetes.namespace_labels.keySet());\\n for(k in keys) {\\n if (k.indexOf(\\".\\") >= 0) {\\n def sanitizedKey = k.replace(\\".\\", \\"_\\");\\n ctx.kubernetes.namespace_labels[sanitizedKey] = ctx.kubernetes.namespace_labels[k];\\n ctx.kubernetes.namespace_labels.remove(k);\\n }\\n }\\n \\"\\"\\"\\n }\\n },\\n {\\n \\"script\\": {\\n \\"description\\": \\"Normalize special Kubernetes Labels used in logs-kubernetes.container_logs to determine service.name and service.version\\",\\n \\"if\\": \\"ctx?.kubernetes?.labels != null\\",\\n \\"source\\": \\"\\"\\"\\n def keys = new ArrayList(ctx.kubernetes.labels.keySet());\\n for(k in keys) {\\n if (k.startsWith(\\"app_kubernetes_io_component_\\")) {\\n def sanitizedKey = k.replace(\\"app_kubernetes_io_component_\\", \\"app_kubernetes_io_component/\\");\\n ctx.kubernetes.labels[sanitizedKey] = ctx.kubernetes.labels[k];\\n ctx.kubernetes.labels.remove(k);\\n }\\n }\\n \\"\\"\\"\\n }\\n }\\n ]\\n}\\n```\\n\\nSimilarly, to handle the audit logs like the ones collected by Kubernetes, we define an ingest pipeline:\\n\\n```bash\\nPUT _ingest/pipeline/openshift-audit-2-ecs\\n{\\n \\"processors\\": [\\n {\\n \\"script\\": {\\n \\"source\\": \\"\\"\\"\\n def audit = [:];\\n def keyToRemove = [];\\n for(k in ctx.keySet()) {\\n if (k.indexOf(\'_\') != 0 && ![\'@timestamp\', \'data_stream\', \'openshift\', \'event\', \'hostname\'].contains(k)) {\\n audit[k] = ctx[k];\\n keyToRemove.add(k);\\n }\\n }\\n for(k in keyToRemove) {\\n ctx.remove(k);\\n }\\n ctx.kubernetes=[\\"audit\\":audit];\\n \\"\\"\\",\\n \\"description\\": \\"Move all the \'kubernetes.audit\' fields under \'kubernetes.audit\' object\\"\\n }\\n },\\n {\\n \\"set\\": {\\n \\"copy_from\\": \\"openshift.cluster_id\\",\\n \\"field\\": \\"orchestrator.cluster.name\\",\\n \\"ignore_failure\\": true\\n }\\n },\\n {\\n \\"set\\": {\\n \\"field\\": \\"kubernetes.node.name\\",\\n \\"copy_from\\": \\"hostname\\",\\n \\"ignore_failure\\": true\\n }\\n },\\n {\\n \\"rename\\": {\\n \\"field\\": 
\\"hostname\\",\\n \\"target_field\\": \\"host.name\\",\\n \\"ignore_missing\\": true\\n }\\n },\\n {\\n \\"script\\": {\\n \\"if\\": \\"ctx?.kubernetes?.audit?.annotations != null\\",\\n \\"source\\": \\"\\"\\"\\n def keys = new ArrayList(ctx.kubernetes.audit.annotations.keySet());\\n for(k in keys) {\\n if (k.indexOf(\\".\\") >= 0) {\\n def sanitizedKey = k.replace(\\".\\", \\"_\\");\\n ctx.kubernetes.audit.annotations[sanitizedKey] = ctx.kubernetes.audit.annotations[k];\\n ctx.kubernetes.audit.annotations.remove(k);\\n }\\n }\\n \\"\\"\\",\\n \\"description\\": \\"Normalize kubernetes audit annotations field as expected by the Integration\\"\\n }\\n }\\n ]\\n}\\n```\\n\\nThe main objective of the pipeline is to mimic what Elastic Agent does: storing all audit fields under the kubernetes.audit object.\\n\\nWe are not going to use the conventional @custom pipeline approach because the fields must be normalized before invoking the logs-kubernetes.container_logs integration pipeline, which uses fields like kubernetes.container.name and kubernetes.labels to determine the fields service.name and service.version. Read more about custom pipelines in [Tutorial: Transform data with custom ingest pipelines | Fleet and Elastic Agent Guide [8.11]](https://www.elastic.co/guide/en/fleet/8.11/data-streams-pipeline-tutorial.html#data-streams-pipeline-one).\\n\\nThe OpenShift Cluster Log Forwarder writes the data to the indices app-write and audit-write by default. It is possible to change this behavior, but it still tries to prepend the prefix “app” and append the suffix “write”, so we opted to send the data to the default destination and use the reroute processor to send it to the right data streams. Read more about the Reroute Processor in our blog [Simplifying log data management: Harness the power of flexible routing with Elastic](https://www.elastic.co/blog/simplifying-log-data-management-flexible-routing-elastic) and our documentation [Reroute processor | Elasticsearch Guide [8.11] | Elastic](https://www.elastic.co/guide/en/elasticsearch/reference/current/reroute-processor.html).\\n\\nIn this case, we want to redirect the container logs (app-write index) to logs-kubernetes.container_logs and the audit logs (audit-write) to logs-kubernetes.audit_logs:\\n\\n```bash\\nPUT _ingest/pipeline/app-write-reroute-pipeline\\n{\\n \\"processors\\": [\\n {\\n \\"pipeline\\": {\\n \\"name\\": \\"openshift-2-ecs\\",\\n \\"description\\": \\"Format the Openshift data in ECS\\"\\n }\\n },\\n {\\n \\"set\\": {\\n \\"field\\": \\"event.dataset\\",\\n \\"value\\": \\"kubernetes.container_logs\\"\\n }\\n },\\n {\\n \\"reroute\\": {\\n \\"destination\\": \\"logs-kubernetes.container_logs-openshift\\"\\n }\\n }\\n ]\\n}\\n\\n\\n\\nPUT _ingest/pipeline/audit-write-reroute-pipeline\\n{\\n \\"processors\\": [\\n {\\n \\"pipeline\\": {\\n \\"name\\": \\"openshift-audit-2-ecs\\",\\n \\"description\\": \\"Format the Openshift data in ECS\\"\\n }\\n },\\n {\\n \\"set\\": {\\n \\"field\\": \\"event.dataset\\",\\n \\"value\\": \\"kubernetes.audit_logs\\"\\n }\\n },\\n {\\n \\"reroute\\": {\\n \\"destination\\": \\"logs-kubernetes.audit_logs-openshift\\"\\n }\\n }\\n ]\\n}\\n```\\n\\nNote that because app-write and audit-write do not follow the data stream naming convention, we have to set the destination field explicitly in the reroute processor. The reroute processor will also fill in the [data_stream fields](https://www.elastic.co/guide/en/ecs/8.11/ecs-data_stream.html) for us; with Elastic Agent, this step is done automatically at the source.\\n\\n
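Before wiring these pipelines to the indices, you can sanity-check the ECS normalization in Dev Tools with the simulate API. Here is a minimal sketch; the sample document is made up and only covers a handful of the renamed fields:\\n\\n```bash\\nPOST _ingest/pipeline/openshift-2-ecs/_simulate\\n{\\n  \\"docs\\": [\\n    {\\n      \\"_source\\": {\\n        \\"hostname\\": \\"worker-0\\",\\n        \\"level\\": \\"info\\",\\n        \\"kubernetes\\": {\\n          \\"pod_name\\": \\"my-app-5c9d\\",\\n          \\"namespace_name\\": \\"my-namespace\\",\\n          \\"container_name\\": \\"my-app\\",\\n          \\"container_id\\": \\"cri-o://4f1c\\"\\n        }\\n      }\\n    }\\n  ]\\n}\\n```\\n\\nThe response should show host.name, kubernetes.pod.name, kubernetes.namespace, and a dissected container.id. If a field was not renamed, the name in the sample document probably does not match what the forwarder actually sends.\\n\\n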
Further, we create the indices with the default pipelines we just defined, so that incoming logs are rerouted according to our needs.\\n\\n```bash\\nPUT app-write\\n{\\n \\"settings\\": {\\n \\"index.default_pipeline\\": \\"app-write-reroute-pipeline\\"\\n }\\n}\\n\\n\\nPUT audit-write\\n{\\n \\"settings\\": {\\n \\"index.default_pipeline\\": \\"audit-write-reroute-pipeline\\"\\n }\\n}\\n```\\n\\nBasically, what we did can be summarized in this picture:\\n\\n![openshift-summary-blog](/assets/images/openshift-container-logs-red-hat-logging-operator/openshift-summary-blog.png)\\n\\nLet us take the container logs. When the operator attempts to write to the app-write index, it invokes the default_pipeline “app-write-reroute-pipeline”, which formats the logs into ECS and reroutes them to the logs-kubernetes.container_logs-openshift data stream. This calls the integration pipeline, which invokes, if it exists, the logs-kubernetes.container_logs@custom pipeline. Finally, the logs-kubernetes.container_logs pipeline may reroute the logs to another data set and namespace utilizing the elastic.co/dataset and elastic.co/namespace annotations as described in the Kubernetes [integration documentation](https://docs.elastic.co/integrations/kubernetes/container-logs#rerouting-based-on-pod-annotations), which in turn can lead to the execution of another integration pipeline; a sketch of such annotations follows below.\\n\\n
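For illustration, this is roughly what those rerouting annotations look like on a pod. The dataset and namespace values are made-up examples; the annotation keys are the ones from the integration documentation linked above:\\n\\n```yaml\\napiVersion: v1\\nkind: Pod\\nmetadata:\\n  name: my-app\\n  annotations:\\n    elastic.co/dataset: my-app.log\\n    elastic.co/namespace: production\\nspec:\\n  containers:\\n    - name: my-app\\n      image: my-app:latest\\n```\\n\\n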
### Create a user for sending the logs\\n\\nWe are going to use basic authentication because, at the time of writing, it is the only supported authentication method for Elasticsearch in OpenShift logging. Thus, we need a role that allows the user to write and read the app-write and audit-write indices (required by the OpenShift agent) and grants auto_configure access to logs-\\\\*-\\\\* to allow custom Kubernetes rerouting:\\n\\n```bash\\nPUT _security/role/YOURROLE\\n{\\n \\"cluster\\": [\\n \\"monitor\\"\\n ],\\n \\"indices\\": [\\n {\\n \\"names\\": [\\n \\"logs-*-*\\"\\n ],\\n \\"privileges\\": [\\n \\"auto_configure\\",\\n \\"create_doc\\"\\n ],\\n \\"allow_restricted_indices\\": false\\n },\\n {\\n \\"names\\": [\\n \\"app-write\\",\\n \\"audit-write\\"\\n ],\\n \\"privileges\\": [\\n \\"create_doc\\",\\n \\"read\\"\\n ],\\n \\"allow_restricted_indices\\": false\\n }\\n ],\\n \\"applications\\": [],\\n \\"run_as\\": [],\\n \\"metadata\\": {},\\n \\"transient_metadata\\": {\\n \\"enabled\\": true\\n }\\n}\\n\\n\\n\\nPUT _security/user/YOUR_USERNAME\\n{\\n \\"password\\": \\"YOUR_PASSWORD\\",\\n \\"roles\\": [\\"YOURROLE\\"]\\n}\\n```\\n\\n### On OpenShift\\n\\nOn the OpenShift cluster, we need to follow the [official documentation](https://docs.openshift.com/container-platform/4.14/logging/log_collection_forwarding/log-forwarding.html) from Red Hat on how to install Red Hat OpenShift Logging and configure Cluster Logging and the Cluster Log Forwarder.\\n\\nWe need to install the Red Hat OpenShift Logging Operator, which defines the ClusterLogging and ClusterLogForwarder resources. Afterward, we can define the Cluster Logging resource:\\n\\n```yaml\\napiVersion: logging.openshift.io/v1\\nkind: ClusterLogging\\nmetadata:\\n name: instance\\n namespace: openshift-logging\\nspec:\\n collection:\\n logs:\\n type: vector\\n vector: {}\\n```\\n\\nThe Cluster Log Forwarder is the resource responsible for defining a daemon set that will forward the logs to the remote Elasticsearch. Before creating it, we need to create a secret containing the Elasticsearch credentials for the user we created previously; the secret must live in the same namespace where the ClusterLogForwarder will be deployed:\\n\\n```yaml\\napiVersion: v1\\nkind: Secret\\nmetadata:\\n name: elasticsearch-password\\n namespace: openshift-logging\\ntype: Opaque\\nstringData:\\n username: YOUR_USERNAME\\n password: YOUR_PASSWORD\\n```\\n\\nFinally, we create the ClusterLogForwarder resource:\\n\\n```yaml\\nkind: ClusterLogForwarder\\napiVersion: logging.openshift.io/v1\\nmetadata:\\n name: instance\\n namespace: openshift-logging\\nspec:\\n outputs:\\n - name: remote-elasticsearch\\n secret:\\n name: elasticsearch-password\\n type: elasticsearch\\n url: \\"https://YOUR_ELASTICSEARCH_URL:443\\"\\n elasticsearch:\\n version: 8 # The default is version 6 with the _type field\\n pipelines:\\n - inputRefs:\\n - application\\n - audit\\n name: enable-default-log-store\\n outputRefs:\\n - remote-elasticsearch\\n```\\n\\nNote that we explicitly set the Elasticsearch version to 8; otherwise, the ClusterLogForwarder will send the \\\\_type field, which is not compatible with Elasticsearch 8. Also note that we collect only application and audit logs.\\n\\n## Result\\n\\nOnce the logs are collected and passed through all the pipelines, the result is very close to the out-of-the-box Kubernetes integration. There are important differences, like host and cloud metadata, which do not seem to be collected (at least not without additional configuration). We can view the Kubernetes container logs in the logs explorer:\\n\\n![openshift-summary-blog-graphs](/assets/images/openshift-container-logs-red-hat-logging-operator/openshift-summary-blog-graphs.png)\\n\\nIn this post, we described how you can use the OpenShift Logging Operator to collect container logs and audit logs. We still recommend leveraging Elastic Agent to collect all your logs; it is the best user experience you can get, with no need to maintain these pipelines or transform the logs to ECS format yourself. Additionally, Elastic Agent uses API keys as the authentication method and collects metadata like cloud information, which allows you to do [more](https://www.elastic.co/blog/optimize-cloud-resources-cost-apm-metadata-elastic-observability) in the long run.\\n\\n[Learn more about log monitoring with the Elastic Stack](https://www.elastic.co/observability/log-monitoring).\\n\\n_Have feedback on this blog?_ [_Share it here_](https://github.com/herrBez/elastic-blog-openshift-logging/issues)_._\\n\\n_The release and timing of any features or functionality described in this post remain at Elastic\'s sole discretion. 
Any features or functionality not currently available may not be delivered on time or at all._\\n","code":"var Component=(()=>{var p=Object.create;var o=Object.defineProperty;var g=Object.getOwnPropertyDescriptor;var u=Object.getOwnPropertyNames;var f=Object.getPrototypeOf,m=Object.prototype.hasOwnProperty;var b=(t,e)=>()=>(e||t((e={exports:{}}).exports,e),e.exports),w=(t,e)=>{for(var i in e)o(t,i,{get:e[i],enumerable:!0})},s=(t,e,i,a)=>{if(e&&typeof e==\\"object\\"||typeof e==\\"function\\")for(let r of u(e))!m.call(t,r)&&r!==i&&o(t,r,{get:()=>e[r],enumerable:!(a=g(e,r))||a.enumerable});return t};var _=(t,e,i)=>(i=t!=null?p(f(t)):{},s(e||!t||!t.__esModule?o(i,\\"default\\",{value:t,enumerable:!0}):i,t)),k=t=>s(o({},\\"__esModule\\",{value:!0}),t);var c=b((O,l)=>{l.exports=_jsx_runtime});var v={};w(v,{default:()=>h,frontmatter:()=>y});var n=_(c()),y={title:\\"Collecting OpenShift container logs using Red Hat\\\\u2019s OpenShift Logging Operator\\",slug:\\"openshift-container-logs-red-hat-logging-operator\\",date:\\"2024-01-16\\",description:\\"Learn how to optimize OpenShift logs collected with Red Hat OpenShift Logging Operator, as well as format and route them efficiently in Elasticsearch.\\",author:[{slug:\\"mirko-bez\\"},{slug:\\"david-ricordel\\"},{slug:\\"philipp-kahr\\"}],image:\\"139687_-_Blog_Header_Banner_V1.jpg\\",tags:[{slug:\\"kubernetes\\"},{slug:\\"redhat\\"},{slug:\\"log-analytics\\"}]};function d(t){let e={a:\\"a\\",code:\\"code\\",em:\\"em\\",h2:\\"h2\\",h3:\\"h3\\",img:\\"img\\",li:\\"li\\",p:\\"p\\",pre:\\"pre\\",strong:\\"strong\\",ul:\\"ul\\",...t.components};return(0,n.jsxs)(n.Fragment,{children:[(0,n.jsxs)(e.p,{children:[\\"This blog explores a possible approach to collecting and formatting OpenShift Container Platform logs and audit logs with Red Hat OpenShift Logging Operator. We recommend using Elastic\\\\xAE Agent for the best possible experience! We will also show how to format the logs to Elastic Common Schema (\\",(0,n.jsx)(e.a,{href:\\"https://www.elastic.co/guide/en/ecs/current/index.html\\",rel:\\"nofollow\\",children:\\"ECS\\"}),\\") for the best experience viewing, searching, and visualizing your logs. All examples in this blog are based on OpenShift 4.14.\\"]}),`\\n`,(0,n.jsx)(e.h2,{id:\\"why-use-openshift-logging-operator\\",children:\\"Why use OpenShift Logging Operator?\\"}),`\\n`,(0,n.jsx)(e.p,{children:\\"A lot of enterprise customers use OpenShift as their orchestrating solution. The advantages of this approach are:\\"}),`\\n`,(0,n.jsxs)(e.ul,{children:[`\\n`,(0,n.jsxs)(e.li,{children:[`\\n`,(0,n.jsx)(e.p,{children:\\"It is developed and supported by Red Hat\\"}),`\\n`]}),`\\n`,(0,n.jsxs)(e.li,{children:[`\\n`,(0,n.jsx)(e.p,{children:\\"It can automatically update the OpenShift cluster along with the Operating system to make sure that they are and remain compatible\\"}),`\\n`]}),`\\n`,(0,n.jsxs)(e.li,{children:[`\\n`,(0,n.jsx)(e.p,{children:\\"It can speed up developing life cycles with features like source to image\\"}),`\\n`]}),`\\n`,(0,n.jsxs)(e.li,{children:[`\\n`,(0,n.jsx)(e.p,{children:\\"It uses enhanced security\\"}),`\\n`]}),`\\n`]}),`\\n`,(0,n.jsxs)(e.p,{children:[\\"In our consulting experience, this latter aspect poses challenges and frictions with OpenShift administrators when we try to install an Elastic Agent to collect the logs of the pods. Indeed, Elastic Agent requires the files of the host to be mounted in the pod, and it also needs to be run in privileged mode. 
(Read more about the permissions required by Elastic Agent in the \\",(0,n.jsx)(e.a,{href:\\"https://www.elastic.co/guide/en/fleet/current/running-on-kubernetes-standalone.html#_red_hat_openshift_configuration\\",rel:\\"nofollow\\",children:\\"official Elasticsearch\\\\xAE Documentation\\"}),\\"). While the solution we explore in this post requires similar privileges under the hood, it is managed by the OpenShift Logging Operator, which is developed and supported by Red Hat.\\"]}),`\\n`,(0,n.jsx)(e.h2,{id:\\"which-logs-are-we-going-to-collect\\",children:\\"Which logs are we going to collect?\\"}),`\\n`,(0,n.jsxs)(e.p,{children:[\\"In OpenShift Container Platform, we distinguish \\",(0,n.jsx)(e.a,{href:\\"https://docs.openshift.com/container-platform/4.14/logging/cluster-logging.html#logging-architecture-overview_cluster-logging\\",rel:\\"nofollow\\",children:\\"three broad categories of logs\\"}),\\": audit, application, and infrastructure logs:\\"]}),`\\n`,(0,n.jsxs)(e.ul,{children:[`\\n`,(0,n.jsxs)(e.li,{children:[`\\n`,(0,n.jsxs)(e.p,{children:[(0,n.jsx)(e.strong,{children:\\"Audit logs\\"}),\\" describe the list of activities that affected the system by users, administrators, and other components.\\"]}),`\\n`]}),`\\n`,(0,n.jsxs)(e.li,{children:[`\\n`,(0,n.jsxs)(e.p,{children:[(0,n.jsx)(e.strong,{children:\\"Application logs\\"}),\\" are composed of the container logs of the pods running in non-reserved namespaces.\\"]}),`\\n`]}),`\\n`,(0,n.jsxs)(e.li,{children:[`\\n`,(0,n.jsxs)(e.p,{children:[(0,n.jsx)(e.strong,{children:\\"Infrastructure logs\\"}),\\" are composed of container logs of the pods running in reserved namespaces like openshift*, kube*, and default along with journald messages from the nodes.\\"]}),`\\n`]}),`\\n`]}),`\\n`,(0,n.jsx)(e.p,{children:\\"In the following, we will consider only audit and application logs for the sake of simplicity. In this post, we will describe how to format audit and application Logs in the format expected by the Kubernetes integration to take the most out of Elastic Observability.\\"}),`\\n`,(0,n.jsx)(e.h2,{id:\\"getting-started\\",children:\\"Getting started\\"}),`\\n`,(0,n.jsx)(e.p,{children:\\"To collect the logs from OpenShift, we must perform some preparation steps in Elasticsearch and OpenShift.\\"}),`\\n`,(0,n.jsx)(e.h3,{id:\\"inside-elasticsearch\\",children:\\"Inside Elasticsearch\\"}),`\\n`,(0,n.jsxs)(e.p,{children:[\\"We first \\",(0,n.jsx)(e.a,{href:\\"https://www.elastic.co/guide/en/fleet/8.11/install-uninstall-integration-assets.html#install-integration-assets\\",rel:\\"nofollow\\",children:\\"install the Kubernetes integration assets\\"}),\\". We are mainly interested in the index templates and ingest pipelines for the logs-kubernetes.container_logs and logs-kubernetes.audit_logs.\\"]}),`\\n`,(0,n.jsxs)(e.p,{children:[\\"To format the logs received from the ClusterLogForwarder in \\",(0,n.jsx)(e.a,{href:\\"https://www.elastic.co/guide/en/ecs/current/index.html\\",rel:\\"nofollow\\",children:\\"ECS\\"}),\\" format, we will define a pipeline to normalize the container logs. The field naming convention used by OpenShift is slightly different from that used by ECS. To get a list of exported fields from OpenShift, refer to \\",(0,n.jsx)(e.a,{href:\\"https://docs.openshift.com/container-platform/4.14/logging/cluster-logging-exported-fields.html\\",rel:\\"nofollow\\",children:\\"Exported fields | Logging | OpenShift Container Platform 4.14\\"}),\\". 
To get a list of exported fields of the Kubernetes integration, you can refer to \\",(0,n.jsx)(e.a,{href:\\"https://www.elastic.co/guide/en/beats/filebeat/current/exported-fields-kubernetes-processor.html\\",rel:\\"nofollow\\",children:\\"Kubernetes fields | Filebeat Reference [8.11] | Elastic\\"}),\\" and \\",(0,n.jsx)(e.a,{href:\\"https://www.elastic.co/guide/en/observability/current/logs-app-fields.html\\",rel:\\"nofollow\\",children:\\"Logs app fields | Elastic Observability [8.11]\\"}),\\". Further, specific fields like kubernetes.annotations must be normalized by replacing dots with underscores. This operation is usually done automatically by Elastic Agent.\\"]}),`\\n`,(0,n.jsx)(e.pre,{children:(0,n.jsx)(e.code,{className:\\"language-bash\\",children:`PUT _ingest/pipeline/openshift-2-ecs\\n{\\n \\"processors\\": [\\n {\\n \\"rename\\": {\\n \\"field\\": \\"kubernetes.pod_id\\",\\n \\"target_field\\": \\"kubernetes.pod.uid\\",\\n \\"ignore_missing\\": true\\n }\\n },\\n {\\n \\"rename\\": {\\n \\"field\\": \\"kubernetes.pod_ip\\",\\n \\"target_field\\": \\"kubernetes.pod.ip\\",\\n \\"ignore_missing\\": true\\n }\\n },\\n {\\n \\"rename\\": {\\n \\"field\\": \\"kubernetes.pod_name\\",\\n \\"target_field\\": \\"kubernetes.pod.name\\",\\n \\"ignore_missing\\": true\\n }\\n },\\n {\\n \\"rename\\": {\\n \\"field\\": \\"kubernetes.namespace_name\\",\\n \\"target_field\\": \\"kubernetes.namespace\\",\\n \\"ignore_missing\\": true\\n }\\n },\\n {\\n \\"rename\\": {\\n \\"field\\": \\"kubernetes.namespace_id\\",\\n \\"target_field\\": \\"kubernetes.namespace_uid\\",\\n \\"ignore_missing\\": true\\n }\\n },\\n {\\n \\"rename\\": {\\n \\"field\\": \\"kubernetes.container_id\\",\\n \\"target_field\\": \\"container.id\\",\\n \\"ignore_missing\\": true\\n }\\n },\\n {\\n \\"dissect\\": {\\n \\"field\\": \\"container.id\\",\\n \\"pattern\\": \\"%{container.runtime}://%{container.id}\\",\\n \\"ignore_failure\\": true\\n }\\n },\\n {\\n \\"rename\\": {\\n \\"field\\": \\"kubernetes.container_image\\",\\n \\"target_field\\": \\"container.image.name\\",\\n \\"ignore_missing\\": true\\n }\\n },\\n {\\n \\"set\\": {\\n \\"field\\": \\"kubernetes.container.image\\",\\n \\"copy_from\\": \\"container.image.name\\",\\n \\"ignore_failure\\": true\\n }\\n },\\n {\\n \\"set\\": {\\n \\"copy_from\\": \\"kubernetes.container_name\\",\\n \\"field\\": \\"container.name\\",\\n \\"ignore_failure\\": true\\n }\\n },\\n {\\n \\"rename\\": {\\n \\"field\\": \\"kubernetes.container_name\\",\\n \\"target_field\\": \\"kubernetes.container.name\\",\\n \\"ignore_missing\\": true\\n }\\n },\\n {\\n \\"set\\": {\\n \\"field\\": \\"kubernetes.node.name\\",\\n \\"copy_from\\": \\"hostname\\",\\n \\"ignore_failure\\": true\\n }\\n },\\n {\\n \\"rename\\": {\\n \\"field\\": \\"hostname\\",\\n \\"target_field\\": \\"host.name\\",\\n \\"ignore_missing\\": true\\n }\\n },\\n {\\n \\"rename\\": {\\n \\"field\\": \\"level\\",\\n \\"target_field\\": \\"log.level\\",\\n \\"ignore_missing\\": true\\n }\\n },\\n {\\n \\"rename\\": {\\n \\"field\\": \\"file\\",\\n \\"target_field\\": \\"log.file.path\\",\\n \\"ignore_missing\\": true\\n }\\n },\\n {\\n \\"set\\": {\\n \\"copy_from\\": \\"openshift.cluster_id\\",\\n \\"field\\": \\"orchestrator.cluster.name\\",\\n \\"ignore_failure\\": true\\n }\\n },\\n {\\n \\"dissect\\": {\\n \\"field\\": \\"kubernetes.pod_owner\\",\\n \\"pattern\\": \\"%{_tmp.parent_type}/%{_tmp.parent_name}\\",\\n \\"ignore_missing\\": true\\n }\\n },\\n {\\n \\"lowercase\\": {\\n \\"field\\": \\"_tmp.parent_type\\",\\n 
\\"ignore_missing\\": true\\n }\\n },\\n {\\n \\"set\\": {\\n \\"field\\": \\"kubernetes.pod.{{_tmp.parent_type}}.name\\",\\n \\"value\\": \\"{{_tmp.parent_name}}\\",\\n \\"if\\": \\"ctx?._tmp?.parent_type != null\\",\\n \\"ignore_failure\\": true\\n }\\n },\\n {\\n \\"remove\\": {\\n \\"field\\": [\\n \\"_tmp\\",\\n \\"kubernetes.pod_owner\\"\\n ],\\n \\"ignore_missing\\": true\\n }\\n },\\n {\\n \\"script\\": {\\n \\"description\\": \\"Normalize kubernetes annotations\\",\\n \\"if\\": \\"ctx?.kubernetes?.annotations != null\\",\\n \\"source\\": \\"\\"\\"\\n def keys = new ArrayList(ctx.kubernetes.annotations.keySet());\\n for(k in keys) {\\n if (k.indexOf(\\".\\") >= 0) {\\n def sanitizedKey = k.replace(\\".\\", \\"_\\");\\n ctx.kubernetes.annotations[sanitizedKey] = ctx.kubernetes.annotations[k];\\n ctx.kubernetes.annotations.remove(k);\\n }\\n }\\n \\"\\"\\"\\n }\\n },\\n {\\n \\"script\\": {\\n \\"description\\": \\"Normalize kubernetes namespace_labels\\",\\n \\"if\\": \\"ctx?.kubernetes?.namespace_labels != null\\",\\n \\"source\\": \\"\\"\\"\\n def keys = new ArrayList(ctx.kubernetes.namespace_labels.keySet());\\n for(k in keys) {\\n if (k.indexOf(\\".\\") >= 0) {\\n def sanitizedKey = k.replace(\\".\\", \\"_\\");\\n ctx.kubernetes.namespace_labels[sanitizedKey] = ctx.kubernetes.namespace_labels[k];\\n ctx.kubernetes.namespace_labels.remove(k);\\n }\\n }\\n \\"\\"\\"\\n }\\n },\\n {\\n \\"script\\": {\\n \\"description\\": \\"Normalize special Kubernetes Labels used in logs-kubernetes.container_logs to determine service.name and service.version\\",\\n \\"if\\": \\"ctx?.kubernetes?.labels != null\\",\\n \\"source\\": \\"\\"\\"\\n def keys = new ArrayList(ctx.kubernetes.labels.keySet());\\n for(k in keys) {\\n if (k.startsWith(\\"app_kubernetes_io_component_\\")) {\\n def sanitizedKey = k.replace(\\"app_kubernetes_io_component_\\", \\"app_kubernetes_io_component/\\");\\n ctx.kubernetes.labels[sanitizedKey] = ctx.kubernetes.labels[k];\\n ctx.kubernetes.labels.remove(k);\\n }\\n }\\n \\"\\"\\"\\n }\\n }\\n ]\\n}\\n`})}),`\\n`,(0,n.jsx)(e.p,{children:\\"Similarly, to handle the audit logs like the ones collected by Kubernetes, we define an ingest pipeline:\\"}),`\\n`,(0,n.jsx)(e.pre,{children:(0,n.jsx)(e.code,{className:\\"language-bash\\",children:`PUT _ingest/pipeline/openshift-audit-2-ecs\\n{\\n \\"processors\\": [\\n {\\n \\"script\\": {\\n \\"source\\": \\"\\"\\"\\n def audit = [:];\\n def keyToRemove = [];\\n for(k in ctx.keySet()) {\\n if (k.indexOf(\'_\') != 0 && ![\'@timestamp\', \'data_stream\', \'openshift\', \'event\', \'hostname\'].contains(k)) {\\n audit[k] = ctx[k];\\n keyToRemove.add(k);\\n }\\n }\\n for(k in keyToRemove) {\\n ctx.remove(k);\\n }\\n ctx.kubernetes=[\\"audit\\":audit];\\n \\"\\"\\",\\n \\"description\\": \\"Move all the \'kubernetes.audit\' fields under \'kubernetes.audit\' object\\"\\n }\\n },\\n {\\n \\"set\\": {\\n \\"copy_from\\": \\"openshift.cluster_id\\",\\n \\"field\\": \\"orchestrator.cluster.name\\",\\n \\"ignore_failure\\": true\\n }\\n },\\n {\\n \\"set\\": {\\n \\"field\\": \\"kubernetes.node.name\\",\\n \\"copy_from\\": \\"hostname\\",\\n \\"ignore_failure\\": true\\n }\\n },\\n {\\n \\"rename\\": {\\n \\"field\\": \\"hostname\\",\\n \\"target_field\\": \\"host.name\\",\\n \\"ignore_missing\\": true\\n }\\n },\\n {\\n \\"script\\": {\\n \\"if\\": \\"ctx?.kubernetes?.audit?.annotations != null\\",\\n \\"source\\": \\"\\"\\"\\n def keys = new ArrayList(ctx.kubernetes.audit.annotations.keySet());\\n for(k in keys) {\\n if (k.indexOf(\\".\\") >= 
0) {\\n def sanitizedKey = k.replace(\\".\\", \\"_\\");\\n ctx.kubernetes.audit.annotations[sanitizedKey] = ctx.kubernetes.audit.annotations[k];\\n ctx.kubernetes.audit.annotations.remove(k);\\n }\\n }\\n \\"\\"\\",\\n \\"description\\": \\"Normalize kubernetes audit annotations field as expected by the Integration\\"\\n }\\n }\\n ]\\n}\\n`})}),`\\n`,(0,n.jsx)(e.p,{children:\\"The main objective of the pipeline is to mimic what Elastic Agent is doing: storing all audit fields under the kubernetes.audit object.\\"}),`\\n`,(0,n.jsxs)(e.p,{children:[\\"We are not going to use the conventional @custom pipeline approach because the fields must be normalized before invoking the logs-kubernetes.container_logs integration pipeline, which uses fields like kubernetes.container.name and kubernetes.labels to determine service.name and service.version. Read more about custom pipelines in \\",(0,n.jsx)(e.a,{href:\\"https://www.elastic.co/guide/en/fleet/8.11/data-streams-pipeline-tutorial.html#data-streams-pipeline-one\\",rel:\\"nofollow\\",children:\\"Tutorial: Transform data with custom ingest pipelines | Fleet and Elastic Agent Guide [8.11]\\"}),\\".\\"]}),`\\n`,(0,n.jsxs)(e.p,{children:[\\"The OpenShift Cluster Log Forwarder writes the data to the indices app-write and audit-write by default. It is possible to change this behavior, but it still tries to prepend the prefix \\\\u201Capp\\\\u201D and append the suffix \\\\u201Cwrite\\\\u201D, so we opted to send the data to the default destination and use the reroute processor to send it to the right data streams. Read more about the reroute processor in our blog \\",(0,n.jsx)(e.a,{href:\\"https://www.elastic.co/blog/simplifying-log-data-management-flexible-routing-elastic\\",rel:\\"nofollow\\",children:\\"Simplifying log data management: Harness the power of flexible routing with Elastic\\"}),\\" and our documentation \\",(0,n.jsx)(e.a,{href:\\"https://www.elastic.co/guide/en/elasticsearch/reference/current/reroute-processor.html\\",rel:\\"nofollow\\",children:\\"Reroute processor | Elasticsearch Guide [8.11] | Elastic\\"}),\\".\\"]}),`\\n`,(0,n.jsx)(e.p,{children:\\"In this case, we want to redirect the container logs (app-write index) to logs-kubernetes.container_logs and the audit logs (audit-write) to logs-kubernetes.audit_logs:\\"}),`\\n`,(0,n.jsx)(e.pre,{children:(0,n.jsx)(e.code,{className:\\"language-bash\\",children:`PUT _ingest/pipeline/app-write-reroute-pipeline\\n{\\n \\"processors\\": [\\n {\\n \\"pipeline\\": {\\n \\"name\\": \\"openshift-2-ecs\\",\\n \\"description\\": \\"Format the Openshift data in ECS\\"\\n }\\n },\\n {\\n \\"set\\": {\\n \\"field\\": \\"event.dataset\\",\\n \\"value\\": \\"kubernetes.container_logs\\"\\n }\\n },\\n {\\n \\"reroute\\": {\\n \\"destination\\": \\"logs-kubernetes.container_logs-openshift\\"\\n }\\n }\\n ]\\n}\\n\\n\\n\\nPUT _ingest/pipeline/audit-write-reroute-pipeline\\n{\\n \\"processors\\": [\\n {\\n \\"pipeline\\": {\\n \\"name\\": \\"openshift-audit-2-ecs\\",\\n \\"description\\": \\"Format the Openshift data in ECS\\"\\n }\\n },\\n {\\n \\"set\\": {\\n \\"field\\": \\"event.dataset\\",\\n \\"value\\": \\"kubernetes.audit_logs\\"\\n }\\n },\\n {\\n \\"reroute\\": {\\n \\"destination\\": \\"logs-kubernetes.audit_logs-openshift\\"\\n }\\n }\\n ]\\n}\\n`})}),`\\n`,(0,n.jsxs)(e.p,{children:[\\"Note that because app-write and audit-write do not follow the data stream naming convention, we must set the destination field explicitly in the reroute processor.\\"]}),
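`\\n`,(0,n.jsx)(e.p,{children:\\"Before creating the indices, it can be useful to dry-run the ECS conversion in isolation with the simulate pipeline API. The document below is a hypothetical, minimal sample (field names and values are made up for illustration); the annotation key containing dots should come back with the dots replaced by underscores:\\"}),`\\n`,(0,n.jsx)(e.pre,{children:(0,n.jsx)(e.code,{className:\\"language-bash\\",children:`POST _ingest/pipeline/openshift-2-ecs/_simulate\\n{\\n \\"docs\\": [\\n {\\n \\"_source\\": {\\n \\"kubernetes\\": {\\n \\"annotations\\": {\\n \\"app.kubernetes.io/name\\": \\"demo\\"\\n }\\n }\\n }\\n }\\n ]\\n}\\n`})}),`\\n`,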
(0,n.jsxs)(e.p,{children:[\\"The reroute processor will also fill in the \\",(0,n.jsx)(e.a,{href:\\"https://www.elastic.co/guide/en/ecs/8.11/ecs-data_stream.html\\",rel:\\"nofollow\\",children:\\"data_stream fields\\"}),\\" for us. Note that this step is done automatically by Elastic Agent at the source.\\"]}),`\\n`,(0,n.jsx)(e.p,{children:\\"Next, we create the indices and set the reroute pipelines we just created as their default pipelines:\\"}),`\\n`,(0,n.jsx)(e.pre,{children:(0,n.jsx)(e.code,{className:\\"language-bash\\",children:`PUT app-write\\n{\\n \\"settings\\": {\\n \\"index.default_pipeline\\": \\"app-write-reroute-pipeline\\"\\n }\\n}\\n\\n\\nPUT audit-write\\n{\\n \\"settings\\": {\\n \\"index.default_pipeline\\": \\"audit-write-reroute-pipeline\\"\\n }\\n}\\n`})}),`\\n`,(0,n.jsx)(e.p,{children:\\"What we did can be summarized in this picture:\\"}),`\\n`,(0,n.jsx)(e.p,{children:(0,n.jsx)(e.img,{src:\\"/assets/images/openshift-container-logs-red-hat-logging-operator/openshift-summary-blog.png\\",alt:\\"openshift-summary-blog\\",width:\\"1207\\",height:\\"657\\"})}),`\\n`,(0,n.jsxs)(e.p,{children:[\\"Let us take the container logs. When the operator attempts to write to the app-write index, it invokes the default pipeline \\\\u201Capp-write-reroute-pipeline\\\\u201D, which formats the logs into ECS and reroutes them to the logs-kubernetes.container_logs-openshift data stream. This calls the integration pipeline, which invokes, if it exists, the logs-kubernetes.container_logs@custom pipeline. Finally, the logs-kubernetes.container_logs pipeline may reroute the logs to another dataset and namespace using the elastic.co/dataset and elastic.co/namespace annotations, as described in the Kubernetes \\",(0,n.jsx)(e.a,{href:\\"https://docs.elastic.co/integrations/kubernetes/container-logs#rerouting-based-on-pod-annotations\\",rel:\\"nofollow\\",children:\\"integration documentation\\"}),\\", which in turn can lead to the execution of another integration pipeline.\\"]}),`\\n`,(0,n.jsx)(e.h3,{id:\\"create-a-user-for-sending-the-logs\\",children:\\"Create a user for sending the logs\\"}),`\\n`,(0,n.jsx)(e.p,{children:\\"We are going to use basic authentication because, at the time of writing, it is the only supported authentication method for Elasticsearch in OpenShift logging. 
Thus, we need a role that allows the user to read and write the app-write and audit-write indices (required by the OpenShift agent) and grants auto_configure access to logs-*-* to allow custom Kubernetes rerouting:\\"}),`\\n`,(0,n.jsx)(e.pre,{children:(0,n.jsx)(e.code,{className:\\"language-bash\\",children:`PUT _security/role/YOURROLE\\n{\\n \\"cluster\\": [\\n \\"monitor\\"\\n ],\\n \\"indices\\": [\\n {\\n \\"names\\": [\\n \\"logs-*-*\\"\\n ],\\n \\"privileges\\": [\\n \\"auto_configure\\",\\n \\"create_doc\\"\\n ],\\n \\"allow_restricted_indices\\": false\\n },\\n {\\n \\"names\\": [\\n \\"app-write\\",\\n \\"audit-write\\"\\n ],\\n \\"privileges\\": [\\n \\"create_doc\\",\\n \\"read\\"\\n ],\\n \\"allow_restricted_indices\\": false\\n }\\n ],\\n \\"applications\\": [],\\n \\"run_as\\": [],\\n \\"metadata\\": {},\\n \\"transient_metadata\\": {\\n \\"enabled\\": true\\n }\\n}\\n\\n\\n\\nPUT _security/user/YOUR_USERNAME\\n{\\n \\"password\\": \\"YOUR_PASSWORD\\",\\n \\"roles\\": [\\"YOURROLE\\"]\\n}\\n`})}),`\\n`,(0,n.jsx)(e.h3,{id:\\"on-openshift\\",children:\\"On OpenShift\\"}),`\\n`,(0,n.jsxs)(e.p,{children:[\\"On the OpenShift cluster, we need to follow Red Hat\\\\u2019s \\",(0,n.jsx)(e.a,{href:\\"https://docs.openshift.com/container-platform/4.14/logging/log_collection_forwarding/log-forwarding.html\\",rel:\\"nofollow\\",children:\\"official documentation\\"}),\\" on how to install Red Hat OpenShift Logging and configure Cluster Logging and the Cluster Log Forwarder.\\"]}),`\\n`,(0,n.jsx)(e.p,{children:\\"We need to install the Red Hat OpenShift Logging Operator, which defines the ClusterLogging and ClusterLogForwarder resources. Afterward, we can define the Cluster Logging resource:\\"}),`\\n`,(0,n.jsx)(e.pre,{children:(0,n.jsx)(e.code,{className:\\"language-yaml\\",children:`apiVersion: logging.openshift.io/v1\\nkind: ClusterLogging\\nmetadata:\\n name: instance\\n namespace: openshift-logging\\nspec:\\n collection:\\n logs:\\n type: vector\\n vector: {}\\n`})}),`\\n`,(0,n.jsx)(e.p,{children:\\"The Cluster Log Forwarder is the resource responsible for defining a daemon set that will forward the logs to the remote Elasticsearch.
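\\"}),`\\n`,(0,n.jsx)(e.p,{children:\\"Since the forwarder will authenticate with the user we created earlier, it is worth a quick smoke test that Elasticsearch accepts the credentials before handing them to OpenShift. The URL, username, and password below are placeholders:\\"}),`\\n`,(0,n.jsx)(e.pre,{children:(0,n.jsx)(e.code,{className:\\"language-bash\\",children:`# Returns the user and its roles if the credentials are valid\\ncurl -u \\"YOUR_USERNAME:YOUR_PASSWORD\\" \\"https://YOUR_ELASTICSEARCH_URL:443/_security/_authenticate\\"\\n`})}),`\\n`,(0,n.jsx)(e.p,{children:\\"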
Before creating it, we need a secret containing the Elasticsearch credentials for the user we created previously, placed in the same namespace where the ClusterLogForwarder will be deployed:\\"}),`\\n`,(0,n.jsx)(e.pre,{children:(0,n.jsx)(e.code,{className:\\"language-yaml\\",children:`apiVersion: v1\\nkind: Secret\\nmetadata:\\n name: elasticsearch-password\\n namespace: openshift-logging\\ntype: Opaque\\nstringData:\\n username: YOUR_USERNAME\\n password: YOUR_PASSWORD\\n`})}),`\\n`,(0,n.jsx)(e.p,{children:\\"Finally, we create the ClusterLogForwarder resource:\\"}),`\\n`,(0,n.jsx)(e.pre,{children:(0,n.jsx)(e.code,{className:\\"language-yaml\\",children:`kind: ClusterLogForwarder\\napiVersion: logging.openshift.io/v1\\nmetadata:\\n name: instance\\n namespace: openshift-logging\\nspec:\\n outputs:\\n - name: remote-elasticsearch\\n secret:\\n name: elasticsearch-password\\n type: elasticsearch\\n url: \\"https://YOUR_ELASTICSEARCH_URL:443\\"\\n elasticsearch:\\n version: 8 # The default is version 6 with the _type field\\n pipelines:\\n - inputRefs:\\n - application\\n - audit\\n name: enable-default-log-store\\n outputRefs:\\n - remote-elasticsearch\\n`})}),`\\n`,(0,n.jsx)(e.p,{children:\\"Note that we explicitly set the Elasticsearch version to 8; otherwise, the ClusterLogForwarder sends the _type field, which is not compatible with Elasticsearch 8. Also note that we collect only application and audit logs.\\"}),`\\n`,(0,n.jsx)(e.h2,{id:\\"result\\",children:\\"Result\\"}),`\\n`,(0,n.jsx)(e.p,{children:\\"Once the logs are collected and passed through all the pipelines, the result is very close to the out-of-the-box Kubernetes integration. There are important differences, such as the lack of host and cloud metadata, which does not seem to be collected (at least not without additional configuration). We can view the Kubernetes container logs in the logs explorer:\\"}),`\\n`,(0,n.jsx)(e.p,{children:(0,n.jsx)(e.img,{src:\\"/assets/images/openshift-container-logs-red-hat-logging-operator/openshift-summary-blog-graphs.png\\",alt:\\"openshift-summary-blog-graphs\\",width:\\"1600\\",height:\\"809\\"})}),`\\n`,(0,n.jsxs)(e.p,{children:[\\"In this post, we described how you can use the OpenShift Logging Operator to collect container and audit logs. We still recommend leveraging Elastic Agent to collect all your logs: it offers the best user experience, with no need to maintain pipelines or transform the logs into ECS yourself. 
Additionally, Elastic Agent uses API keys as the authentication method and collects metadata like cloud information that allow you in the long run to do \\",(0,n.jsx)(e.a,{href:\\"https://www.elastic.co/blog/optimize-cloud-resources-cost-apm-metadata-elastic-observability\\",rel:\\"nofollow\\",children:\\"more\\"}),\\".\\"]}),`\\n`,(0,n.jsxs)(e.p,{children:[(0,n.jsx)(e.a,{href:\\"https://www.elastic.co/observability/log-monitoring\\",rel:\\"nofollow\\",children:\\"Learn more about log monitoring with the Elastic Stack\\"}),\\".\\"]}),`\\n`,(0,n.jsxs)(e.p,{children:[(0,n.jsx)(e.em,{children:\\"Have feedback on this blog?\\"}),\\" \\",(0,n.jsx)(e.a,{href:\\"https://github.com/herrBez/elastic-blog-openshift-logging/issues\\",rel:\\"nofollow\\",children:(0,n.jsx)(e.em,{children:\\"Share it here\\"})}),(0,n.jsx)(e.em,{children:\\".\\"})]}),`\\n`,(0,n.jsx)(e.p,{children:(0,n.jsx)(e.em,{children:\\"The release and timing of any features or functionality described in this post remain at Elastic\'s sole discretion. Any features or functionality not currently available may not be delivered on time or at all.\\"})})]})}function h(t={}){let{wrapper:e}=t.components||{};return e?(0,n.jsx)(e,{...t,children:(0,n.jsx)(d,{...t})}):d(t)}return k(v);})();\\n;return Component;"},"_id":"articles/openshift-container-logs-red-hat-logging-operator.mdx","_raw":{"sourceFilePath":"articles/openshift-container-logs-red-hat-logging-operator.mdx","sourceFileName":"openshift-container-logs-red-hat-logging-operator.mdx","sourceFileDir":"articles","contentType":"mdx","flattenedPath":"articles/openshift-container-logs-red-hat-logging-operator"},"type":"Article","imageUrl":"/assets/images/openshift-container-logs-red-hat-logging-operator/139687_-_Blog_Header_Banner_V1.jpg","readingTime":"23 min read","url":"/openshift-container-logs-red-hat-logging-operator","headings":[{"level":2,"title":"Why use OpenShift Logging Operator?","href":"#why-use-openshift-logging-operator"},{"level":2,"title":"Which logs are we going to collect?","href":"#which-logs-are-we-going-to-collect"},{"level":2,"title":"Getting started","href":"#getting-started"},{"level":3,"title":"Inside Elasticsearch","href":"#inside-elasticsearch"},{"level":3,"title":"Create a user for sending the logs","href":"#create-a-user-for-sending-the-logs"},{"level":3,"title":"On OpenShift","href":"#on-openshift"},{"level":2,"title":"Result","href":"#result"}]},{"title":"How to combine OpenTelemetry instrumentation with Elastic APM Agent features","slug":"opentelemetry-instrumentation-apm-agent-features","date":"2023-07-13","description":"This post shows you how you can combine the OpenTelemetry tracing APIs with Elastic APM Agents. You\'ll learn how OpenTelemetry spans became part of a trace that Elastic APM Agents report.","image":"opentelemetry_apm-blog-720x420.jpeg","author":[{"slug":"greg-kalapos","type":"Author","_raw":{}}],"tags":[{"slug":"opentelemetry","type":"Tag","_raw":{}},{"slug":"python","type":"Tag","_raw":{}},{"slug":"elastic-agent","type":"Tag","_raw":{}},{"slug":"apm","type":"Tag","_raw":{}}],"body":{"raw":"\\nElastic APM supports OpenTelemetry on multiple levels. One easy-to understand scenario, which [we previously blogged about](https://www.elastic.co/blog/opentelemetry-observability), is the direct OpenTelemetry Protocol (OTLP) support in APM Server. 
This means that you can connect any OpenTelemetry agent to an Elastic APM Server and the APM Server will happily take that data, ingest it into Elasticsearch\xae, and you can view that OpenTelemetry data in the APM app in Kibana\xae.\\n\\nThis blog post will showcase a different use-case: within Elastic APM, we have [our own APM Agents](https://www.elastic.co/guide/en/apm/agent/index.html). Some of these have download numbers in the tens of millions, and some of them predate OpenTelemetry. Of course we realize OpenTelemetry is very important and it’s here to stay, so we wanted to make these agents OpenTelemetry compatible and illustrate them using [OpenTelemetry visualizations](https://www.elastic.co/observability/opentelemetry) in this blog.\\n\\nMost of our Elastic APM Agents today are able to ship OpenTelemetry spans as part of a trace. This means that if you have any component in your application that emits an OpenTelemetry span, it’ll be part of the trace the Elastic APM Agent captures. This can be a library you use that is already instrumented by the OpenTelemetry API, or it can be any other OpenTelemetry span that an application developer added into the application’s code for manual instrumentation.\\n\\nThis feature of the Elastic APM Agents not only reports those spans but also properly maintains parent-child relationships between all spans, making OpenTelemetry a first-class citizen for these agents. If, for example, an Elastic APM Agent starts a span for a specific action by auto-instrumentation and then within that span the OpenTelemetry API starts another span, then the OpenTelemetry span will be the child of the outer span created by the agent. This is reflected in the parent.id field of the spans. It’s the same the other way around as well: if a span is created by the OpenTelemetry API and within that span an Elastic APM agent captures another span, then the span created by the Elastic APM Agent will be the child of the other span created by the OpenTelemetry API.\\n\\nThis feature is present in the following agents:\\n\\n- [Java](https://www.elastic.co/guide/en/apm/agent/java/current/opentelemetry-bridge.html)\\n- [.NET](https://www.elastic.co/guide/en/apm/agent/dotnet/master/opentelemetry-bridge.html)\\n- [Python](https://www.elastic.co/guide/en/apm/agent/python/current/opentelemetry-bridge.html)\\n- [Node.js](https://www.elastic.co/guide/en/apm/agent/nodejs/current/opentelemetry-bridge.html)\\n- [Go](https://www.elastic.co/guide/en/apm/agent/go/current/opentelemetry.html)\\n\\n## Capturing OpenTelemetry spans in the Elastic .NET APM Agent\\n\\nAs a first example, let’s take an ASP.NET Core application. 
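\\n\\nAs a point of reference, wiring the agent into an ASP.NET Core host can be as small as the following sketch. This is not from the original sample app; it assumes the Elastic.Apm.NetCoreAll package, and the bridge toggle named in the comment is an assumption to verify against the agent documentation for your version:\\n\\n```csharp\\n// Program.cs - minimal hosting model\\n// Assumes the Elastic.Apm.NetCoreAll NuGet package is installed.\\nusing Elastic.Apm.NetCoreAll;\\n\\nvar builder = WebApplication.CreateBuilder(args);\\nbuilder.Services.AddControllersWithViews();\\n\\nvar app = builder.Build();\\n// Registers the agent with all auto instrumentations; the OpenTelemetry\\n// bridge is controlled through agent configuration (for example the\\n// ELASTIC_APM_OPENTELEMETRY_BRIDGE_ENABLED environment variable - check\\n// the agent docs for the exact option name in your version).\\napp.UseAllElasticApm(builder.Configuration);\\napp.MapDefaultControllerRoute();\\napp.Run();\\n```\\n\\n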
We’ll put the .NET Elastic APM Agent into this application, and we’ll turn on the feature, which automatically bridges OpenTelemetry spans, so the Elastic APM Agent will make those spans part of the trace it reports.\\n\\nThe following code snippet shows a controller:\\n\\n```csharp\\nnamespace SampleAspNetCoreApp.Controllers\\n{\\n\\tpublic class HomeController : Controller\\n\\t{\\n\\t\\tprivate readonly SampleDataContext _sampleDataContext;\\n\\t\\tprivate ActivitySource _activitySource = new ActivitySource(\\"HomeController\\");\\n\\t\\tpublic HomeController(SampleDataContext sampleDataContext) => _sampleDataContext = sampleDataContext;\\n\\t\\tpublic async Task Index()\\n\\t\\t{\\n\\t\\t\\tawait ReadGitHubStars();\\n\\t\\t\\treturn View();\\n\\t\\t}\\n\\t\\tpublic async Task ReadGitHubStars()\\n\\t\\t{\\n\\t\\t\\tusing var activity = _activitySource.StartActivity();\\n\\t\\t\\tvar httpClient = new HttpClient();\\n\\t\\t\\thttpClient.DefaultRequestHeaders.Add(\\"User-Agent\\", \\"APM-Sample-App\\");\\n\\t\\t\\tvar responseMsg = await httpClient.GetAsync(\\"https://api.github.com/repos/elastic/apm-agent-dotnet\\");\\n\\t\\t\\tvar responseStr = await responseMsg.Content.ReadAsStringAsync();\\n\\t\\t\\t// …use responseStr\\n\\t\\t}\\n\\t}\\n}\\n```\\n\\nThe Index method calls the ReadGitHubStars method and after that we simply return the corresponding view from the method.\\n\\nThe incoming HTTP call and the outgoing HTTP call by the HttpClient are automatically captured by the Elastic APM Agent — this is part of the auto instrumentation we had for a very long time.\\n\\nThe ReadGitHubStars is the one where we use the OpenTelemetry API. OpenTelemetry in .NET uses the ActivitySource and Activity APIs. The \\\\_activitySource.StartActivity() call simply creates an OpenTelemetry span that automatically takes the name of the method by using the [CallerMemberNameAttribute](https://learn.microsoft.com/en-us/dotnet/api/system.runtime.compilerservices.callermembernameattribute?view=net-7.0) C# language feature, and this span will end when the method runs to completion.\\n\\nAdditionally, within this span we call the GitHub API with the HttpClient type. For this type, the .NET Elastic APM Agent again offers auto instrumentation, so the HTTP call will be also captured as a span by the agent automatically.\\n\\nAnd here is how the water-flow chart for this transaction looks in Kibana:\\n\\n![trace sample kibana](/assets/images/opentelemetry-instrumentation-apm-agent-features/elastic-blog-1-trace-sample.png)\\n\\nAs you can see, the agent was able to capture the OpenTelemetry span as part of the trace.\\n\\n## Bridging OpenTelemetry spans in Python by using the Python Elastic APM Agent\\n\\nLet’s see how this works in the case of Python. 
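\\n\\nThe example below assumes a Django application that already runs with the Elastic APM Python Agent enabled. A minimal, hypothetical settings.py fragment (the service name and server URL are placeholders) could look like this:\\n\\n```python\\n# settings.py\\nINSTALLED_APPS = [\\n # ... your other apps\\n \\"elasticapm.contrib.django\\",\\n]\\n\\nELASTIC_APM = {\\n \\"SERVICE_NAME\\": \\"otel-sample\\",\\n \\"SERVER_URL\\": \\"http://localhost:8200\\",\\n}\\n```\\n\\n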
The idea is the same, so all the concepts introduced previously apply to this example as well.\\n\\nWe take a very simple Django example:\\n\\n```python\\nfrom django.http import HttpResponse\\nfrom elasticapm.contrib.opentelemetry import Tracer\\nimport requests\\n\\n\\ndef index(request):\\n tracer = Tracer(__name__)\\n with tracer.start_as_current_span(\\"ReadGitHubStars\\"):\\n url = \\"https://api.github.com/repos/elastic/apm-agent-python\\"\\n response = requests.get(url)\\n return HttpResponse(response)\\n```\\n\\nThe first step to turn on capturing OpenTelemetry spans in Python is to import the Tracer implementation from elasticapm.contrib.opentelemetry.\\n\\nAnd then on this Tracer you can start a new span — in this case, we manually name the span ReadGitHubStars.\\n\\nSimilarly to the previous example, the call to https://127.0.0.1:8000/otelsample/ is captured by the Elastic APM Python Agent, and then the next span is created by the OpenTelemetry API, which, as you can see, is captured by the agent automatically, and then finally the HTTP call to the GitHub API is captured again by the auto instrumentation of the agent.\\n\\nHere is how it looks in the water-flow chart:\\n\\n![water-flow chart](/assets/images/opentelemetry-instrumentation-apm-agent-features/elastic-blog-2-trace-sample-2.png)\\n\\nAs already mentioned, the agent maintains the parent-child relationship for all the OTel spans. Let’s take a look at the parent.id of the GET api.github.com call:\\n\\n![OTel span details](/assets/images/opentelemetry-instrumentation-apm-agent-features/elastic-blog-3-span-details.png)\\n\\nAs you can see, the id of this span is c98401c94d40b87a.\\n\\nIf we look at the span.id of the ReadGitHubStars OpenTelemetry span, then we can see that the id of this span is exactly c98401c94d40b87a — so the APM Agent internally maintains parent-child relationships across OpenTelemetry and non-OpenTelemetry spans, which makes OpenTelemetry spans first-class citizens in Elastic APM Agents.\\n\\n![OpenTelemetry spans first-class citizens in Elastic APM Agents](/assets/images/opentelemetry-instrumentation-apm-agent-features/elastic-blog-4-span-details-2.png)\\n\\n## Other languages\\n\\nAt this point, I\'ll stop to just replicate the exact same sample code in further languages — I think you already got the point here: in each language listed above, our Elastic APM Agents are able to bridge OpenTelemetry traces and show them in Kibana as native spans. We also [blogged about using the same API in Java](https://www.elastic.co/blog/create-your-own-instrumentation-with-the-java-agent-plugin), and you can see examples for the rest of the languages in the corresponding agent documentation (linked above).\\n\\n## When to use this feature and when to use pure OpenTelemetry SDKs\\n\\nThis is really up to you. If you want to only have pure OpenTelemetry usage in your applications and you really want to avoid any vendor-related software, then feel free to use OpenTelemetry SDKs directly — that is a use case we clearly support. If you go that route, this feature is not so relevant to you.\\n\\nHowever, our Elastic APM Agents already have a very big user base and they offer features that are not present in OpenTelemetry. 
Some of these features are [span compression](https://www.elastic.co/guide/en/apm/guide/current/span-compression.html), [central configuration](https://www.elastic.co/guide/en/kibana/current/agent-configuration.html), [inferred spans](https://www.elastic.co/guide/en/apm/agent/java/current/method-sampling-based.html), distributed [tail based sampling](https://www.elastic.co/guide/en/apm/guide/current/configure-tail-based-sampling.html) with multiple APM Servers, and many more.\\n\\nIf you are one of the many existing Elastic APM Agent users, or you plan to use an Elastic APM Agent because of the features mentioned above, then bridging OpenTelemetry spans enables you to still use the OpenTelemetry API and not rely on any vendor related API usage. That way your developer teams can instrument your application with OpenTelemetry, and you can also use any third-party library already instrumented by OpenTelemetry, and Elastic APM Agents will happily report those spans as part of the traces they report. With this, you can combine the vendor independent nature of OpenTelemetry and still use the feature rich Elastic APM Agents.\\n\\nThe OpenTelemetry bridge feature is also a good tool to use if you wish to change your telemetry library from an Elastic APM Agent to OpenTelemetry (and vice-versa), as it allows you to use both libraries together and switch them using atomic changes.\\n\\n## Next steps\\n\\nIn this blog post, we discussed how you can bridge OpenTelemetry spans with Elastic APM Agents. Of course OpenTelemetry is more than just traces. We know that, and we plan to cover further areas: currently we are working on bridging OpenTelemetry metrics in our Elastic APM Agents in a very similar fashion. You can watch the progress [here](https://github.com/elastic/apm/issues/691).\\n\\n[Learn more about adding Elastic APM as part of your Elastic Observability deployment](https://www.elastic.co/blog/adding-free-and-open-elastic-apm-as-part-of-your-elastic-observability-deployment).\\n","code":"var Component=(()=>{var d=Object.create;var s=Object.defineProperty;var m=Object.getOwnPropertyDescriptor;var u=Object.getOwnPropertyNames;var y=Object.getPrototypeOf,g=Object.prototype.hasOwnProperty;var w=(n,e)=>()=>(e||n((e={exports:{}}).exports,e),e.exports),f=(n,e)=>{for(var a in e)s(n,a,{get:e[a],enumerable:!0})},l=(n,e,a,r)=>{if(e&&typeof e==\\"object\\"||typeof e==\\"function\\")for(let i of u(e))!g.call(n,i)&&i!==a&&s(n,i,{get:()=>e[i],enumerable:!(r=m(e,i))||r.enumerable});return n};var b=(n,e,a)=>(a=n!=null?d(y(n)):{},l(e||!n||!n.__esModule?s(a,\\"default\\",{value:n,enumerable:!0}):a,n)),A=n=>l(s({},\\"__esModule\\",{value:!0}),n);var h=w((O,o)=>{o.exports=_jsx_runtime});var P={};f(P,{default:()=>c,frontmatter:()=>T});var t=b(h()),T={title:\\"How to combine OpenTelemetry instrumentation with Elastic APM Agent features\\",slug:\\"opentelemetry-instrumentation-apm-agent-features\\",date:\\"2023-07-13\\",description:\\"This post shows you how you can combine the OpenTelemetry tracing APIs with Elastic APM Agents. 
You\'ll learn how OpenTelemetry spans became part of a trace that Elastic APM Agents report.\\",author:[{slug:\\"greg-kalapos\\"}],image:\\"opentelemetry_apm-blog-720x420.jpeg\\",tags:[{slug:\\"opentelemetry\\"},{slug:\\"python\\"},{slug:\\"elastic-agent\\"},{slug:\\"apm\\"}]};function p(n){let e={a:\\"a\\",code:\\"code\\",h2:\\"h2\\",img:\\"img\\",li:\\"li\\",p:\\"p\\",pre:\\"pre\\",ul:\\"ul\\",...n.components};return(0,t.jsxs)(t.Fragment,{children:[(0,t.jsxs)(e.p,{children:[\\"Elastic APM supports OpenTelemetry on multiple levels. One easy-to understand scenario, which \\",(0,t.jsx)(e.a,{href:\\"https://www.elastic.co/blog/opentelemetry-observability\\",rel:\\"nofollow\\",children:\\"we previously blogged about\\"}),\\", is the direct OpenTelemetry Protocol (OTLP) support in APM Server. This means that you can connect any OpenTelemetry agent to an Elastic APM Server and the APM Server will happily take that data, ingest it into Elasticsearch\\",(0,t.jsx)(\\"sup\\",{children:\\"\\\\xAE\\"}),\\", and you can view that OpenTelemetry data in the APM app in Kibana\\",(0,t.jsx)(\\"sup\\",{children:\\"\\\\xAE\\"}),\\".\\"]}),`\\n`,(0,t.jsxs)(e.p,{children:[\\"This blog post will showcase a different use-case: within Elastic APM, we have \\",(0,t.jsx)(e.a,{href:\\"https://www.elastic.co/guide/en/apm/agent/index.html\\",rel:\\"nofollow\\",children:\\"our own APM Agents\\"}),\\". Some of these have download numbers in the tens of millions, and some of them predate OpenTelemetry. Of course we realize OpenTelemetry is very important and it\\\\u2019s here to stay, so we wanted to make these agents OpenTelemetry compatible and illustrate them using \\",(0,t.jsx)(e.a,{href:\\"https://www.elastic.co/observability/opentelemetry\\",rel:\\"nofollow\\",children:\\"OpenTelemetry visualizations\\"}),\\" in this blog.\\"]}),`\\n`,(0,t.jsx)(e.p,{children:\\"Most of our Elastic APM Agents today are able to ship OpenTelemetry spans as part of a trace. This means that if you have any component in your application that emits an OpenTelemetry span, it\\\\u2019ll be part of the trace the Elastic APM Agent captures. This can be a library you use that is already instrumented by the OpenTelemetry API, or it can be any other OpenTelemetry span that an application developer added into the application\\\\u2019s code for manual instrumentation.\\"}),`\\n`,(0,t.jsx)(e.p,{children:\\"This feature of the Elastic APM Agents not only reports those spans but also properly maintains parent-child relationships between all spans, making OpenTelemetry a first-class citizen for these agents. If, for example, an Elastic APM Agent starts a span for a specific action by auto-instrumentation and then within that span the OpenTelemetry API starts another span, then the OpenTelemetry span will be the child of the outer span created by the agent. This is reflected in the parent.id field of the spans. 
It\\\\u2019s the same the other way around as well: if a span is created by the OpenTelemetry API and within that span an Elastic APM agent captures another span, then the span created by the Elastic APM Agent will be the child of the other span created by the OpenTelemetry API.\\"}),`\\n`,(0,t.jsx)(e.p,{children:\\"This feature is present in the following agents:\\"}),`\\n`,(0,t.jsxs)(e.ul,{children:[`\\n`,(0,t.jsx)(e.li,{children:(0,t.jsx)(e.a,{href:\\"https://www.elastic.co/guide/en/apm/agent/java/current/opentelemetry-bridge.html\\",rel:\\"nofollow\\",children:\\"Java\\"})}),`\\n`,(0,t.jsx)(e.li,{children:(0,t.jsx)(e.a,{href:\\"https://www.elastic.co/guide/en/apm/agent/dotnet/master/opentelemetry-bridge.html\\",rel:\\"nofollow\\",children:\\".NET\\"})}),`\\n`,(0,t.jsx)(e.li,{children:(0,t.jsx)(e.a,{href:\\"https://www.elastic.co/guide/en/apm/agent/python/current/opentelemetry-bridge.html\\",rel:\\"nofollow\\",children:\\"Python\\"})}),`\\n`,(0,t.jsx)(e.li,{children:(0,t.jsx)(e.a,{href:\\"https://www.elastic.co/guide/en/apm/agent/nodejs/current/opentelemetry-bridge.html\\",rel:\\"nofollow\\",children:\\"Node.js\\"})}),`\\n`,(0,t.jsx)(e.li,{children:(0,t.jsx)(e.a,{href:\\"https://www.elastic.co/guide/en/apm/agent/go/current/opentelemetry.html\\",rel:\\"nofollow\\",children:\\"Go\\"})}),`\\n`]}),`\\n`,(0,t.jsx)(e.h2,{id:\\"capturing-opentelemetry-spans-in-the-elastic-net-apm-agent\\",children:\\"Capturing OpenTelemetry spans in the Elastic .NET APM Agent\\"}),`\\n`,(0,t.jsx)(e.p,{children:\\"As a first example, let\\\\u2019s take an ASP.NET Core application. We\\\\u2019ll put the .NET Elastic APM Agent into this application, and we\\\\u2019ll turn on the feature, which automatically bridges OpenTelemetry spans, so the Elastic APM Agent will make those spans part of the trace it reports.\\"}),`\\n`,(0,t.jsx)(e.p,{children:\\"The following code snippet shows a controller:\\"}),`\\n`,(0,t.jsx)(e.pre,{children:(0,t.jsx)(e.code,{className:\\"language-csharp\\",children:`namespace SampleAspNetCoreApp.Controllers\\n{\\n\\tpublic class HomeController : Controller\\n\\t{\\n\\t\\tprivate readonly SampleDataContext _sampleDataContext;\\n\\t\\tprivate ActivitySource _activitySource = new ActivitySource(\\"HomeController\\");\\n\\t\\tpublic HomeController(SampleDataContext sampleDataContext) => _sampleDataContext = sampleDataContext;\\n\\t\\tpublic async Task Index()\\n\\t\\t{\\n\\t\\t\\tawait ReadGitHubStars();\\n\\t\\t\\treturn View();\\n\\t\\t}\\n\\t\\tpublic async Task ReadGitHubStars()\\n\\t\\t{\\n\\t\\t\\tusing var activity = _activitySource.StartActivity();\\n\\t\\t\\tvar httpClient = new HttpClient();\\n\\t\\t\\thttpClient.DefaultRequestHeaders.Add(\\"User-Agent\\", \\"APM-Sample-App\\");\\n\\t\\t\\tvar responseMsg = await httpClient.GetAsync(\\"https://api.github.com/repos/elastic/apm-agent-dotnet\\");\\n\\t\\t\\tvar responseStr = await responseMsg.Content.ReadAsStringAsync();\\n\\t\\t\\t// \\\\u2026use responseStr\\n\\t\\t}\\n\\t}\\n}\\n`})}),`\\n`,(0,t.jsx)(e.p,{children:\\"The Index method calls the ReadGitHubStars method and after that we simply return the corresponding view from the method.\\"}),`\\n`,(0,t.jsx)(e.p,{children:\\"The incoming HTTP call and the outgoing HTTP call by the HttpClient are automatically captured by the Elastic APM Agent \\\\u2014 this is part of the auto instrumentation we had for a very long time.\\"}),`\\n`,(0,t.jsxs)(e.p,{children:[\\"The ReadGitHubStars is the one where we use the OpenTelemetry API. 
OpenTelemetry in .NET uses the ActivitySource and Activity APIs. The _activitySource.StartActivity() call simply creates an OpenTelemetry span that automatically takes the name of the method by using the \\",(0,t.jsx)(e.a,{href:\\"https://learn.microsoft.com/en-us/dotnet/api/system.runtime.compilerservices.callermembernameattribute?view=net-7.0\\",rel:\\"nofollow\\",children:\\"CallerMemberNameAttribute\\"}),\\" C# language feature, and this span will end when the method runs to completion.\\"]}),`\\n`,(0,t.jsx)(e.p,{children:\\"Additionally, within this span we call the GitHub API with the HttpClient type. For this type, the .NET Elastic APM Agent again offers auto instrumentation, so the HTTP call will be also captured as a span by the agent automatically.\\"}),`\\n`,(0,t.jsx)(e.p,{children:\\"And here is how the water-flow chart for this transaction looks in Kibana:\\"}),`\\n`,(0,t.jsx)(e.p,{children:(0,t.jsx)(e.img,{src:\\"/assets/images/opentelemetry-instrumentation-apm-agent-features/elastic-blog-1-trace-sample.png\\",alt:\\"trace sample kibana\\",width:\\"1999\\",height:\\"609\\"})}),`\\n`,(0,t.jsx)(e.p,{children:\\"As you can see, the agent was able to capture the OpenTelemetry span as part of the trace.\\"}),`\\n`,(0,t.jsx)(e.h2,{id:\\"bridging-opentelemetry-spans-in-python-by-using-the-python-elastic-apm-agent\\",children:\\"Bridging OpenTelemetry spans in Python by using the Python Elastic APM Agent\\"}),`\\n`,(0,t.jsx)(e.p,{children:\\"Let\\\\u2019s see how this works in the case of Python. The idea is the same, so all the concepts introduced previously apply to this example as well.\\"}),`\\n`,(0,t.jsx)(e.p,{children:\\"We take a very simple Django example:\\"}),`\\n`,(0,t.jsx)(e.pre,{children:(0,t.jsx)(e.code,{className:\\"language-python\\",children:`from django.http import HttpResponse\\nfrom elasticapm.contrib.opentelemetry import Tracer\\nimport requests\\n\\n\\ndef index(request):\\n tracer = Tracer(__name__)\\n with tracer.start_as_current_span(\\"ReadGitHubStars\\"):\\n url = \\"https://api.github.com/repos/elastic/apm-agent-python\\"\\n response = requests.get(url)\\n return HttpResponse(response)\\n`})}),`\\n`,(0,t.jsx)(e.p,{children:\\"The first step to turn on capturing OpenTelemetry spans in Python is to import the Tracer implementation from elasticapm.contrib.opentelemetry.\\"}),`\\n`,(0,t.jsx)(e.p,{children:\\"And then on this Tracer you can start a new span \\\\u2014 in this case, we manually name the span ReadGitHubStars.\\"}),`\\n`,(0,t.jsxs)(e.p,{children:[\\"Similarly to the previous example, the call to \\",(0,t.jsx)(e.a,{href:\\"https://127.0.0.1:8000/otelsample/\\",rel:\\"nofollow\\",children:\\"https://127.0.0.1:8000/otelsample/\\"}),\\" is captured by the Elastic APM Python Agent, and then the next span is created by the OpenTelemetry API, which, as you can see, is captured by the agent automatically, and then finally the HTTP call to the GitHub API is captured again by the auto instrumentation of the agent.\\"]}),`\\n`,(0,t.jsx)(e.p,{children:\\"Here is how it looks in the water-flow chart:\\"}),`\\n`,(0,t.jsx)(e.p,{children:(0,t.jsx)(e.img,{src:\\"/assets/images/opentelemetry-instrumentation-apm-agent-features/elastic-blog-2-trace-sample-2.png\\",alt:\\"water-flow chart\\",width:\\"1999\\",height:\\"589\\"})}),`\\n`,(0,t.jsx)(e.p,{children:\\"As already mentioned, the agent maintains the parent-child relationship for all the OTel spans. 
Let\\\\u2019s take a look at the parent.id of the GET api.github.com call:\\"}),`\\n`,(0,t.jsx)(e.p,{children:(0,t.jsx)(e.img,{src:\\"/assets/images/opentelemetry-instrumentation-apm-agent-features/elastic-blog-3-span-details.png\\",alt:\\"OTel span details\\",width:\\"1999\\",height:\\"728\\"})}),`\\n`,(0,t.jsx)(e.p,{children:\\"As you can see, the id of this span is c98401c94d40b87a.\\"}),`\\n`,(0,t.jsx)(e.p,{children:\\"If we look at the span.id of the ReadGitHubStars OpenTelemetry span, then we can see that the id of this span is exactly c98401c94d40b87a \\\\u2014 so the APM Agent internally maintains parent-child relationships across OpenTelemetry and non-OpenTelemetry spans, which makes OpenTelemetry spans first-class citizens in Elastic APM Agents.\\"}),`\\n`,(0,t.jsx)(e.p,{children:(0,t.jsx)(e.img,{src:\\"/assets/images/opentelemetry-instrumentation-apm-agent-features/elastic-blog-4-span-details-2.png\\",alt:\\"OpenTelemetry spans first-class citizens in Elastic APM Agents\\",width:\\"1999\\",height:\\"777\\"})}),`\\n`,(0,t.jsx)(e.h2,{id:\\"other-languages\\",children:\\"Other languages\\"}),`\\n`,(0,t.jsxs)(e.p,{children:[\\"At this point, I\'ll stop to just replicate the exact same sample code in further languages \\\\u2014 I think you already got the point here: in each language listed above, our Elastic APM Agents are able to bridge OpenTelemetry traces and show them in Kibana as native spans. We also \\",(0,t.jsx)(e.a,{href:\\"https://www.elastic.co/blog/create-your-own-instrumentation-with-the-java-agent-plugin\\",rel:\\"nofollow\\",children:\\"blogged about using the same API in Java\\"}),\\", and you can see examples for the rest of the languages in the corresponding agent documentation (linked above).\\"]}),`\\n`,(0,t.jsx)(e.h2,{id:\\"when-to-use-this-feature-and-when-to-use-pure-opentelemetry-sdks\\",children:\\"When to use this feature and when to use pure OpenTelemetry SDKs\\"}),`\\n`,(0,t.jsx)(e.p,{children:\\"This is really up to you. If you want to only have pure OpenTelemetry usage in your applications and you really want to avoid any vendor-related software, then feel free to use OpenTelemetry SDKs directly \\\\u2014 that is a use case we clearly support. If you go that route, this feature is not so relevant to you.\\"}),`\\n`,(0,t.jsxs)(e.p,{children:[\\"However, our Elastic APM Agents already have a very big user base and they offer features that are not present in OpenTelemetry. Some of these features are \\",(0,t.jsx)(e.a,{href:\\"https://www.elastic.co/guide/en/apm/guide/current/span-compression.html\\",rel:\\"nofollow\\",children:\\"span compression\\"}),\\", \\",(0,t.jsx)(e.a,{href:\\"https://www.elastic.co/guide/en/kibana/current/agent-configuration.html\\",rel:\\"nofollow\\",children:\\"central configuration\\"}),\\", \\",(0,t.jsx)(e.a,{href:\\"https://www.elastic.co/guide/en/apm/agent/java/current/method-sampling-based.html\\",rel:\\"nofollow\\",children:\\"inferred spans\\"}),\\", distributed \\",(0,t.jsx)(e.a,{href:\\"https://www.elastic.co/guide/en/apm/guide/current/configure-tail-based-sampling.html\\",rel:\\"nofollow\\",children:\\"tail based sampling\\"}),\\" with multiple APM Servers, and many more.\\"]}),`\\n`,(0,t.jsx)(e.p,{children:\\"If you are one of the many existing Elastic APM Agent users, or you plan to use an Elastic APM Agent because of the features mentioned above, then bridging OpenTelemetry spans enables you to still use the OpenTelemetry API and not rely on any vendor related API usage. 
That way your developer teams can instrument your application with OpenTelemetry, and you can also use any third-party library already instrumented by OpenTelemetry, and Elastic APM Agents will happily report those spans as part of the traces they report. With this, you can combine the vendor independent nature of OpenTelemetry and still use the feature rich Elastic APM Agents.\\"}),`\\n`,(0,t.jsx)(e.p,{children:\\"The OpenTelemetry bridge feature is also a good tool to use if you wish to change your telemetry library from an Elastic APM Agent to OpenTelemetry (and vice-versa), as it allows you to use both libraries together and switch them using atomic changes.\\"}),`\\n`,(0,t.jsx)(e.h2,{id:\\"next-steps\\",children:\\"Next steps\\"}),`\\n`,(0,t.jsxs)(e.p,{children:[\\"In this blog post, we discussed how you can bridge OpenTelemetry spans with Elastic APM Agents. Of course OpenTelemetry is more than just traces. We know that, and we plan to cover further areas: currently we are working on bridging OpenTelemetry metrics in our Elastic APM Agents in a very similar fashion. You can watch the progress \\",(0,t.jsx)(e.a,{href:\\"https://github.com/elastic/apm/issues/691\\",rel:\\"nofollow\\",children:\\"here\\"}),\\".\\"]}),`\\n`,(0,t.jsxs)(e.p,{children:[(0,t.jsx)(e.a,{href:\\"https://www.elastic.co/blog/adding-free-and-open-elastic-apm-as-part-of-your-elastic-observability-deployment\\",rel:\\"nofollow\\",children:\\"Learn more about adding Elastic APM as part of your Elastic Observability deployment\\"}),\\".\\"]})]})}function c(n={}){let{wrapper:e}=n.components||{};return e?(0,t.jsx)(e,{...n,children:(0,t.jsx)(p,{...n})}):p(n)}return A(P);})();\\n;return Component;"},"_id":"articles/opentelemetry-instrumentation-elastic-apm-agent-features.mdx","_raw":{"sourceFilePath":"articles/opentelemetry-instrumentation-elastic-apm-agent-features.mdx","sourceFileName":"opentelemetry-instrumentation-elastic-apm-agent-features.mdx","sourceFileDir":"articles","contentType":"mdx","flattenedPath":"articles/opentelemetry-instrumentation-elastic-apm-agent-features"},"type":"Article","imageUrl":"/assets/images/opentelemetry-instrumentation-apm-agent-features/opentelemetry_apm-blog-720x420.jpeg","readingTime":"8 min read","url":"/opentelemetry-instrumentation-apm-agent-features","headings":[{"level":2,"title":"Capturing OpenTelemetry spans in the Elastic .NET APM Agent","href":"#capturing-opentelemetry-spans-in-the-elastic-net-apm-agent"},{"level":2,"title":"Bridging OpenTelemetry spans in Python by using the Python Elastic APM Agent","href":"#bridging-opentelemetry-spans-in-python-by-using-the-python-elastic-apm-agent"},{"level":2,"title":"Other languages","href":"#other-languages"},{"level":2,"title":"When to use this feature and when to use pure OpenTelemetry SDKs","href":"#when-to-use-this-feature-and-when-to-use-pure-opentelemetry-sdks"},{"level":2,"title":"Next steps","href":"#next-steps"}]},{"title":"Automatic cloud resource attributes with OpenTelemetry Java","slug":"opentelemetry-java-automatic-cloud-resource-attributes","date":"2024-06-27","description":"Capturing cloud resource attributes allow to describe application cloud deployment details. 
In this article we describe three distinct ways to enable them for Java applications using OpenTelemetry","image":"flexible-implementation-1680X980.png","author":[{"slug":"sylvain-juge","type":"Author","_raw":{}}],"tags":[{"slug":"opentelemetry","type":"Tag","_raw":{}},{"slug":"aws","type":"Tag","_raw":{}},{"slug":"google-cloud","type":"Tag","_raw":{}},{"slug":"java","type":"Tag","_raw":{}},{"slug":"cloud-monitoring","type":"Tag","_raw":{}}],"body":{"raw":"\\nWith OpenTelemetry, the observed entities (application, services, processes, …) are described through resource attributes. The definitions and the values of those attributes are defined in the [semantic conventions](https://opentelemetry.io/docs/concepts/semantic-conventions/).\\\\\\nIn practice, for a typical java application running in a cloud environment like Google Cloud Platform (GCP), Amazon Web Services (AWS) or Azure, it means capturing the name of the cloud provider, the cloud service name or availability zone in addition to per-provider attributes. Those attributes are then used to describe and qualify the observability signals (logs, traces, metrics), defined by semantic conventions in the [cloud resource attributes](https://opentelemetry.io/docs/specs/semconv/resource/cloud/) section.\\n\\nWhen using the [OpenTelemetry Java SDK](https://github.com/open-telemetry/opentelemetry-java) or the [OpenTelemetry instrumentation agent](https://github.com/open-telemetry/opentelemetry-java-instrumentation), those attributes are not automatically captured by default. In this article we will show you first how to enable them with the SDK, then using the instrumentation agent and then we will show you how using the [Elastic OpenTelemetry Distribution](https://github.com/elastic/elastic-otel-java/) makes it even easier.\\n\\n## OpenTelemetry Java SDK\\n\\nThe OpenTelemetry Java SDK does not capture any cloud resource attributes, however it provides a pluggable service provider interface to register resource attributes providers and application developers have to provide the implementations.\\n\\nImplementations for [GCP](https://github.com/open-telemetry/opentelemetry-java-contrib/tree/main/gcp-resources) and [AWS](https://github.com/open-telemetry/opentelemetry-java-contrib/tree/main/aws-resources) are already included in the [OpenTelemetry Java Contrib](https://github.com/open-telemetry/opentelemetry-java-contrib/) repo, so if you are using one of those cloud providers then it\'s mostly a matter of adding those providers to your application dependencies. Thanks to autoconfiguration those should be automatically included and enabled once they are added to the application classpath. The [SDK documentation](https://github.com/open-telemetry/opentelemetry-java/tree/main/sdk-extensions/autoconfigure#resource-provider-spi) provides all the details to add and configure those in your application.\\n\\nIf you are using a cloud provider for which no such implementation is available, then you still have the option to provide your own which is a straightforward implementation of the [ResourceProvider](https://github.com/open-telemetry/opentelemetry-java/blob/main/sdk-extensions/autoconfigure/README.md#resource-provider-spi) SPI (Service Provider Interface). 
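\\n\\nBesides implementing the interface, the provider must be registered so that the SDK autoconfiguration can discover it through the standard Java ServiceLoader mechanism, typically with a provider-configuration file in the jar. Here is a sketch, referring to the fictitious provider class shown below:\\n\\n```\\n# src/main/resources/META-INF/services/io.opentelemetry.sdk.autoconfigure.spi.ResourceProvider\\npotatoes.PotatoesResourceProvider\\n```\\n\\n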
In order to keep things consistent, you will have to rely on the existing [cloud semantic conventions](https://opentelemetry.io/docs/specs/semconv/resource/cloud/).\\n\\nFor example here is an example of a simple cloud resource attributes provider for a fictitious cloud provider named \\"potatoes\\".\\n\\n```\\npackage potatoes;\\n\\nimport io.opentelemetry.api.common.Attributes;\\nimport io.opentelemetry.sdk.autoconfigure.spi.ConfigProperties;\\nimport io.opentelemetry.sdk.autoconfigure.spi.ResourceProvider;\\nimport io.opentelemetry.sdk.resources.Resource;\\nimport io.opentelemetry.semconv.incubating.CloudIncubatingAttributes;\\n\\npublic class PotatoesResourceProvider implements ResourceProvider {\\n\\n@Override\\npublic Resource createResource(ConfigProperties configProperties) {\\n return Resource.create(Attributes.of(\\n CloudIncubatingAttributes.CLOUD_PROVIDER, \\"potatoes\\",\\n CloudIncubatingAttributes.CLOUD_PLATFORM, \\"french-fries\\",\\n CloudIncubatingAttributes.CLOUD_REGION, \\"garden\\"\\n ));\\n }\\n}\\n```\\n\\n\\n## OpenTelemetry Java instrumentation\\n\\nThe [OpenTelemetry Java Instrumentation](https://github.com/open-telemetry/opentelemetry-java-instrumentation) provides a java agent that instruments the application at runtime automatically for an extensive set of frameworks and libraries (see [supported technologies](https://github.com/open-telemetry/opentelemetry-java-instrumentation/blob/main/docs/supported-libraries.md)).\\n\\nUsing instrumentation means that the application bytecode and the embedded libraries are modified automatically to make them behave as if explicit modifications were made in their source code to call the OpenTelemetry SDK in order to create traces, spans and metrics.\\n\\nWhen an application is deployed with the OpenTelemetry instrumentation agent, the cloud resource attributes for GCP and AWS are included but not enabled by default since version 2.2.0. You can enable them [through configuration](https://opentelemetry.io/docs/languages/java/automatic/configuration/#enable-resource-providers-that-are-disabled-by-default) by setting the following properties:\\n\\n- For AWS: `otel.resource.providers.aws.enabled=true`\\n\\n- For GCP: `otel.resource.providers.gcp.enabled=true`\\n\\n\\n## Elastic OpenTelemetry Java Distribution\\n\\nThe Elastic OpenTelemetry Java distribution relies on the OpenTelemetry Java instrumentation which we often refer to as the Vanilla OpenTelemetry, and it thus inherits all of its features.\\n\\nOne major difference though is that the resource attributes providers for GCP and AWS are included and enabled by default to provide a better onboarding experience without extra configuration.\\n\\nThe minor cost to this is that it might make the application startup slightly slower due to having to call an HTTP(S) endpoint. 
This overhead is usually negligible compared to application startup but can become significant for some setups.\\n\\nIn order to reduce the startup overhead, or when the cloud provider is known in advance, you can selectively disable unused provider implementations through configuration:\\n\\n- For AWS: `otel.resource.providers.aws.enabled=false`\\n\\n- For GCP: `otel.resource.providers.gcp.enabled=false`\\n\\n\\n## Conclusion\\n\\nWith this blogpost we have introduced what OpenTelemetry cloud resource attributes are and how they can be used and configured into application deployments using either OpenTelemetry SDK/API and Instrumentation agents.\\n\\nWhen using the Elastic OpenTelemetry Java distribution, those resource providers are automatically provided and enabled for an easy and simple onboarding experience.\\n\\nAnother very interesting aspect of the cloud resource attribute providers available in the [opentelemetry-java-contrib](https://github.com/open-telemetry/opentelemetry-java-contrib) repository is that they are maintained by their respective vendors (Google and Amazon). For the end-user it means those implementations should be quite well tested and be robust to changes in the underlying infrastructure. For solution vendors like Elastic, it means we don\'t have to re-implement and reverse-engineer the infrastructure details of every cloud provider, hence proving that investing in those common components is a net win for the broader OpenTelemetry community.\\n","code":"var Component=(()=>{var h=Object.create;var i=Object.defineProperty;var p=Object.getOwnPropertyDescriptor;var m=Object.getOwnPropertyNames;var b=Object.getPrototypeOf,v=Object.prototype.hasOwnProperty;var f=(n,e)=>()=>(e||n((e={exports:{}}).exports,e),e.exports),y=(n,e)=>{for(var o in e)i(n,o,{get:e[o],enumerable:!0})},l=(n,e,o,a)=>{if(e&&typeof e==\\"object\\"||typeof e==\\"function\\")for(let r of m(e))!v.call(n,r)&&r!==o&&i(n,r,{get:()=>e[r],enumerable:!(a=p(e,r))||a.enumerable});return n};var g=(n,e,o)=>(o=n!=null?h(b(n)):{},l(e||!n||!n.__esModule?i(o,\\"default\\",{value:n,enumerable:!0}):o,n)),w=n=>l(i({},\\"__esModule\\",{value:!0}),n);var c=f((C,s)=>{s.exports=_jsx_runtime});var O={};y(O,{default:()=>u,frontmatter:()=>T});var t=g(c()),T={title:\\"Automatic cloud resource attributes with OpenTelemetry Java\\",slug:\\"opentelemetry-java-automatic-cloud-resource-attributes\\",description:\\"Capturing cloud resource attributes allow to describe application cloud deployment details. In this article we describe three distinct ways to enable them for Java applications using OpenTelemetry\\",author:[{slug:\\"sylvain-juge\\"}],tags:[{slug:\\"opentelemetry\\"},{slug:\\"aws\\"},{slug:\\"google-cloud\\"},{slug:\\"java\\"},{slug:\\"cloud-monitoring\\"}],date:\\"2024-06-27\\",image:\\"flexible-implementation-1680X980.png\\"};function d(n){let e={a:\\"a\\",br:\\"br\\",code:\\"code\\",h2:\\"h2\\",li:\\"li\\",p:\\"p\\",pre:\\"pre\\",ul:\\"ul\\",...n.components};return(0,t.jsxs)(t.Fragment,{children:[(0,t.jsxs)(e.p,{children:[\\"With OpenTelemetry, the observed entities (application, services, processes, \\\\u2026) are described through resource attributes. 
The definitions and the values of those attributes are defined in the \\",(0,t.jsx)(e.a,{href:\\"https://opentelemetry.io/docs/concepts/semantic-conventions/\\",rel:\\"nofollow\\",children:\\"semantic conventions\\"}),\\".\\",(0,t.jsx)(e.br,{}),`\\n`,\\"In practice, for a typical java application running in a cloud environment like Google Cloud Platform (GCP), Amazon Web Services (AWS) or Azure, it means capturing the name of the cloud provider, the cloud service name or availability zone in addition to per-provider attributes. Those attributes are then used to describe and qualify the observability signals (logs, traces, metrics), defined by semantic conventions in the \\",(0,t.jsx)(e.a,{href:\\"https://opentelemetry.io/docs/specs/semconv/resource/cloud/\\",rel:\\"nofollow\\",children:\\"cloud resource attributes\\"}),\\" section.\\"]}),`\\n`,(0,t.jsxs)(e.p,{children:[\\"When using the \\",(0,t.jsx)(e.a,{href:\\"https://github.com/open-telemetry/opentelemetry-java\\",rel:\\"nofollow\\",children:\\"OpenTelemetry Java SDK\\"}),\\" or the \\",(0,t.jsx)(e.a,{href:\\"https://github.com/open-telemetry/opentelemetry-java-instrumentation\\",rel:\\"nofollow\\",children:\\"OpenTelemetry instrumentation agent\\"}),\\", those attributes are not automatically captured by default. In this article we will show you first how to enable them with the SDK, then using the instrumentation agent and then we will show you how using the \\",(0,t.jsx)(e.a,{href:\\"https://github.com/elastic/elastic-otel-java/\\",rel:\\"nofollow\\",children:\\"Elastic OpenTelemetry Distribution\\"}),\\" makes it even easier.\\"]}),`\\n`,(0,t.jsx)(e.h2,{id:\\"opentelemetry-java-sdk\\",children:\\"OpenTelemetry Java SDK\\"}),`\\n`,(0,t.jsx)(e.p,{children:\\"The OpenTelemetry Java SDK does not capture any cloud resource attributes, however it provides a pluggable service provider interface to register resource attributes providers and application developers have to provide the implementations.\\"}),`\\n`,(0,t.jsxs)(e.p,{children:[\\"Implementations for \\",(0,t.jsx)(e.a,{href:\\"https://github.com/open-telemetry/opentelemetry-java-contrib/tree/main/gcp-resources\\",rel:\\"nofollow\\",children:\\"GCP\\"}),\\" and \\",(0,t.jsx)(e.a,{href:\\"https://github.com/open-telemetry/opentelemetry-java-contrib/tree/main/aws-resources\\",rel:\\"nofollow\\",children:\\"AWS\\"}),\\" are already included in the \\",(0,t.jsx)(e.a,{href:\\"https://github.com/open-telemetry/opentelemetry-java-contrib/\\",rel:\\"nofollow\\",children:\\"OpenTelemetry Java Contrib\\"}),\\" repo, so if you are using one of those cloud providers then it\'s mostly a matter of adding those providers to your application dependencies. Thanks to autoconfiguration those should be automatically included and enabled once they are added to the application classpath. 
The \\",(0,t.jsx)(e.a,{href:\\"https://github.com/open-telemetry/opentelemetry-java/tree/main/sdk-extensions/autoconfigure#resource-provider-spi\\",rel:\\"nofollow\\",children:\\"SDK documentation\\"}),\\" provides all the details to add and configure those in your application.\\"]}),`\\n`,(0,t.jsxs)(e.p,{children:[\\"If you are using a cloud provider for which no such implementation is available, then you still have the option to provide your own which is a straightforward implementation of the \\",(0,t.jsx)(e.a,{href:\\"https://github.com/open-telemetry/opentelemetry-java/blob/main/sdk-extensions/autoconfigure/README.md#resource-provider-spi\\",rel:\\"nofollow\\",children:\\"ResourceProvider\\"}),\\" SPI (Service Provider Interface). In order to keep things consistent, you will have to rely on the existing \\",(0,t.jsx)(e.a,{href:\\"https://opentelemetry.io/docs/specs/semconv/resource/cloud/\\",rel:\\"nofollow\\",children:\\"cloud semantic conventions\\"}),\\".\\"]}),`\\n`,(0,t.jsx)(e.p,{children:\'For example here is an example of a simple cloud resource attributes provider for a fictitious cloud provider named \\"potatoes\\".\'}),`\\n`,(0,t.jsx)(e.pre,{children:(0,t.jsx)(e.code,{children:`package potatoes;\\n\\nimport io.opentelemetry.api.common.Attributes;\\nimport io.opentelemetry.sdk.autoconfigure.spi.ConfigProperties;\\nimport io.opentelemetry.sdk.autoconfigure.spi.ResourceProvider;\\nimport io.opentelemetry.sdk.resources.Resource;\\nimport io.opentelemetry.semconv.incubating.CloudIncubatingAttributes;\\n\\npublic class PotatoesResourceProvider implements ResourceProvider {\\n\\n@Override\\npublic Resource createResource(ConfigProperties configProperties) {\\n return Resource.create(Attributes.of(\\n CloudIncubatingAttributes.CLOUD_PROVIDER, \\"potatoes\\",\\n CloudIncubatingAttributes.CLOUD_PLATFORM, \\"french-fries\\",\\n CloudIncubatingAttributes.CLOUD_REGION, \\"garden\\"\\n ));\\n }\\n}\\n`})}),`\\n`,(0,t.jsx)(e.h2,{id:\\"opentelemetry-java-instrumentation\\",children:\\"OpenTelemetry Java instrumentation\\"}),`\\n`,(0,t.jsxs)(e.p,{children:[\\"The \\",(0,t.jsx)(e.a,{href:\\"https://github.com/open-telemetry/opentelemetry-java-instrumentation\\",rel:\\"nofollow\\",children:\\"OpenTelemetry Java Instrumentation\\"}),\\" provides a java agent that instruments the application at runtime automatically for an extensive set of frameworks and libraries (see \\",(0,t.jsx)(e.a,{href:\\"https://github.com/open-telemetry/opentelemetry-java-instrumentation/blob/main/docs/supported-libraries.md\\",rel:\\"nofollow\\",children:\\"supported technologies\\"}),\\").\\"]}),`\\n`,(0,t.jsx)(e.p,{children:\\"Using instrumentation means that the application bytecode and the embedded libraries are modified automatically to make them behave as if explicit modifications were made in their source code to call the OpenTelemetry SDK in order to create traces, spans and metrics.\\"}),`\\n`,(0,t.jsxs)(e.p,{children:[\\"When an application is deployed with the OpenTelemetry instrumentation agent, the cloud resource attributes for GCP and AWS are included but not enabled by default since version 2.2.0. 
More generally, you can enable them \\",(0,t.jsx)(e.a,{href:\\"https://opentelemetry.io/docs/languages/java/automatic/configuration/#enable-resource-providers-that-are-disabled-by-default\\",rel:\\"nofollow\\",children:\\"through configuration\\"}),\\" by setting the following properties:\\"]}),`\\n`,(0,t.jsxs)(e.ul,{children:[`\\n`,(0,t.jsxs)(e.li,{children:[`\\n`,(0,t.jsxs)(e.p,{children:[\\"For AWS: \\",(0,t.jsx)(e.code,{children:\\"otel.resource.providers.aws.enabled=true\\"})]}),`\\n`]}),`\\n`,(0,t.jsxs)(e.li,{children:[`\\n`,(0,t.jsxs)(e.p,{children:[\\"For GCP: \\",(0,t.jsx)(e.code,{children:\\"otel.resource.providers.gcp.enabled=true\\"})]}),`\\n`]}),`\\n`]}),`\\n`,(0,t.jsx)(e.h2,{id:\\"elastic-opentelemetry-java-distribution\\",children:\\"Elastic OpenTelemetry Java Distribution\\"}),`\\n`,(0,t.jsx)(e.p,{children:\\"The Elastic OpenTelemetry Java distribution relies on the OpenTelemetry Java instrumentation, which we often refer to as the vanilla OpenTelemetry agent, and thus inherits all of its features.\\"}),`\\n`,(0,t.jsx)(e.p,{children:\\"One major difference, though, is that the resource attribute providers for GCP and AWS are included and enabled by default to provide a better onboarding experience without extra configuration.\\"}),`\\n`,(0,t.jsx)(e.p,{children:\\"The minor cost is that application startup might be slightly slower because the providers have to call an HTTP(S) endpoint. This overhead is usually negligible compared to the overall application startup time but can become significant in some setups.\\"}),`\\n`,(0,t.jsx)(e.p,{children:\\"In order to reduce the startup overhead, or when the cloud provider is known in advance, you can selectively disable unused provider implementations through configuration:\\"}),`\\n`,(0,t.jsxs)(e.ul,{children:[`\\n`,(0,t.jsxs)(e.li,{children:[`\\n`,(0,t.jsxs)(e.p,{children:[\\"For AWS: \\",(0,t.jsx)(e.code,{children:\\"otel.resource.providers.aws.enabled=false\\"})]}),`\\n`]}),`\\n`,(0,t.jsxs)(e.li,{children:[`\\n`,(0,t.jsxs)(e.p,{children:[\\"For GCP: \\",(0,t.jsx)(e.code,{children:\\"otel.resource.providers.gcp.enabled=false\\"})]}),`\\n`]}),`\\n`]}),`\\n`,(0,t.jsx)(e.h2,{id:\\"conclusion\\",children:\\"Conclusion\\"}),`\\n`,(0,t.jsx)(e.p,{children:\\"In this blog post, we introduced what OpenTelemetry cloud resource attributes are and how they can be used and configured in application deployments using either the OpenTelemetry SDK/API or the instrumentation agent.\\"}),`\\n`,(0,t.jsx)(e.p,{children:\\"When using the Elastic OpenTelemetry Java distribution, those resource providers are automatically provided and enabled for an easy and simple onboarding experience.\\"}),`\\n`,(0,t.jsxs)(e.p,{children:[\\"Another very interesting aspect of the cloud resource attribute providers available in the \\",(0,t.jsx)(e.a,{href:\\"https://github.com/open-telemetry/opentelemetry-java-contrib\\",rel:\\"nofollow\\",children:\\"opentelemetry-java-contrib\\"}),\\" repository is that they are maintained by their respective vendors (Google and Amazon). For the end user, it means those implementations should be quite well tested and robust to changes in the underlying infrastructure. 
For solution vendors like Elastic, it means we don\'t have to re-implement and reverse-engineer the infrastructure details of every cloud provider, hence proving that investing in those common components is a net win for the broader OpenTelemetry community.\\"]})]})}function u(n={}){let{wrapper:e}=n.components||{};return e?(0,t.jsx)(e,{...n,children:(0,t.jsx)(d,{...n})}):d(n)}return w(O);})();\\n;return Component;"},"_id":"articles/opentelemetry-java-automatic-cloud-resource-attributes.mdx","_raw":{"sourceFilePath":"articles/opentelemetry-java-automatic-cloud-resource-attributes.mdx","sourceFileName":"opentelemetry-java-automatic-cloud-resource-attributes.mdx","sourceFileDir":"articles","contentType":"mdx","flattenedPath":"articles/opentelemetry-java-automatic-cloud-resource-attributes"},"type":"Article","imageUrl":"/assets/images/opentelemetry-java-automatic-cloud-resource-attributes/flexible-implementation-1680X980.png","readingTime":"5 min read","url":"/opentelemetry-java-automatic-cloud-resource-attributes","headings":[{"level":2,"title":"OpenTelemetry Java SDK","href":"#opentelemetry-java-sdk"},{"level":2,"title":"OpenTelemetry Java instrumentation","href":"#opentelemetry-java-instrumentation"},{"level":2,"title":"Elastic OpenTelemetry Java Distribution","href":"#elastic-opentelemetry-java-distribution"},{"level":2,"title":"Conclusion","href":"#conclusion"}]},{"title":"Optimizing Observability with ES|QL: Streamlining SRE operations and issue resolution for Kubernetes and OTel","slug":"opentelemetry-kubernetes-esql","date":"2023-11-01","description":"ES|QL enhances operational efficiency, data analysis, and issue resolution for SREs. This blog covers the advantages of ES|QL in Elastic Observability and how it can apply to managing issues instrumented with OpenTelemetry and running on Kubernetes.","image":"ES_QL_blog-720x420-05.png","author":[{"slug":"bahubali-shetti","type":"Author","_raw":{}}],"tags":[{"slug":"log-analytics","type":"Tag","_raw":{}},{"slug":"es-ql","type":"Tag","_raw":{}},{"slug":"kubernetes","type":"Tag","_raw":{}},{"slug":"opentelemetry","type":"Tag","_raw":{}},{"slug":"ai-assistant","type":"Tag","_raw":{}},{"slug":"genai","type":"Tag","_raw":{}}],"body":{"raw":"\\nAs an operations engineer (SRE, IT Operations, DevOps), managing technology and data sprawl is an ongoing challenge. Simply managing the large volumes of high dimensionality and high cardinality data is overwhelming.\\n\\nAs a single platform, Elastic\xae helps SREs unify and correlate limitless telemetry data, including metrics, logs, traces, and profiling, into a single datastore — Elasticsearch\xae. By then applying the power of Elastic’s advanced machine learning (ML), AIOps, AI Assistant, and analytics, you can break down silos and turn data into insights. As a full-stack observability solution, everything from infrastructure monitoring to log monitoring and application performance monitoring (APM) can be found in a single, unified experience.\\n\\nIn Elastic 8.11, a technical preview is now available of [Elastic’s new piped query language, ES|QL (Elasticsearch Query Language)](https://www.elastic.co/blog/esql-elasticsearch-piped-query-language), which transforms, enriches, and simplifies data investigations. Powered by a new query engine, ES|QL delivers advanced search capabilities with concurrent processing, improving speed and efficiency, irrespective of data source and structure. 
Accelerate resolution by creating aggregations and visualizations from one screen, delivering an iterative, uninterrupted workflow.\\n\\n## Advantages of ES|QL for SREs\\n\\nSREs using Elastic Observability can leverage ES|QL to analyze logs, metrics, traces, and profiling data, enabling them to pinpoint performance bottlenecks and system issues with a single query. SREs gain the following advantages when managing high dimensionality and high cardinality data with ES|QL in Elastic Observability:\\n\\n- **Improved operational efficiency:** By using ES|QL, SREs can create more actionable notifications with aggregated values as thresholds from a single query, which can also be managed through the Elastic API and integrated into DevOps processes.\\n- **Enhanced analysis with insights:** ES|QL can process diverse observability data, including application, infrastructure, business data, and more, regardless of the source and structure. ES|QL can easily enrich the data with additional fields and context, allowing the creation of visualizations for dashboards or issue analysis with a single query.\\n- **Reduced mean time to resolution:** ES|QL, when combined with Elastic Observability\'s AIOps and AI Assistant, enhances detection accuracy by identifying trends, isolating incidents, and reducing false positives. This improvement in context facilitates troubleshooting and the quick pinpointing and resolution of issues.\\n\\nES|QL in Elastic Observability not only enhances an SRE\'s ability to manage the customer experience, an organization\'s revenue, and SLOs more effectively but also facilitates collaboration with developers and DevOps by providing contextualized aggregated data.\\n\\nIn this blog, we will cover some of the key use cases SREs can leverage with ES|QL:\\n\\n- ES|QL integrated with the Elastic AI Assistant, which uses public LLM and private data, enhances the analysis experience anywhere in Elastic Observability.\\n- SREs can, in a single ES|QL query, break down, analyze, and visualize observability data from multiple sources and across any time frame.\\n- Actionable alerts can be easily created from a single ES|QL query, enhancing operations.\\n\\nI will work through these use cases by showcasing how an SRE can solve a problem in an application instrumented with OpenTelemetry and running on Kubernetes. The OpenTelemetry (OTel) demo is on an Amazon EKS cluster, with Elastic Cloud 8.11 configured.\\n\\nYou can also check out our [Elastic Observability ES|QL Demo](https://www.youtube.com/watch?v=vm0pBWI2l9c), which walks through ES|QL functionality for Observability.\\n\\n## ES|QL with AI Assistant\\n\\nAs an SRE, you are monitoring your OTel instrumented application with Elastic Observability, and while in Elastic APM, you notice some issues highlighted in the service map.\\n\\n![1 - services](/assets/images/opentelemetry-kubernetes-esql/elastic-blog-1-services.png)\\n\\nUsing Elastic AI Assistant, you can easily ask for analysis, and in particular, we check on what the overall latency is across the application services.\\n\\n```plaintext\\nMy APM data is in traces-apm*. What\'s the average latency per service over the last hour? Use ESQL, the data is mapped to ECS\\n```\\n\\n\\n\\nThe Elastic AI Assistant generates an ES|QL query, which we run in the AI Assistant to get a list of the average latencies across all the application services. 
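The generated query looks something like the following (a sketch: the Assistant\'s exact output and field choices can vary, but transaction.duration.us and service.name are the standard APM/ECS fields):\\n\\n```sql\\nFROM traces-apm*\\n| WHERE @timestamp >= NOW() - 1 hours\\n| STATS avg_duration_us = AVG(transaction.duration.us) BY service.name\\n| EVAL avg_latency_ms = avg_duration_us / 1000\\n| SORT avg_latency_ms DESC\\n```\\n\\n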
We can easily see the top four are:\\n\\n- load generator\\n- front-end proxy\\n- frontendservice\\n- checkoutservice\\n\\nWith a simple natural language prompt in the AI Assistant, it generated a single ES|QL query that listed out the latencies across the services.\\n\\nNoticing that there is an issue with several services, we decide to start with the frontend proxy. As we work through the details, we see significant failures, and through **Elastic APM failure correlation**, it becomes apparent that the frontend proxy is not properly completing its calls to downstream services.\\n\\n![2 - failed transaction](/assets/images/opentelemetry-kubernetes-esql/elastic-blog-2-failed-transaction.png)\\n\\n## ES|QL insightful and contextual analysis in Discover\\n\\nKnowing that the application is running on Kubernetes, we investigate if there are issues in Kubernetes. In particular, we want to see if any services are having issues.\\n\\nWe use the following query in ES|QL in Elastic Discover:\\n\\n```sql\\nfrom metrics-* | where kubernetes.container.status.last_terminated_reason != \\"\\" and kubernetes.namespace == \\"default\\" | stats reason_count=count(kubernetes.container.status.last_terminated_reason) by kubernetes.container.name, kubernetes.container.status.last_terminated_reason | where reason_count > 0\\n```\\n\\n![3 - horizontal graph](/assets/images/opentelemetry-kubernetes-esql/elastic-blog-3-two-horizontal-bar-graphs.png)\\n\\nES|QL helps analyze thousands to tens of thousands of metric events from Kubernetes and highlights two services that are restarting due to OOMKilled.\\n\\nThe Elastic AI Assistant, when asked about OOMKilled, indicates that a container in a pod was killed due to an out-of-memory condition.\\n\\n![4 - understanding oomkilled](/assets/images/opentelemetry-kubernetes-esql/elastic-blog-4-understanding-oomkilled.png)\\n\\nWe run another ES|QL query to understand the memory usage for emailservice and productcatalogservice.\\n\\n![5 - split bar graphs](/assets/images/opentelemetry-kubernetes-esql/elastic-blog-5-split-bar-graphs.png)\\n\\nThe ES|QL query shows that the average memory usage for both services is fairly high.\\n\\nWe can now further investigate both of these services’ logs, metrics, and Kubernetes-related data. However, before we continue, we create an alert to track heavy memory usage.\\n\\n## Actionable alerts with ES|QL\\n\\nSuspecting a specific issue that might recur, we create an alert based on the ES|QL query we just ran, tracking any service that exceeds 50% memory utilization.\\n\\nWe modify the last query to find any service with high memory usage:\\n\\n```sql\\nFROM metrics*\\n| WHERE @timestamp >= NOW() - 1 hours\\n| STATS avg_memory_usage = AVG(kubernetes.pod.memory.usage.limit.pct) BY kubernetes.deployment.name | where avg_memory_usage > .5\\n```\\n\\nWith that query, we create a simple alert. Notice how the ES|QL query is brought into the alert. We simply connect this to PagerDuty, but we can choose from multiple connectors like ServiceNow, Opsgenie, email, etc.\\n\\n![6 - create rule](/assets/images/opentelemetry-kubernetes-esql/elastic-blog-6-create-rule.png)\\n\\nWith this alert, we can now easily monitor for any services that exceed 50% memory utilization in their pods.\\n\\n## Make the most of your data with ES|QL\\n\\nIn this post, we demonstrated the power ES|QL brings to analysis, operations, and reducing MTTR. 
In summary, the three use cases with ES|QL in Elastic Observability are as follows:\\n\\n- ES|QL integrated with the Elastic AI Assistant, which uses public LLM and private data, enhances the analysis experience anywhere in Elastic Observability.\\n- SREs can, in a single ES|QL query, break down, analyze, and visualize observability data from multiple sources and across any time frame.\\n- Actionable alerts can be easily created from a single ES|QL query, enhancing operations.\\n\\nElastic invites SREs and developers to experience this transformative language firsthand and unlock new horizons in their data tasks. Try it today at [https://ela.st/free-trial](https://ela.st/free-trial) now in technical preview.\\n\\n> - [Elastic Observability Tour](https://www.elastic.co/demo-gallery/observability)\\n> - [The power of effective log management](https://www.elastic.co/blog/log-management-observability-operations)\\n> - [Transforming Observability with the AI Assistant](https://www.elastic.co/blog/context-aware-insights-elastic-ai-assistant-observability)\\n> - [ES|QL announcement blog](https://www.elastic.co/blog/esql-elasticsearch-piped-query-language)\\n\\n_The release and timing of any features or functionality described in this post remain at Elastic\'s sole discretion. Any features or functionality not currently available may not be delivered on time or at all._\\n","code":"var Component=(()=>{var u=Object.create;var s=Object.defineProperty;var g=Object.getOwnPropertyDescriptor;var p=Object.getOwnPropertyNames;var m=Object.getPrototypeOf,y=Object.prototype.hasOwnProperty;var w=(i,e)=>()=>(e||i((e={exports:{}}).exports,e),e.exports),f=(i,e)=>{for(var t in e)s(i,t,{get:e[t],enumerable:!0})},l=(i,e,t,r)=>{if(e&&typeof e==\\"object\\"||typeof e==\\"function\\")for(let a of p(e))!y.call(i,a)&&a!==t&&s(i,a,{get:()=>e[a],enumerable:!(r=g(e,a))||r.enumerable});return i};var b=(i,e,t)=>(t=i!=null?u(m(i)):{},l(e||!i||!i.__esModule?s(t,\\"default\\",{value:i,enumerable:!0}):t,i)),v=i=>l(s({},\\"__esModule\\",{value:!0}),i);var c=w((A,o)=>{o.exports=_jsx_runtime});var L={};f(L,{default:()=>d,frontmatter:()=>E});var n=b(c()),E={title:\\"Optimizing Observability with ES|QL: Streamlining SRE operations and issue resolution for Kubernetes and OTel\\",slug:\\"opentelemetry-kubernetes-esql\\",date:\\"2023-11-01\\",description:\\"ES|QL enhances operational efficiency, data analysis, and issue resolution for SREs. This blog covers the advantages of ES|QL in Elastic Observability and how it can apply to managing issues instrumented with OpenTelemetry and running on Kubernetes.\\",author:[{slug:\\"bahubali-shetti\\"}],image:\\"ES_QL_blog-720x420-05.png\\",tags:[{slug:\\"log-analytics\\"},{slug:\\"es-ql\\"},{slug:\\"kubernetes\\"},{slug:\\"opentelemetry\\"},{slug:\\"ai-assistant\\"},{slug:\\"genai\\"}]};function h(i){let e={a:\\"a\\",blockquote:\\"blockquote\\",code:\\"code\\",em:\\"em\\",h2:\\"h2\\",img:\\"img\\",li:\\"li\\",p:\\"p\\",pre:\\"pre\\",strong:\\"strong\\",ul:\\"ul\\",...i.components},{Video:t}=e;return t||S(\\"Video\\",!0),(0,n.jsxs)(n.Fragment,{children:[(0,n.jsx)(e.p,{children:\\"As an operations engineer (SRE, IT Operations, DevOps), managing technology and data sprawl is an ongoing challenge. 
Simply managing the large volumes of high dimensionality and high cardinality data is overwhelming.\\"}),`\\n`,(0,n.jsx)(e.p,{children:\\"As a single platform, Elastic\\\\xAE helps SREs unify and correlate limitless telemetry data, including metrics, logs, traces, and profiling, into a single datastore \\\\u2014 Elasticsearch\\\\xAE. By then applying the power of Elastic\\\\u2019s advanced machine learning (ML), AIOps, AI Assistant, and analytics, you can break down silos and turn data into insights. As a full-stack observability solution, everything from infrastructure monitoring to log monitoring and application performance monitoring (APM) can be found in a single, unified experience.\\"}),`\\n`,(0,n.jsxs)(e.p,{children:[\\"In Elastic 8.11, a technical preview is now available of \\",(0,n.jsx)(e.a,{href:\\"https://www.elastic.co/blog/esql-elasticsearch-piped-query-language\\",rel:\\"nofollow\\",children:\\"Elastic\\\\u2019s new piped query language, ES|QL (Elasticsearch Query Language)\\"}),\\", which transforms, enriches, and simplifies data investigations. Powered by a new query engine, ES|QL delivers advanced search capabilities with concurrent processing, improving speed and efficiency, irrespective of data source and structure. Accelerate resolution by creating aggregations and visualizations from one screen, delivering an iterative, uninterrupted workflow.\\"]}),`\\n`,(0,n.jsx)(e.h2,{id:\\"advantages-of-esql-for-sres\\",children:\\"Advantages of ES|QL for SREs\\"}),`\\n`,(0,n.jsx)(e.p,{children:\\"SREs using Elastic Observability can leverage ES|QL to analyze logs, metrics, traces, and profiling data, enabling them to pinpoint performance bottlenecks and system issues with a single query. SREs gain the following advantages when managing high dimensionality and high cardinality data with ES|QL in Elastic Observability:\\"}),`\\n`,(0,n.jsxs)(e.ul,{children:[`\\n`,(0,n.jsxs)(e.li,{children:[(0,n.jsx)(e.strong,{children:\\"Improved operational efficiency:\\"}),\\" By using ES|QL, SREs can create more actionable notifications with aggregated values as thresholds from a single query, which can also be managed through the Elastic API and integrated into DevOps processes.\\"]}),`\\n`,(0,n.jsxs)(e.li,{children:[(0,n.jsx)(e.strong,{children:\\"Enhanced analysis with insights:\\"}),\\" ES|QL can process diverse observability data, including application, infrastructure, business data, and more, regardless of the source and structure. ES|QL can easily enrich the data with additional fields and context, allowing the creation of visualizations for dashboards or issue analysis with a single query.\\"]}),`\\n`,(0,n.jsxs)(e.li,{children:[(0,n.jsx)(e.strong,{children:\\"Reduced mean time to resolution:\\"}),\\" ES|QL, when combined with Elastic Observability\'s AIOps and AI Assistant, enhances detection accuracy by identifying trends, isolating incidents, and reducing false positives. 
This improvement in context facilitates troubleshooting and the quick pinpointing and resolution of issues.\\"]}),`\\n`]}),`\\n`,(0,n.jsx)(e.p,{children:\\"ES|QL in Elastic Observability not only enhances an SRE\'s ability to manage the customer experience, an organization\'s revenue, and SLOs more effectively but also facilitates collaboration with developers and DevOps by providing contextualized aggregated data.\\"}),`\\n`,(0,n.jsx)(e.p,{children:\\"In this blog, we will cover some of the key use cases SREs can leverage with ES|QL:\\"}),`\\n`,(0,n.jsxs)(e.ul,{children:[`\\n`,(0,n.jsx)(e.li,{children:\\"ES|QL integrated with the Elastic AI Assistant, which uses public LLM and private data, enhances the analysis experience anywhere in Elastic Observability.\\"}),`\\n`,(0,n.jsx)(e.li,{children:\\"SREs can, in a single ES|QL query, break down, analyze, and visualize observability data from multiple sources and across any time frame.\\"}),`\\n`,(0,n.jsx)(e.li,{children:\\"Actionable alerts can be easily created from a single ES|QL query, enhancing operations.\\"}),`\\n`]}),`\\n`,(0,n.jsx)(e.p,{children:\\"I will work through these use cases by showcasing how an SRE can solve a problem in an application instrumented with OpenTelemetry and running on Kubernetes. The OpenTelemetry (OTel) demo is on an Amazon EKS cluster, with Elastic Cloud 8.11 configured.\\"}),`\\n`,(0,n.jsxs)(e.p,{children:[\\"You can also check out our \\",(0,n.jsx)(e.a,{href:\\"https://www.youtube.com/watch?v=vm0pBWI2l9c\\",rel:\\"nofollow\\",children:\\"Elastic Observability ES|QL Demo\\"}),\\", which walks through ES|QL functionality for Observability.\\"]}),`\\n`,(0,n.jsx)(e.h2,{id:\\"esql-with-ai-assistant\\",children:\\"ES|QL with AI Assistant\\"}),`\\n`,(0,n.jsx)(e.p,{children:\\"As an SRE, you are monitoring your OTel instrumented application with Elastic Observability, and while in Elastic APM, you notice some issues highlighted in the service map.\\"}),`\\n`,(0,n.jsx)(e.p,{children:(0,n.jsx)(e.img,{src:\\"/assets/images/opentelemetry-kubernetes-esql/elastic-blog-1-services.png\\",alt:\\"1 - services\\",width:\\"1522\\",height:\\"1163\\"})}),`\\n`,(0,n.jsx)(e.p,{children:\\"Using Elastic AI Assistant, you can easily ask for analysis, and in particular, we check on what the overall latency is across the application services.\\"}),`\\n`,(0,n.jsx)(e.pre,{children:(0,n.jsx)(e.code,{className:\\"language-plaintext\\",children:`My APM data is in traces-apm*. What\'s the average latency per service over the last hour? Use ESQL, the data is mapped to ECS\\n`})}),`\\n`,(0,n.jsx)(t,{vidyardUuid:\\"wHJpzouDQHB51UftmkHFyo\\"}),`\\n`,(0,n.jsx)(e.p,{children:\\"The Elastic AI Assistant generates an ES|QL query, which we run in the AI Assistant to get a list of the average latencies across all the application services. We can easily see the top four are:\\"}),`\\n`,(0,n.jsxs)(e.ul,{children:[`\\n`,(0,n.jsx)(e.li,{children:\\"load generator\\"}),`\\n`,(0,n.jsx)(e.li,{children:\\"front-end proxy\\"}),`\\n`,(0,n.jsx)(e.li,{children:\\"frontendservice\\"}),`\\n`,(0,n.jsx)(e.li,{children:\\"checkoutservice\\"}),`\\n`]}),`\\n`,(0,n.jsx)(e.p,{children:\\"With a simple natural language query in the AI Assistant, it generated a single ES|QL query that helped list out the latencies across the services.\\"}),`\\n`,(0,n.jsxs)(e.p,{children:[\\"Noticing that there is an issue with several services, we decide to start with the frontend proxy. 
As we work through the details, we see significant failures, and through \\",(0,n.jsx)(e.strong,{children:\\"Elastic APM failure correlation\\"}),\\" , it becomes apparent that the frontend proxy is not properly completing its calls to downstream services.\\"]}),`\\n`,(0,n.jsx)(e.p,{children:(0,n.jsx)(e.img,{src:\\"/assets/images/opentelemetry-kubernetes-esql/elastic-blog-2-failed-transaction.png\\",alt:\\"2 - failed transaction\\",width:\\"1534\\",height:\\"1141\\"})}),`\\n`,(0,n.jsx)(e.h2,{id:\\"esql-insightful-and-contextual-analysis-in-discover\\",children:\\"ES|QL insightful and contextual analysis in Discover\\"}),`\\n`,(0,n.jsx)(e.p,{children:\\"Knowing that the application is running on Kubernetes, we investigate if there are issues in Kubernetes. In particular, we want to see if there are any services having issues.\\"}),`\\n`,(0,n.jsx)(e.p,{children:\\"We use the following query in ES|QL in Elastic Discover:\\"}),`\\n`,(0,n.jsx)(e.pre,{children:(0,n.jsx)(e.code,{className:\\"language-sql\\",children:`from metrics-* | where kubernetes.container.status.last_terminated_reason != \\"\\" and kubernetes.namespace == \\"default\\" | stats reason_count=count(kubernetes.container.status.last_terminated_reason) by kubernetes.container.name, kubernetes.container.status.last_terminated_reason | where reason_count > 0\\n`})}),`\\n`,(0,n.jsx)(e.p,{children:(0,n.jsx)(e.img,{src:\\"/assets/images/opentelemetry-kubernetes-esql/elastic-blog-3-two-horizontal-bar-graphs.png\\",alt:\\"3 - horizontal graph\\",width:\\"1532\\",height:\\"716\\"})}),`\\n`,(0,n.jsx)(e.p,{children:\\"ES|QL helps analyze 1,000s/10,000s of metric events from Kubernetes and highlights two services that are restarting due to OOMKilled.\\"}),`\\n`,(0,n.jsx)(e.p,{children:\\"The Elastic AI Assistant, when asked about OOMKilled, indicates that a container in a pod was killed due to an out-of-memory condition.\\"}),`\\n`,(0,n.jsx)(e.p,{children:(0,n.jsx)(e.img,{src:\\"/assets/images/opentelemetry-kubernetes-esql/elastic-blog-4-understanding-oomkilled.png\\",alt:\\"4 - understanding oomkilled\\",width:\\"1528\\",height:\\"1174\\"})}),`\\n`,(0,n.jsx)(e.p,{children:\\"We run another ES|QL query to understand the memory usage for emailservice and productcatalogservice.\\"}),`\\n`,(0,n.jsx)(e.p,{children:(0,n.jsx)(e.img,{src:\\"/assets/images/opentelemetry-kubernetes-esql/elastic-blog-5-split-bar-graphs.png\\",alt:\\"5 - split bar graphs\\",width:\\"1526\\",height:\\"747\\"})}),`\\n`,(0,n.jsx)(e.p,{children:\\"ES|QL easily found the average memory usage fairly high.\\"}),`\\n`,(0,n.jsx)(e.p,{children:\\"We can now further investigate both of these services\\\\u2019 logs, metrics, and Kubernetes-related data. 
However, before we continue, we create an alert to track heavy memory usage.\\"}),`\\n`,(0,n.jsx)(e.h2,{id:\\"actionable-alerts-with-esql\\",children:\\"Actionable alerts with ES|QL\\"}),`\\n`,(0,n.jsx)(e.p,{children:\\"Suspecting a specific issue, that might recur, we simply create an alert that brings in the ES|QL query we just ran that will track for any service that exceeds 50% in memory utilization.\\"}),`\\n`,(0,n.jsx)(e.p,{children:\\"We modify the last query to find any service with high memory usage:\\"}),`\\n`,(0,n.jsx)(e.pre,{children:(0,n.jsx)(e.code,{className:\\"language-sql\\",children:`FROM metrics*\\n| WHERE @timestamp >= NOW() - 1 hours\\n| STATS avg_memory_usage = AVG(kubernetes.pod.memory.usage.limit.pct) BY kubernetes.deployment.name | where avg_memory_usage > .5\\n`})}),`\\n`,(0,n.jsx)(e.p,{children:\\"With that query, we create a simple alert. Notice how the ES|QL query is brought into the alert. We simply connect this to pager duty. But we can choose from multiple connectors like ServiceNow, Opsgenie, email, etc.\\"}),`\\n`,(0,n.jsx)(e.p,{children:(0,n.jsx)(e.img,{src:\\"/assets/images/opentelemetry-kubernetes-esql/elastic-blog-6-create-rule.png\\",alt:\\"6 - create rule\\",width:\\"621\\",height:\\"1076\\"})}),`\\n`,(0,n.jsx)(e.p,{children:\\"With this alert, we can now easily monitor for any services that exceed 50% memory utilization in their pods.\\"}),`\\n`,(0,n.jsx)(e.h2,{id:\\"make-the-most-of-your-data-with-esql\\",children:\\"Make the most of your data with ES|QL\\"}),`\\n`,(0,n.jsx)(e.p,{children:\\"In this post, we demonstrated the power ES|QL brings to analysis, operations, and reducing MTTR. In summary, the three use cases with ES|QL in Elastic Observability are as follows:\\"}),`\\n`,(0,n.jsxs)(e.ul,{children:[`\\n`,(0,n.jsx)(e.li,{children:\\"ES|QL integrated with the Elastic AI Assistant, which uses public LLM and private data, enhances the analysis experience anywhere in Elastic Observability.\\"}),`\\n`,(0,n.jsx)(e.li,{children:\\"SREs can, in a single ES|QL query, break down, analyze, and visualize observability data from multiple sources and across any time frame.\\"}),`\\n`,(0,n.jsx)(e.li,{children:\\"Actionable alerts can be easily created from a single ES|QL query, enhancing operations.\\"}),`\\n`]}),`\\n`,(0,n.jsxs)(e.p,{children:[\\"Elastic invites SREs and developers to experience this transformative language firsthand and unlock new horizons in their data tasks. 
Try it today at \\",(0,n.jsx)(e.a,{href:\\"https://ela.st/free-trial\\",rel:\\"nofollow\\",children:\\"https://ela.st/free-trial\\"}),\\" now in technical preview.\\"]}),`\\n`,(0,n.jsxs)(e.blockquote,{children:[`\\n`,(0,n.jsxs)(e.ul,{children:[`\\n`,(0,n.jsx)(e.li,{children:(0,n.jsx)(e.a,{href:\\"https://www.elastic.co/demo-gallery/observability\\",rel:\\"nofollow\\",children:\\"Elastic Observability Tour\\"})}),`\\n`,(0,n.jsx)(e.li,{children:(0,n.jsx)(e.a,{href:\\"https://www.elastic.co/blog/log-management-observability-operations\\",rel:\\"nofollow\\",children:\\"The power of effective log management\\"})}),`\\n`,(0,n.jsx)(e.li,{children:(0,n.jsx)(e.a,{href:\\"https://www.elastic.co/blog/context-aware-insights-elastic-ai-assistant-observability\\",rel:\\"nofollow\\",children:\\"Transforming Observability with the AI Assistant\\"})}),`\\n`,(0,n.jsx)(e.li,{children:(0,n.jsx)(e.a,{href:\\"https://www.elastic.co/blog/esql-elasticsearch-piped-query-language\\",rel:\\"nofollow\\",children:\\"ES|QL announcement blog\\"})}),`\\n`]}),`\\n`]}),`\\n`,(0,n.jsx)(e.p,{children:(0,n.jsx)(e.em,{children:\\"The release and timing of any features or functionality described in this post remain at Elastic\'s sole discretion. Any features or functionality not currently available may not be delivered on time or at all.\\"})})]})}function d(i={}){let{wrapper:e}=i.components||{};return e?(0,n.jsx)(e,{...i,children:(0,n.jsx)(h,{...i})}):h(i)}function S(i,e){throw new Error(\\"Expected \\"+(e?\\"component\\":\\"object\\")+\\" `\\"+i+\\"` to be defined: you likely forgot to import, pass, or provide it.\\")}return v(L);})();\\n;return Component;"},"_id":"articles/opentelemetry-kubernetes-problem-resolution-elastic-observability-esql.mdx","_raw":{"sourceFilePath":"articles/opentelemetry-kubernetes-problem-resolution-elastic-observability-esql.mdx","sourceFileName":"opentelemetry-kubernetes-problem-resolution-elastic-observability-esql.mdx","sourceFileDir":"articles","contentType":"mdx","flattenedPath":"articles/opentelemetry-kubernetes-problem-resolution-elastic-observability-esql"},"type":"Article","imageUrl":"/assets/images/opentelemetry-kubernetes-esql/ES_QL_blog-720x420-05.png","readingTime":"7 min read","url":"/opentelemetry-kubernetes-esql","headings":[{"level":2,"title":"Advantages of ES|QL for SREs","href":"#advantages-of-esql-for-sres"},{"level":2,"title":"ES|QL with AI Assistant","href":"#esql-with-ai-assistant"},{"level":2,"title":"ES|QL insightful and contextual analysis in Discover","href":"#esql-insightful-and-contextual-analysis-in-discover"},{"level":2,"title":"Actionable alerts with ES|QL","href":"#actionable-alerts-with-esql"},{"level":2,"title":"Make the most of your data with ES|QL","href":"#make-the-most-of-your-data-with-esql"}]},{"title":"Independence with OpenTelemetry on Elastic","slug":"opentelemetry-observability","date":"2022-11-15","description":"OpenTelemetry has become a key component for observability given its open standards and developer-friendly tools. 
See how easily Elastic Observability integrates with OTel to provide a platform that minimizes vendor lock-in and maximizes flexibility.","image":"illustration-scalability-gear-1680x980_(1).jpg","author":[{"slug":"bahubali-shetti","type":"Author","_raw":{}}],"tags":[{"slug":"opentelemetry","type":"Tag","_raw":{}},{"slug":"kubernetes","type":"Tag","_raw":{}},{"slug":"apm","type":"Tag","_raw":{}},{"slug":"cloud-monitoring","type":"Tag","_raw":{}},{"slug":"google-cloud","type":"Tag","_raw":{}},{"slug":"gke","type":"Tag","_raw":{}},{"slug":"apm","type":"Tag","_raw":{}},{"slug":"elastic-agent","type":"Tag","_raw":{}}],"body":{"raw":"\\nThe drive for faster, more scalable services is on the rise. Our day-to-day lives depend on apps, from a food delivery app to have your favorite meal delivered, to your banking app to manage your accounts, to even apps to schedule doctor’s appointments. These apps need to be able to grow from not only a features standpoint but also in terms of user capacity. The scale and need for global reach drives increasing complexity for these high-demand cloud applications.\\n\\nIn order to keep pace with demand, most of these online apps and services (for example, mobile applications, web pages, SaaS) are moving to a distributed microservice-based architecture and Kubernetes. Once you’ve migrated your app to the cloud, how do you manage and monitor production, scale, and availability of the service? [OpenTelemetry](https://opentelemetry.io/) is quickly becoming the de facto standard for instrumentation and collecting application telemetry data for Kubernetes applications.\\n\\n[OpenTelemetry (OTel)](https://www.elastic.co/what-is/opentelemetry) is an open source project providing a collection of tools, APIs, and SDKs that can be used to generate, collect, and export telemetry data (metrics, logs, and traces) to understand software performance and behavior. OpenTelemetry recently became a CNCF incubating project and has a significant amount of growing community and vendor support.\\n\\nWhile OTel provides a standard way to instrument applications with a standard telemetry format, it doesn’t provide any backend or analytics components. Hence using OTel libraries in applications, infrastructure, and user experience monitoring provides flexibility in choosing the appropriate [observability tool](https://www.elastic.co/observability) of choice. There is no longer any vendor lock-in for application performance monitoring (APM).\\n\\n![](/assets/images/opentelemetry-observability/blog-elastic-otel-1.png)\\n\\nElastic Observability natively supports OpenTelemetry and its OpenTelemetry protocol (OTLP) to ingest traces, metrics, and logs. All of Elastic Observability’s APM capabilities are available with OTel data. 
Hence the following capabilities (and more) are available for OTel data:\\n\\n- Service maps\\n- Service details (latency, throughput, failed transactions)\\n- Dependencies between services\\n- Transactions (traces)\\n- ML correlations (specifically for latency)\\n- Service logs\\n\\nIn addition to Elastic’s APM and unified view of the telemetry data, you will now be able to use Elastic’s powerful machine learning capabilities to reduce analysis effort, along with alerting to help reduce MTTR.\\n\\n![](/assets/images/opentelemetry-observability/blog-elastic-otel-2.png)\\n\\nGiven its open source heritage, Elastic also supports other CNCF-based projects, such as Prometheus, Fluentd, Fluent Bit, Istio, Kubernetes (K8S), and many more.\\n\\nThis blog will show:\\n\\n- How to get a popular OTel instrumented demo app (Hipster Shop) configured to ingest into [Elastic Cloud](https://cloud.elastic.co) through a few easy steps\\n- Some of the Elastic APM capabilities and features around OTel data and what you can do with this data once it’s in Elastic\\n\\nIn follow-up blogs, we will detail how to use Elastic’s machine learning with OTel telemetry data, how to instrument OTel application metrics for specific languages, how we can support Prometheus ingest through the OTel collector, and more. Stay tuned!\\n\\n## Prerequisites and config\\n\\nIf you plan on following this blog, here are some of the components and details we used to set up the configuration:\\n\\n- Ensure you have an account on [Elastic Cloud](https://cloud.elastic.co) and a deployed stack ([see instructions here](https://www.elastic.co/guide/en/elastic-stack/current/installing-elastic-stack.html)).\\n- We used the OpenTelemetry Demo. Directions for using Elastic with OpenTelemetry Demo are [here](https://github.com/elastic/opentelemetry-demo).\\n- Make sure you have [kubectl](https://kubernetes.io/docs/reference/kubectl/) and [helm](https://helm.sh/) also installed locally.\\n- Additionally, we are using an OTel manually instrumented version of the application. No OTel automatic instrumentation was used in this blog configuration.\\n- Location of our clusters: while we used Google Kubernetes Engine (GKE), you can use any Kubernetes platform of your choice.\\n- While Elastic can ingest telemetry directly from OTel instrumented services, we will focus on the more traditional deployment, which uses the OpenTelemetry Collector.\\n- Prometheus and Fluentd/Fluent Bit — traditionally used to pull Kubernetes data — are not being used here. 
Follow-up blogs will showcase this.\\n\\nHere is the configuration we will get set up in this blog:\\n\\n![Configuration to ingest OpenTelemetry data used in this blog](/assets/images/opentelemetry-observability/blog-elastic-otel-3.png)\\n\\n## Setting it all up\\n\\nOver the next few steps, I’ll walk through an [Opentelemetry visualization](https://www.elastic.co/observability/opentelemetry):\\n\\n- Getting an account on Elastic Cloud\\n- Bringing up a GKE cluster\\n- Bringing up the application\\n- Configuring Kubernetes OTel Collector configmap to point to Elastic Cloud\\n- Using Elastic Observability APM with OTel data for improved visibility\\n\\n### Step 0: Create an account on Elastic Cloud\\n\\nFollow the instructions to [get started on Elastic Cloud](https://cloud.elastic.co/registration?fromURI=/home).\\n\\n![](/assets/images/opentelemetry-observability/blog-elastic-otel-4.png)\\n\\n### Step 1: Bring up a K8S cluster\\n\\nWe used Google Kubernetes Engine (GKE), but you can use any Kubernetes platform of your choice.\\n\\nThere are no special requirements for Elastic to collect OpenTelemetry data from a Kubernetes cluster. Any normal Kubernetes cluster on GKE, EKS, AKS, or Kubernetes compliant cluster (self-deployed and managed) works.\\n\\n### Step 2: Load the OpenTelemetry demo application on the cluster\\n\\nGet your application on a Kubernetes cluster in your cloud service of choice or local Kubernetes platform. The application I am using is available [here](https://github.com/bshetti/opentelemetry-microservices-demo/tree/main/deploy-with-collector-k8s).\\n\\nFirst clone the directory locally:\\n\\n```bash\\ngit clone https://github.com/elastic/opentelemetry-demo.git\\n```\\n\\n(Make sure you have [kubectl](https://kubernetes.io/docs/reference/kubectl/) and [helm](https://helm.sh/) also installed locally.)\\n\\nThe instructions utilize a specific opentelemetry-collector configuration for Elastic. 
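Conceptually, this configuration wires up an OTLP exporter in the collector that points at the Elastic APM Server. A minimal sketch of what that looks like (standard opentelemetry-collector keys; the exact layout of the Elastic values.yaml may differ, and YOUR_APM_ENDPOINT / YOUR_APM_SECRET_TOKEN are placeholders):\\n\\n```yaml\\nreceivers:\\n  otlp:\\n    protocols:\\n      grpc:\\nexporters:\\n  otlp/elastic:\\n    endpoint: \\"YOUR_APM_ENDPOINT:443\\"\\n    headers:\\n      Authorization: \\"Bearer YOUR_APM_SECRET_TOKEN\\"\\nservice:\\n  pipelines:\\n    traces:\\n      receivers: [otlp]\\n      exporters: [otlp/elastic]\\n```\\n\\n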
Essentially, the Elastic [values.yaml](https://github.com/elastic/opentelemetry-demo/blob/main/kubernetes/elastic-helm/values.yaml) file specified in the elastic/opentelemetry-demo configures the opentelemetry-collector to point to the Elastic APM Server using two main values:\\n\\nOTEL_EXPORTER_OTLP_ENDPOINT: Elastic’s APM Server endpoint \\nOTEL_EXPORTER_OTLP_HEADERS: the Elastic authorization header\\n\\nThese two values can be found in the OpenTelemetry setup instructions under the APM integration instructions (Integrations-\\\\>APM) in your Elastic Cloud deployment.\\n\\n![elastic apm agents](/assets/images/opentelemetry-observability/blog-elastic-apm-agents.png)\\n\\nOnce you obtain these, the first step is to create a secret on the cluster with your Elastic APM server endpoint and your APM secret token, with the following instruction:\\n\\n```bash\\nkubectl create secret generic elastic-secret \\\\\\n --from-literal=elastic_apm_endpoint=\'YOUR_APM_ENDPOINT_WITHOUT_HTTPS_PREFIX\' \\\\\\n --from-literal=elastic_apm_secret_token=\'YOUR_APM_SECRET_TOKEN\'\\n```\\n\\nDon\'t forget to replace:\\n\\n- YOUR_APM_ENDPOINT_WITHOUT_HTTPS_PREFIX: your Elastic APM endpoint ( **without the https:// prefix** ), i.e., the OTEL_EXPORTER_OTLP_ENDPOINT value\\n- YOUR_APM_SECRET_TOKEN: your Elastic APM secret token, i.e., the OTEL_EXPORTER_OTLP_HEADERS value\\n\\nNow execute the following commands:\\n\\n```bash\\n# switch to the kubernetes/elastic-helm directory\\ncd kubernetes/elastic-helm\\n\\n# add the open-telemetry Helm repository\\nhelm repo add open-telemetry https://open-telemetry.github.io/opentelemetry-helm-charts\\n\\n# deploy the demo through helm install\\nhelm install -f values.yaml my-otel-demo open-telemetry/opentelemetry-demo\\n```\\n\\nOnce your application is up on Kubernetes, you will have the following pods (or some variant) running in the **default** namespace.\\n\\n```bash\\nkubectl get pods -n default\\n```\\n\\nOutput should be similar to the following:\\n\\n```bash\\nNAME READY STATUS RESTARTS AGE\\nmy-otel-demo-accountingservice-5c77754b4f-vwph6 1/1 Running 0 5d4h\\nmy-otel-demo-adservice-6b8b7c7dc5-mb7j5 1/1 Running 0 5d4h\\nmy-otel-demo-cartservice-76d94b7dcd-2g4lf 1/1 Running 0 5d4h\\nmy-otel-demo-checkoutservice-988bbdb88-hmkrp 1/1 Running 0 5d4h\\nmy-otel-demo-currencyservice-6cf4b5f9f6-vz9t2 1/1 Running 0 5d4h\\nmy-otel-demo-emailservice-868c98fd4b-lpr7n 1/1 Running 6 (18h ago) 5d4h\\nmy-otel-demo-featureflagservice-8446ff9c94-lzd4w 1/1 Running 0 5d4h\\nmy-otel-demo-ffspostgres-867945d9cf-zzwd7 1/1 Running 0 5d4h\\nmy-otel-demo-frauddetectionservice-5c97c589b9-z8fhz 1/1 Running 0 5d4h\\nmy-otel-demo-frontend-d85ccf677-zg9fp 1/1 Running 0 5d4h\\nmy-otel-demo-frontendproxy-6c5c4fccf6-qmldp 1/1 Running 0 5d4h\\nmy-otel-demo-kafka-68bcc66794-dsbr6 1/1 Running 0 5d4h\\nmy-otel-demo-loadgenerator-64c545b974-xfccq 1/1 Running 1 (36h ago) 5d4h\\nmy-otel-demo-otelcol-fdfd9c7cf-6lr2w 1/1 Running 0 5d4h\\nmy-otel-demo-paymentservice-7955c68859-ff7zg 1/1 Running 0 5d4h\\nmy-otel-demo-productcatalogservice-67c879657b-wn2wj 1/1 Running 0 5d4h\\nmy-otel-demo-quoteservice-748d754ffc-qcwm4 1/1 Running 0 5d4h\\nmy-otel-demo-recommendationservice-df78894c7-lwm5v 1/1 Running 0 5d4h\\nmy-otel-demo-redis-7d48567546-h4p4t 1/1 Running 0 5d4h\\nmy-otel-demo-shippingservice-f6fc76ddd-2v7qv 1/1 Running 0 5d4h\\n```\\n\\n### Step 3: Open Kibana and use the APM Service Map to view your OTel instrumented Services\\n\\nIn the Elastic Observability UI under APM, select servicemap to see your services.\\n\\n![elastic observability 
APM](/assets/images/opentelemetry-observability/blog-elastic-observability-APM.png)\\n\\n![elastic observability OTEL service map](/assets/images/opentelemetry-observability/blog-elastic-observability-OTEL-service-map.png)\\n\\nIf you are seeing this, then the OpenTelemetry Collector is sending data into Elastic:\\n\\n_Congratulations,_ _you\'ve instrumented the OpenTelemetry demo application and successfully ingested the telemetry data into Elastic!_\\n\\n### Step 4: What can Elastic show me?\\n\\nNow that the OpenTelemetry data is ingested into Elastic, what can you do?\\n\\nFirst, you can view the APM service map (as shown in the previous step) — this will give you a full view of all the services and the transaction flows between services.\\n\\nNext, you can now check out individual services and the transactions being collected.\\n\\n![elastic observability frontend overview](/assets/images/opentelemetry-observability/blog-elastic-observability-frontend-overview.png)\\n\\nAs you can see, the frontend details are listed. Everything from:\\n\\n- Average service latency\\n- Throughput\\n- Main transactions\\n- Failed transaction rate\\n- Errors\\n- Dependencies\\n\\nLet’s get to the trace. In the Transactions tab, you can review all the types of transactions related to the frontend service:\\n\\n![elastic observability frontend transactions](/assets/images/opentelemetry-observability/blog-elastic-observability-frontend-transactions.png)\\n\\nSelecting the HTTP POST transaction, we can see the full trace with all the spans:\\n\\n![Average latency for this transaction, throughput, any failures, and of course the trace!](/assets/images/opentelemetry-observability/blog-elastic-observability-frontend-HTTP-POST.png)\\n\\nNot only can you review the trace, but you can also analyze what is related to higher-than-normal latency for HTTP POST.\\n\\nElastic uses machine learning to help identify any potential latency issues across the services from the trace. It’s as simple as selecting the Latency Correlations tab and running the correlation.\\n\\n![elastic observability latency correlations](/assets/images/opentelemetry-observability/blog-elastic-latency-correlations.png)\\n\\nThis shows that the high latency transactions are occurring in the checkout service with a medium correlation.\\n\\nYou can then drill down into logs directly from the trace view and review the logs associated with the trace to help identify and pinpoint potential issues.\\n\\n![elastic observability latency distribution](/assets/images/opentelemetry-observability/blog-elastic-latency-distribution.png)\\n\\n### Analyze your data with Elastic machine learning (ML)\\n\\nOnce OpenTelemetry metrics are in Elastic, start analyzing your data through Elastic’s ML capabilities.\\n\\nA great review of these features can be found here: [Correlating APM telemetry to determine root causes in transactions](https://www.elastic.co/blog/apm-correlations-elastic-observability-root-cause-transactions). And there are many more videos and blogs on [Elastic’s Blog](https://www.elastic.co/blog/). 
We’ll follow up with additional blogs on leveraging Elastic’s machine learning capabilities for OpenTelemetry data.\\n\\n## Conclusion\\n\\nI hope you’ve gotten an appreciation for how Elastic Observability can help you ingest and analyze OpenTelemetry data with Elastic’s APM capabilities.\\n\\nA quick recap of the lessons learned:\\n\\n- How to get a popular OTel instrumented demo app (Hipster Shop) configured to ingest into [Elastic Cloud](https://cloud.elastic.co), through a few easy steps\\n- Some of the Elastic APM capabilities and features around OTel data and what you can do with this data once it’s in Elastic\\n\\nReady to get started? Sign up [for Elastic Cloud](https://cloud.elastic.co/registration) and try out the features and capabilities I’ve outlined above to get the most value and visibility out of your OpenTelemetry data.\\n","code":"var Component=(()=>{var p=Object.create;var o=Object.defineProperty;var u=Object.getOwnPropertyDescriptor;var m=Object.getOwnPropertyNames;var g=Object.getPrototypeOf,y=Object.prototype.hasOwnProperty;var b=(n,e)=>()=>(e||n((e={exports:{}}).exports,e),e.exports),f=(n,e)=>{for(var i in e)o(n,i,{get:e[i],enumerable:!0})},r=(n,e,i,a)=>{if(e&&typeof e==\\"object\\"||typeof e==\\"function\\")for(let l of m(e))!y.call(n,l)&&l!==i&&o(n,l,{get:()=>e[l],enumerable:!(a=u(e,l))||a.enumerable});return n};var w=(n,e,i)=>(i=n!=null?p(g(n)):{},r(e||!n||!n.__esModule?o(i,\\"default\\",{value:n,enumerable:!0}):i,n)),v=n=>r(o({},\\"__esModule\\",{value:!0}),n);var c=b((P,s)=>{s.exports=_jsx_runtime});var E={};f(E,{default:()=>d,frontmatter:()=>T});var t=w(c()),T={title:\\"Independence with OpenTelemetry on Elastic\\",slug:\\"opentelemetry-observability\\",date:\\"2022-11-15\\",description:\\"OpenTelemetry has become a key component for observability given its open standards and developer-friendly tools. See how easily Elastic Observability integrates with OTel to provide a platform that minimizes vendor lock-in and maximizes flexibility.\\",author:[{slug:\\"bahubali-shetti\\"}],image:\\"illustration-scalability-gear-1680x980_(1).jpg\\",tags:[{slug:\\"opentelemetry\\"},{slug:\\"kubernetes\\"},{slug:\\"apm\\"},{slug:\\"cloud-monitoring\\"},{slug:\\"google-cloud\\"},{slug:\\"gke\\"},{slug:\\"apm\\"},{slug:\\"elastic-agent\\"}]};function h(n){let e={a:\\"a\\",br:\\"br\\",code:\\"code\\",em:\\"em\\",h2:\\"h2\\",h3:\\"h3\\",img:\\"img\\",li:\\"li\\",p:\\"p\\",pre:\\"pre\\",strong:\\"strong\\",ul:\\"ul\\",...n.components};return(0,t.jsxs)(t.Fragment,{children:[(0,t.jsx)(e.p,{children:\\"The drive for faster, more scalable services is on the rise. Our day-to-day lives depend on apps, from a food delivery app to have your favorite meal delivered, to your banking app to manage your accounts, to even apps to schedule doctor\\\\u2019s appointments. These apps need to be able to grow from not only a features standpoint but also in terms of user capacity. The scale and need for global reach drives increasing complexity for these high-demand cloud applications.\\"}),`\\n`,(0,t.jsxs)(e.p,{children:[\\"In order to keep pace with demand, most of these online apps and services (for example, mobile applications, web pages, SaaS) are moving to a distributed microservice-based architecture and Kubernetes. Once you\\\\u2019ve migrated your app to the cloud, how do you manage and monitor production, scale, and availability of the service? 
\\",(0,t.jsx)(e.a,{href:\\"https://opentelemetry.io/\\",rel:\\"nofollow\\",children:\\"OpenTelemetry\\"}),\\" is quickly becoming the de facto standard for instrumentation and collecting application telemetry data for Kubernetes applications.\\"]}),`\\n`,(0,t.jsxs)(e.p,{children:[(0,t.jsx)(e.a,{href:\\"https://www.elastic.co/what-is/opentelemetry\\",rel:\\"nofollow\\",children:\\"OpenTelemetry (OTel)\\"}),\\" is an open source project providing a collection of tools, APIs, and SDKs that can be used to generate, collect, and export telemetry data (metrics, logs, and traces) to understand software performance and behavior. OpenTelemetry recently became a CNCF incubating project and has a significant amount of growing community and vendor support.\\"]}),`\\n`,(0,t.jsxs)(e.p,{children:[\\"While OTel provides a standard way to instrument applications with a standard telemetry format, it doesn\\\\u2019t provide any backend or analytics components. Hence using OTel libraries in applications, infrastructure, and user experience monitoring provides flexibility in choosing the appropriate \\",(0,t.jsx)(e.a,{href:\\"https://www.elastic.co/observability\\",rel:\\"nofollow\\",children:\\"observability tool\\"}),\\" of choice. There is no longer any vendor lock-in for application performance monitoring (APM).\\"]}),`\\n`,(0,t.jsx)(e.p,{children:(0,t.jsx)(e.img,{src:\\"/assets/images/opentelemetry-observability/blog-elastic-otel-1.png\\",alt:\\"\\",width:\\"1123\\",height:\\"467\\"})}),`\\n`,(0,t.jsx)(e.p,{children:\\"Elastic Observability natively supports OpenTelemetry and its OpenTelemetry protocol (OTLP) to ingest traces, metrics, and logs. All of Elastic Observability\\\\u2019s APM capabilities are available with OTel data. Hence the following capabilities (and more) are available for OTel data:\\"}),`\\n`,(0,t.jsxs)(e.ul,{children:[`\\n`,(0,t.jsx)(e.li,{children:\\"Service maps\\"}),`\\n`,(0,t.jsx)(e.li,{children:\\"Service details (latency, throughput, failed transactions)\\"}),`\\n`,(0,t.jsx)(e.li,{children:\\"Dependencies between services\\"}),`\\n`,(0,t.jsx)(e.li,{children:\\"Transactions (traces)\\"}),`\\n`,(0,t.jsx)(e.li,{children:\\"ML correlations (specifically for latency)\\"}),`\\n`,(0,t.jsx)(e.li,{children:\\"Service logs\\"}),`\\n`]}),`\\n`,(0,t.jsx)(e.p,{children:\\"In addition to Elastic\\\\u2019s APM and unified view of the telemetry data, you will now be able to use Elastic\\\\u2019s powerful machine learning capabilities to reduce the analysis, and alerting to help reduce MTTR.\\"}),`\\n`,(0,t.jsx)(e.p,{children:(0,t.jsx)(e.img,{src:\\"/assets/images/opentelemetry-observability/blog-elastic-otel-2.png\\",alt:\\"\\",width:\\"1244\\",height:\\"479\\"})}),`\\n`,(0,t.jsx)(e.p,{children:\\"Given its open source heritage, Elastic also supports other CNCF based projects, such as Prometheus, Fluentd, Fluent Bit, Istio, Kubernetes (K8S), and many more.\\"}),`\\n`,(0,t.jsx)(e.p,{children:\\"This blog will show:\\"}),`\\n`,(0,t.jsxs)(e.ul,{children:[`\\n`,(0,t.jsxs)(e.li,{children:[\\"How to get a popular OTel instrumented demo app (Hipster Shop) configured to ingest into \\",(0,t.jsx)(e.a,{href:\\"https://cloud.elastic.co\\",rel:\\"nofollow\\",children:\\"Elastic Cloud\\"}),\\" through a few easy steps\\"]}),`\\n`,(0,t.jsx)(e.li,{children:\\"Highlight some of the Elastic APM capabilities and features around OTel data and what you can do with this data once it\\\\u2019s in Elastic\\"}),`\\n`]}),`\\n`,(0,t.jsx)(e.p,{children:\\"In follow-up blogs, we will detail how to use Elastic\\\\u2019s 
machine learning with OTel telemetry data, how to instrument OTel application metrics for specific languages, how we can support Prometheus ingest through the OTel collector, and more. Stay tuned!\\"}),`\\n`,(0,t.jsx)(e.h2,{id:\\"prerequisites-and-config\\",children:\\"Prerequisites and config\\"}),`\\n`,(0,t.jsx)(e.p,{children:\\"If you plan on following this blog, here are some of the components and details we used to set up the configuration:\\"}),`\\n`,(0,t.jsxs)(e.ul,{children:[`\\n`,(0,t.jsxs)(e.li,{children:[\\"Ensure you have an account on \\",(0,t.jsx)(e.a,{href:\\"https://cloud.elastic.co\\",rel:\\"nofollow\\",children:\\"Elastic Cloud\\"}),\\" and a deployed stack (\\",(0,t.jsx)(e.a,{href:\\"https://www.elastic.co/guide/en/elastic-stack/current/installing-elastic-stack.html\\",rel:\\"nofollow\\",children:\\"see instructions here\\"}),\\").\\"]}),`\\n`,(0,t.jsxs)(e.li,{children:[\\"We used the OpenTelemetry Demo. Directions for using Elastic with OpenTelemetry Demo are \\",(0,t.jsx)(e.a,{href:\\"https://github.com/elastic/opentelemetry-demo\\",rel:\\"nofollow\\",children:\\"here\\"}),\\".\\"]}),`\\n`,(0,t.jsxs)(e.li,{children:[\\"Make sure you have \\",(0,t.jsx)(e.a,{href:\\"https://kubernetes.io/docs/reference/kubectl/\\",rel:\\"nofollow\\",children:\\"kubectl\\"}),\\" and \\",(0,t.jsx)(e.a,{href:\\"https://helm.sh/\\",rel:\\"nofollow\\",children:\\"helm\\"}),\\" also installed locally.\\"]}),`\\n`,(0,t.jsx)(e.li,{children:\\"Additionally, we are using an OTel manually instrumented version of the application. No OTel automatic instrumentation was used in this blog configuration.\\"}),`\\n`,(0,t.jsx)(e.li,{children:\\"Location of our clusters. While we used Google Kubernetes Engine (GKE), you can use any Kubernetes platform of your choice.\\"}),`\\n`,(0,t.jsx)(e.li,{children:\\"While Elastic can ingest telemetry directly from OTel instrumented services, we will focus on the more traditional deployment, which uses the OpenTelemetry Collector.\\"}),`\\n`,(0,t.jsx)(e.li,{children:\\"Prometheus and FluentD/Fluent Bit \\\\u2014 traditionally used to pull all Kubernetes data \\\\u2014 is not being used here versus Kubernetes Agents. 
Follow-up blogs will showcase this.\\"}),`\\n`]}),`\\n`,(0,t.jsx)(e.p,{children:\\"Here is the configuration we will get set up in this blog:\\"}),`\\n`,(0,t.jsx)(e.p,{children:(0,t.jsx)(e.img,{src:\\"/assets/images/opentelemetry-observability/blog-elastic-otel-3.png\\",alt:\\"Configuration to ingest OpenTelemetry data used in this blog\\",width:\\"1222\\",height:\\"505\\"})}),`\\n`,(0,t.jsx)(e.h2,{id:\\"setting-it-all-up\\",children:\\"Setting it all up\\"}),`\\n`,(0,t.jsxs)(e.p,{children:[\\"Over the next few steps, I\\\\u2019ll walk through an \\",(0,t.jsx)(e.a,{href:\\"https://www.elastic.co/observability/opentelemetry\\",rel:\\"nofollow\\",children:\\"Opentelemetry visualization\\"}),\\":\\"]}),`\\n`,(0,t.jsxs)(e.ul,{children:[`\\n`,(0,t.jsx)(e.li,{children:\\"Getting an account on Elastic Cloud\\"}),`\\n`,(0,t.jsx)(e.li,{children:\\"Bringing up a GKE cluster\\"}),`\\n`,(0,t.jsx)(e.li,{children:\\"Bringing up the application\\"}),`\\n`,(0,t.jsx)(e.li,{children:\\"Configuring Kubernetes OTel Collector configmap to point to Elastic Cloud\\"}),`\\n`,(0,t.jsx)(e.li,{children:\\"Using Elastic Observability APM with OTel data for improved visibility\\"}),`\\n`]}),`\\n`,(0,t.jsx)(e.h3,{id:\\"step-0-create-an-account-on-elastic-cloud\\",children:\\"Step 0: Create an account on Elastic Cloud\\"}),`\\n`,(0,t.jsxs)(e.p,{children:[\\"Follow the instructions to \\",(0,t.jsx)(e.a,{href:\\"https://cloud.elastic.co/registration?fromURI=/home\\",rel:\\"nofollow\\",children:\\"get started on Elastic Cloud\\"}),\\".\\"]}),`\\n`,(0,t.jsx)(e.p,{children:(0,t.jsx)(e.img,{src:\\"/assets/images/opentelemetry-observability/blog-elastic-otel-4.png\\",alt:\\"\\",width:\\"884\\",height:\\"654\\"})}),`\\n`,(0,t.jsx)(e.h3,{id:\\"step-1-bring-up-a-k8s-cluster\\",children:\\"Step 1: Bring up a K8S cluster\\"}),`\\n`,(0,t.jsx)(e.p,{children:\\"We used Google Kubernetes Engine (GKE), but you can use any Kubernetes platform of your choice.\\"}),`\\n`,(0,t.jsx)(e.p,{children:\\"There are no special requirements for Elastic to collect OpenTelemetry data from a Kubernetes cluster. Any normal Kubernetes cluster on GKE, EKS, AKS, or Kubernetes compliant cluster (self-deployed and managed) works.\\"}),`\\n`,(0,t.jsx)(e.h3,{id:\\"step-2-load-the-opentelemetry-demo-application-on-the-cluster\\",children:\\"Step 2: Load the OpenTelemetry demo application on the cluster\\"}),`\\n`,(0,t.jsxs)(e.p,{children:[\\"Get your application on a Kubernetes cluster in your cloud service of choice or local Kubernetes platform. The application I am using is available \\",(0,t.jsx)(e.a,{href:\\"https://github.com/bshetti/opentelemetry-microservices-demo/tree/main/deploy-with-collector-k8s\\",rel:\\"nofollow\\",children:\\"here\\"}),\\".\\"]}),`\\n`,(0,t.jsx)(e.p,{children:\\"First clone the directory locally:\\"}),`\\n`,(0,t.jsx)(e.pre,{children:(0,t.jsx)(e.code,{className:\\"language-bash\\",children:`git clone https://github.com/elastic/opentelemetry-demo.git\\n`})}),`\\n`,(0,t.jsxs)(e.p,{children:[\\"(Make sure you have \\",(0,t.jsx)(e.a,{href:\\"https://kubernetes.io/docs/reference/kubectl/\\",rel:\\"nofollow\\",children:\\"kubectl\\"}),\\" and \\",(0,t.jsx)(e.a,{href:\\"https://helm.sh/\\",rel:\\"nofollow\\",children:\\"helm\\"}),\\" also installed locally.)\\"]}),`\\n`,(0,t.jsxs)(e.p,{children:[\\"The instructions utilize a specific opentelemetry-collector configuration for Elastic. 
Essentially, the Elastic \\",(0,t.jsx)(e.a,{href:\\"https://github.com/elastic/opentelemetry-demo/blob/main/kubernetes/elastic-helm/values.yaml\\",rel:\\"nofollow\\",children:\\"values.yaml\\"}),\\" file specified in the elastic/opentelemetry-demo configures the opentelemetry-collector to point to the Elastic APM Server using two main values:\\"]}),`\\n`,(0,t.jsxs)(e.p,{children:[\\"OTEL_EXPORTER_OTLP_ENDPOINT: Elastic\\\\u2019s APM Server endpoint\\",(0,t.jsx)(e.br,{}),`\\n`,\\"OTEL_EXPORTER_OTLP_HEADERS: the Elastic authorization header\\"]}),`\\n`,(0,t.jsx)(e.p,{children:\\"These two values can be found in the OpenTelemetry setup instructions under the APM integration instructions (Integrations->APM) in your Elastic cloud.\\"}),`\\n`,(0,t.jsx)(e.p,{children:(0,t.jsx)(e.img,{src:\\"/assets/images/opentelemetry-observability/blog-elastic-apm-agents.png\\",alt:\\"elastic apm agents\\",width:\\"961\\",height:\\"715\\"})}),`\\n`,(0,t.jsx)(e.p,{children:\\"Once you obtain these, the first step is to create a secret on the cluster with your Elastic APM server endpoint and your APM secret token, using the following command:\\"}),`\\n`,(0,t.jsx)(e.pre,{children:(0,t.jsx)(e.code,{className:\\"language-bash\\",children:`kubectl create secret generic elastic-secret \\\\\\\\\\n --from-literal=elastic_apm_endpoint=\'YOUR_APM_ENDPOINT_WITHOUT_HTTPS_PREFIX\' \\\\\\\\\\n --from-literal=elastic_apm_secret_token=\'YOUR_APM_SECRET_TOKEN\'\\n`})}),`\\n`,(0,t.jsx)(e.p,{children:\\"Don\'t forget to replace:\\"}),`\\n`,(0,t.jsxs)(e.ul,{children:[`\\n`,(0,t.jsxs)(e.li,{children:[\\"YOUR_APM_ENDPOINT_WITHOUT_HTTPS_PREFIX: your Elastic APM endpoint ( \\",(0,t.jsx)(e.strong,{children:\\"without https:// prefix\\"}),\\" ), used as OTEL_EXPORTER_OTLP_ENDPOINT\\"]}),`\\n`,(0,t.jsx)(e.li,{children:\\"YOUR_APM_SECRET_TOKEN: your Elastic APM secret token, used in OTEL_EXPORTER_OTLP_HEADERS\\"}),`\\n`]}),`\\n`,(0,t.jsx)(e.p,{children:\\"Now execute the following commands:\\"}),`\\n`,(0,t.jsx)(e.pre,{children:(0,t.jsx)(e.code,{className:\\"language-bash\\",children:`# switch to the kubernetes/elastic-helm directory\\ncd kubernetes/elastic-helm\\n\\n# add the open-telemetry Helm repository\\nhelm repo add open-telemetry https://open-telemetry.github.io/opentelemetry-helm-charts\\n\\n# deploy the demo through helm install\\nhelm install -f values.yaml my-otel-demo open-telemetry/opentelemetry-demo\\n`})}),`\\n`,(0,t.jsxs)(e.p,{children:[\\"Once your application is up on Kubernetes, you will have the following pods (or some variant) running in the \\",(0,t.jsx)(e.strong,{children:\\"default\\"}),\\" namespace.\\"]}),`\\n`,(0,t.jsx)(e.pre,{children:(0,t.jsx)(e.code,{className:\\"language-bash\\",children:`kubectl get pods -n default\\n`})}),`\\n`,(0,t.jsx)(e.p,{children:\\"Output should be similar to the following:\\"}),`\\n`,(0,t.jsx)(e.pre,{children:(0,t.jsx)(e.code,{className:\\"language-bash\\",children:`NAME READY STATUS RESTARTS AGE\\nmy-otel-demo-accountingservice-5c77754b4f-vwph6 1/1 Running 0 5d4h\\nmy-otel-demo-adservice-6b8b7c7dc5-mb7j5 1/1 Running 0 5d4h\\nmy-otel-demo-cartservice-76d94b7dcd-2g4lf 1/1 Running 0 5d4h\\nmy-otel-demo-checkoutservice-988bbdb88-hmkrp 1/1 Running 0 5d4h\\nmy-otel-demo-currencyservice-6cf4b5f9f6-vz9t2 1/1 Running 0 5d4h\\nmy-otel-demo-emailservice-868c98fd4b-lpr7n 1/1 Running 6 (18h ago) 5d4h\\nmy-otel-demo-featureflagservice-8446ff9c94-lzd4w 1/1 Running 0 5d4h\\nmy-otel-demo-ffspostgres-867945d9cf-zzwd7 1/1 Running 0 5d4h\\nmy-otel-demo-frauddetectionservice-5c97c589b9-z8fhz 1/1 Running 0 
5d4h\\nmy-otel-demo-frontend-d85ccf677-zg9fp 1/1 Running 0 5d4h\\nmy-otel-demo-frontendproxy-6c5c4fccf6-qmldp 1/1 Running 0 5d4h\\nmy-otel-demo-kafka-68bcc66794-dsbr6 1/1 Running 0 5d4h\\nmy-otel-demo-loadgenerator-64c545b974-xfccq 1/1 Running 1 (36h ago) 5d4h\\nmy-otel-demo-otelcol-fdfd9c7cf-6lr2w 1/1 Running 0 5d4h\\nmy-otel-demo-paymentservice-7955c68859-ff7zg 1/1 Running 0 5d4h\\nmy-otel-demo-productcatalogservice-67c879657b-wn2wj 1/1 Running 0 5d4h\\nmy-otel-demo-quoteservice-748d754ffc-qcwm4 1/1 Running 0 5d4h\\nmy-otel-demo-recommendationservice-df78894c7-lwm5v 1/1 Running 0 5d4h\\nmy-otel-demo-redis-7d48567546-h4p4t 1/1 Running 0 5d4h\\nmy-otel-demo-shippingservice-f6fc76ddd-2v7qv 1/1 Running 0 5d4h\\n`})}),`\\n`,(0,t.jsx)(e.h3,{id:\\"step-3-open-kibana-and-use-the-apm-service-map-to-view-your-otel-instrumented-services\\",children:\\"Step 3: Open Kibana and use the APM Service Map to view your OTel instrumented Services\\"}),`\\n`,(0,t.jsx)(e.p,{children:\\"In the Elastic Observability UI under APM, select Service Map to see your services.\\"}),`\\n`,(0,t.jsx)(e.p,{children:(0,t.jsx)(e.img,{src:\\"/assets/images/opentelemetry-observability/blog-elastic-observability-APM.png\\",alt:\\"elastic observability APM\\",width:\\"168\\",height:\\"454\\"})}),`\\n`,(0,t.jsx)(e.p,{children:(0,t.jsx)(e.img,{src:\\"/assets/images/opentelemetry-observability/blog-elastic-observability-OTEL-service-map.png\\",alt:\\"elastic observability OTEL service map\\",width:\\"1465\\",height:\\"1109\\"})}),`\\n`,(0,t.jsx)(e.p,{children:\\"If you are seeing this, then the OpenTelemetry Collector is sending data into Elastic:\\"}),`\\n`,(0,t.jsxs)(e.p,{children:[(0,t.jsx)(e.em,{children:\\"Congratulations,\\"}),\\" \\",(0,t.jsx)(e.em,{children:\\"you\'ve instrumented the OpenTelemetry demo application and successfully ingested the telemetry data into Elastic!\\"})]}),`\\n`,(0,t.jsx)(e.h3,{id:\\"step-4-what-can-elastic-show-me\\",children:\\"Step 4: What can Elastic show me?\\"}),`\\n`,(0,t.jsx)(e.p,{children:\\"Now that the OpenTelemetry data is ingested into Elastic, what can you do?\\"}),`\\n`,(0,t.jsx)(e.p,{children:\\"First, you can view the APM service map (as shown in the previous step) \\\\u2014 this will give you a full view of all the services and the transaction flows between services.\\"}),`\\n`,(0,t.jsx)(e.p,{children:\\"Next, you can check out individual services and the transactions being collected.\\"}),`\\n`,(0,t.jsx)(e.p,{children:(0,t.jsx)(e.img,{src:\\"/assets/images/opentelemetry-observability/blog-elastic-observability-frontend-overview.png\\",alt:\\"elastic observability frontend overview\\",width:\\"1810\\",height:\\"997\\"})}),`\\n`,(0,t.jsx)(e.p,{children:\\"As you can see, the frontend details are listed. Everything from:\\"}),`\\n`,(0,t.jsxs)(e.ul,{children:[`\\n`,(0,t.jsx)(e.li,{children:\\"Average service latency\\"}),`\\n`,(0,t.jsx)(e.li,{children:\\"Throughput\\"}),`\\n`,(0,t.jsx)(e.li,{children:\\"Main transactions\\"}),`\\n`,(0,t.jsx)(e.li,{children:\\"Failed transaction rate\\"}),`\\n`,(0,t.jsx)(e.li,{children:\\"Errors\\"}),`\\n`,(0,t.jsx)(e.li,{children:\\"Dependencies\\"}),`\\n`]}),`\\n`,(0,t.jsx)(e.p,{children:\\"Let\\\\u2019s get to the trace. 
In the Transactions tab, you can review all the types of transactions related to the frontend service:\\"}),`\\n`,(0,t.jsx)(e.p,{children:(0,t.jsx)(e.img,{src:\\"/assets/images/opentelemetry-observability/blog-elastic-observability-frontend-transactions.png\\",alt:\\"elastic observability frontend transactions\\",width:\\"1493\\",height:\\"1154\\"})}),`\\n`,(0,t.jsx)(e.p,{children:\\"Selecting the HTTP POST transaction, we can see the full trace with all the spans:\\"}),`\\n`,(0,t.jsx)(e.p,{children:(0,t.jsx)(e.img,{src:\\"/assets/images/opentelemetry-observability/blog-elastic-observability-frontend-HTTP-POST.png\\",alt:\\"Average latency for this transaction, throughput, any failures, and of course the trace!\\",width:\\"1588\\",height:\\"1193\\"})}),`\\n`,(0,t.jsx)(e.p,{children:\\"Not only can you review the trace, but you can also analyze what is related to higher-than-normal latency for HTTP POST.\\"}),`\\n`,(0,t.jsx)(e.p,{children:\\"Elastic uses machine learning to help identify any potential latency issues across the services from the trace. It\\\\u2019s as simple as selecting the Latency Correlations tab and running the correlation.\\"}),`\\n`,(0,t.jsx)(e.p,{children:(0,t.jsx)(e.img,{src:\\"/assets/images/opentelemetry-observability/blog-elastic-latency-correlations.png\\",alt:\\"elastic observability latency correlations\\",width:\\"1486\\",height:\\"799\\"})}),`\\n`,(0,t.jsx)(e.p,{children:\\"This shows that the high latency transactions are occurring in the checkout service with a medium correlation.\\"}),`\\n`,(0,t.jsx)(e.p,{children:\\"You can then drill down into logs directly from the trace view and review the logs associated with the trace to help identify and pinpoint potential issues.\\"}),`\\n`,(0,t.jsx)(e.p,{children:(0,t.jsx)(e.img,{src:\\"/assets/images/opentelemetry-observability/blog-elastic-latency-distribution.png\\",alt:\\"elastic observability latency distribution\\",width:\\"1589\\",height:\\"668\\"})}),`\\n`,(0,t.jsx)(e.h3,{id:\\"analyze-your-data-with-elastic-machine-learning-ml\\",children:\\"Analyze your data with Elastic machine learning (ML)\\"}),`\\n`,(0,t.jsx)(e.p,{children:\\"Once OpenTelemetry metrics are in Elastic, start analyzing your data through Elastic\\\\u2019s ML capabilities.\\"}),`\\n`,(0,t.jsxs)(e.p,{children:[\\"A great review of these features can be found here: \\",(0,t.jsx)(e.a,{href:\\"https://www.elastic.co/blog/apm-correlations-elastic-observability-root-cause-transactions\\",rel:\\"nofollow\\",children:\\"Correlating APM telemetry to determine root causes in transactions\\"}),\\". And there are many more videos and blogs on \\",(0,t.jsx)(e.a,{href:\\"https://www.elastic.co/blog/\\",rel:\\"nofollow\\",children:\\"Elastic\\\\u2019s Blog\\"}),\\". 
We\\\\u2019ll follow up with additional blogs on leveraging Elastic\\\\u2019s machine learning capabilities for OpenTelemetry data.\\"]}),`\\n`,(0,t.jsx)(e.h2,{id:\\"conclusion\\",children:\\"Conclusion\\"}),`\\n`,(0,t.jsx)(e.p,{children:\\"I hope you\\\\u2019ve gotten an appreciation for how Elastic Observability can help you ingest and analyze OpenTelemetry data with Elastic\\\\u2019s APM capabilities.\\"}),`\\n`,(0,t.jsx)(e.p,{children:\\"A quick recap of what we learned:\\"}),`\\n`,(0,t.jsxs)(e.ul,{children:[`\\n`,(0,t.jsxs)(e.li,{children:[\\"How to get a popular OTel instrumented demo app (Hipster Shop) configured to ingest into \\",(0,t.jsx)(e.a,{href:\\"https://cloud.elastic.co\\",rel:\\"nofollow\\",children:\\"Elastic Cloud\\"}),\\" through a few easy steps\\"]}),`\\n`,(0,t.jsx)(e.li,{children:\\"Highlights of some of the Elastic APM capabilities and features around OTel data and what you can do with this data once it\\\\u2019s in Elastic\\"}),`\\n`]}),`\\n`,(0,t.jsxs)(e.p,{children:[\\"Ready to get started? Sign up \\",(0,t.jsx)(e.a,{href:\\"https://cloud.elastic.co/registration\\",rel:\\"nofollow\\",children:\\"for Elastic Cloud\\"}),\\" and try out the features and capabilities I\\\\u2019ve outlined above to get the most value and visibility out of your OpenTelemetry data.\\"]})]})}function d(n={}){let{wrapper:e}=n.components||{};return e?(0,t.jsx)(e,{...n,children:(0,t.jsx)(h,{...n})}):h(n)}return v(E);})();\\n;return Component;"},"_id":"articles/opentelemetry-observability.mdx","_raw":{"sourceFilePath":"articles/opentelemetry-observability.mdx","sourceFileName":"opentelemetry-observability.mdx","sourceFileDir":"articles","contentType":"mdx","flattenedPath":"articles/opentelemetry-observability"},"type":"Article","imageUrl":"/assets/images/opentelemetry-observability/illustration-scalability-gear-1680x980_(1).jpg","readingTime":"12 min read","url":"/opentelemetry-observability","headings":[{"level":2,"title":"Prerequisites and config","href":"#prerequisites-and-config"},{"level":2,"title":"Setting it all up","href":"#setting-it-all-up"},{"level":3,"title":"Step 0: Create an account on Elastic Cloud","href":"#step-0-create-an-account-on-elastic-cloud"},{"level":3,"title":"Step 1: Bring up a K8S cluster","href":"#step-1-bring-up-a-k8s-cluster"},{"level":3,"title":"Step 2: Load the OpenTelemetry demo application on the cluster","href":"#step-2-load-the-opentelemetry-demo-application-on-the-cluster"},{"level":3,"title":"Step 3: Open Kibana and use the APM Service Map to view your OTel instrumented Services","href":"#step-3-open-kibana-and-use-the-apm-service-map-to-view-your-otel-instrumented-services"},{"level":3,"title":"Step 4: What can Elastic show me?","href":"#step-4-what-can-elastic-show-me"},{"level":3,"title":"Analyze your data with Elastic machine learning (ML)","href":"#analyze-your-data-with-elastic-machine-learning-ml"},{"level":2,"title":"Conclusion","href":"#conclusion"}]},{"title":"Optimizing cloud resources and cost with APM metadata in Elastic Observability","slug":"optimize-cloud-resources-apm-observability","date":"2023-08-16","description":"Optimize cloud costs with Elastic APM. 
Learn how to leverage cloud metadata, calculate pricing, and make smarter decisions for better performance.","image":"illustration-out-of-box-data-vis-1680x980.png","author":[{"slug":"philipp-kahr","type":"Author","_raw":{}},{"slug":"nathan-smith","type":"Author","_raw":{}}],"tags":[{"slug":"cloud-monitoring","type":"Tag","_raw":{}},{"slug":"google-cloud","type":"Tag","_raw":{}},{"slug":"cost","type":"Tag","_raw":{}},{"slug":"metrics","type":"Tag","_raw":{}}],"body":{"raw":"\\nApplication performance monitoring (APM) is much more than capturing and tracking errors and stack traces. Today’s cloud-based businesses deploy applications across various regions and even cloud providers. So, harnessing the power of metadata provided by the Elastic APM agents becomes more critical. Leveraging the metadata, including crucial information like cloud region, provider, and machine type, allows us to track costs across the application stack. In this blog post, we look at how we can use cloud metadata to empower businesses to make smarter and cost-effective decisions, all while improving resource utilization and the user experience.\\n\\nFirst, we need an example application that allows us to monitor infrastructure changes effectively. We use a Python Flask application with the Elastic Python APM agent. The application is a simple calculator taking the numbers as a REST request. We utilize Locust — a simple load-testing tool — to evaluate performance under varying workloads.\\n\\nThe next step includes obtaining the pricing information associated with the cloud services. Every cloud provider is different. Most of them offer an option to retrieve pricing through an API. But today, we will focus on Google Cloud and will leverage their pricing calculator to retrieve relevant cost information.\\n\\n## The calculator and Google Cloud pricing\\n\\nTo perform a cost analysis, we need to know the cost of the machines in use. Google provides a billing [API](https://cloud.google.com/billing/v1/how-tos/catalog-api) and [Client Library](https://cloud.google.com/billing/docs/reference/libraries#client-libraries-install-python) to fetch the necessary data programmatically. In this blog, we are not covering the API approach. Instead, the [Google Cloud Pricing Calculator](https://cloud.google.com/products/calculator) is enough. Select the machine type and region in the calculator and set the count to 1 instance. It will then report the total estimated cost for this machine. Doing this for an e2-standard-4 machine type results in 107.7071784 US$ for a runtime of 730 hours.\\n\\nNow, let’s go to our Kibana\xae where we will create a new index inside Dev Tools. Since we don’t want to analyze text, we will tell Elasticsearch\xae to treat every text field as a keyword. The index name is cloud-billing. 
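As a quick sanity check (a sketch, assuming the 730-hour month the calculator uses), the per-hour and per-minute prices used in the billing document below can be derived from the monthly estimate:\\n\\n```bash\\n# Derive hourly and per-minute prices from the monthly calculator estimate\\necho \\"scale=9; 107.7071784 / 730\\" | bc      # per hour   -> .147544080\\necho \\"scale=9; 107.7071784 / 730 / 60\\" | bc # per minute -> .002459068\\n```\\n\\n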
I might want to do the same for Azure and AWS, then I can append it to the same index.\\n\\n```bash\\nPUT cloud-billing\\n{\\n \\"mappings\\": {\\n \\"dynamic_templates\\": [\\n {\\n \\"stringsaskeywords\\": {\\n \\"match\\": \\"*\\",\\n \\"match_mapping_type\\": \\"string\\",\\n \\"mapping\\": {\\n \\"type\\": \\"keyword\\"\\n }\\n }\\n }\\n ]\\n }\\n}\\n```\\n\\nNext up is crafting our billing document:\\n\\n```bash\\nPOST cloud-billing/_doc/e2-standard-4_europe-west4\\n{\\n \\"machine\\": {\\n \\"enrichment\\": \\"e2-standard-4_europe-west4\\"\\n },\\n \\"cloud\\": {\\n \\"machine\\": {\\n \\"type\\": \\"e2-standard-4\\"\\n },\\n \\"region\\": \\"europe-west4\\",\\n \\"provider\\": \\"google\\"\\n },\\n \\"stats\\": {\\n \\"cpu\\": 4,\\n \\"memory\\": 8\\n },\\n \\"price\\": {\\n \\"minute\\": 0.002459068,\\n \\"hour\\": 0.14754408,\\n \\"month\\": 107.7071784\\n }\\n}\\n```\\n\\nWe create a document and set a custom ID. This ID matches the instance name and the region since the machines\' costs may differ in each region. Automatic IDs could be problematic because I might want to update what a machine costs regularly. I could use a timestamped index for that and only ever use the latest document matching. But this way, I can update and don’t have to worry about it. I calculated the price down to minute and hour prices as well. The most important thing is the machine.enrichment field, which is the same as the ID. The same instance type can exist in multiple regions, but our enrichment processor is limited to match or range. We create a matching name that can explicitly match as in e2-standard-4_europe-west4. It’s up to you to decide whether you want the cloud provider in there and make it google_e2-standard-4_europe-west4.\\n\\n## Calculating the cost\\n\\nThere are multiple ways of achieving this in the Elastic Stack. In this case, we will use an enrich policy, ingest pipeline, and transform.\\n\\nThe enrich policy is rather easy to set up:\\n\\n```bash\\nPUT _enrich/policy/cloud-billing\\n{\\n \\"match\\": {\\n \\"indices\\": \\"cloud-billing\\",\\n \\"match_field\\": \\"machine.enrichment\\",\\n \\"enrich_fields\\": [\\"price.minute\\", \\"price.hour\\", \\"price.month\\"]\\n }\\n}\\n\\nPOST _enrich/policy/cloud-billing/_execute\\n```\\n\\nDon’t forget to run the \\\\_execute at the end of it. This is necessary to create the internal indices used by the enrichment in the ingest pipeline. The ingest pipeline is rather minimalistic — it calls the enrichment and renames a field. This is where our machine.enrichment field comes in. One caveat around enrichment is that when you add new documents to the cloud-billing index, you need to rerun the \\\\_execute statement. 
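Before wiring the policy into a pipeline, you can optionally confirm it exists after running \\\\_execute (a quick sanity check using the standard enrich policy API):\\n\\n```bash\\nGET _enrich/policy/cloud-billing\\n```\\n\\n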
The last bit calculates the total cost with the count of unique machines seen.\\n\\n```bash\\nPUT _ingest/pipeline/cloud-billing\\n{\\n \\"processors\\": [\\n {\\n \\"set\\": {\\n \\"field\\": \\"_temp.machine_type\\",\\n \\"value\\": \\"{{cloud.machine.type}}_{{cloud.region}}\\"\\n }\\n },\\n {\\n \\"enrich\\": {\\n \\"policy_name\\": \\"cloud-billing\\",\\n \\"field\\": \\"_temp.machine_type\\",\\n \\"target_field\\": \\"enrichment\\"\\n }\\n },\\n {\\n \\"rename\\": {\\n \\"field\\": \\"enrichment.price\\",\\n \\"target_field\\": \\"price\\"\\n }\\n },\\n {\\n \\"remove\\": {\\n \\"field\\": [\\n \\"_temp\\",\\n \\"enrichment\\"\\n ]\\n }\\n },\\n {\\n \\"script\\": {\\n \\"source\\": \\"ctx.total_price=ctx.count_machines*ctx.price.hour\\"\\n }\\n }\\n ]\\n}\\n```\\n\\nSince this is all configured now, we are ready for our Transform. For this, we need a data view that matches the APM data_streams. This is traces-apm\\\\*, metrics-apm.\\\\*, logs-apm.\\\\*. For the Transform, go to the Transform UI in Kibana and configure it in the following way:\\n\\n![transform configuration](/assets/images/optimize-cloud-resources-apm-observability/elastic-blog-1-transform-configuration.png)\\n\\nWe are doing an hourly breakdown; therefore, I get a document per service, per hour, per machine type. The interesting bit is the aggregations. I want to see the average CPU usage and the 75th, 95th, and 99th percentiles, to view the CPU usage on an hourly basis, allowing me to identify the CPU usage across an hour. At the bottom, give the transform a name, select the index cloud-costs, and select the cloud-billing ingest pipeline.\\n\\nHere is the entire transform as a JSON document:\\n\\n```bash\\nPUT _transform/cloud-billing\\n{\\n \\"source\\": {\\n \\"index\\": [\\n \\"traces-apm*\\",\\n \\"metrics-apm.*\\",\\n \\"logs-apm.*\\"\\n ],\\n \\"query\\": {\\n \\"bool\\": {\\n \\"filter\\": [\\n {\\n \\"bool\\": {\\n \\"should\\": [\\n {\\n \\"exists\\": {\\n \\"field\\": \\"cloud.provider\\"\\n }\\n }\\n ],\\n \\"minimum_should_match\\": 1\\n }\\n }\\n ]\\n }\\n }\\n },\\n \\"pivot\\": {\\n \\"group_by\\": {\\n \\"@timestamp\\": {\\n \\"date_histogram\\": {\\n \\"field\\": \\"@timestamp\\",\\n \\"calendar_interval\\": \\"1h\\"\\n }\\n },\\n \\"cloud.provider\\": {\\n \\"terms\\": {\\n \\"field\\": \\"cloud.provider\\"\\n }\\n },\\n \\"cloud.region\\": {\\n \\"terms\\": {\\n \\"field\\": \\"cloud.region\\"\\n }\\n },\\n \\"cloud.machine.type\\": {\\n \\"terms\\": {\\n \\"field\\": \\"cloud.machine.type\\"\\n }\\n },\\n \\"service.name\\": {\\n \\"terms\\": {\\n \\"field\\": \\"service.name\\"\\n }\\n }\\n },\\n \\"aggregations\\": {\\n \\"avg_cpu\\": {\\n \\"avg\\": {\\n \\"field\\": \\"system.cpu.total.norm.pct\\"\\n }\\n },\\n \\"percentiles_cpu\\": {\\n \\"percentiles\\": {\\n \\"field\\": \\"system.cpu.total.norm.pct\\",\\n \\"percents\\": [\\n 75,\\n 95,\\n 99\\n ]\\n }\\n },\\n \\"avg_transaction_duration\\": {\\n \\"avg\\": {\\n \\"field\\": \\"transaction.duration.us\\"\\n }\\n },\\n \\"percentiles_transaction_duration\\": {\\n \\"percentiles\\": {\\n \\"field\\": \\"transaction.duration.us\\",\\n \\"percents\\": [\\n 75,\\n 95,\\n 99\\n ]\\n }\\n },\\n \\"count_machines\\": {\\n \\"cardinality\\": {\\n \\"field\\": \\"cloud.instance.id\\"\\n }\\n }\\n }\\n },\\n \\"dest\\": {\\n \\"index\\": \\"cloud-costs\\",\\n \\"pipeline\\": \\"cloud-billing\\"\\n },\\n \\"sync\\": {\\n \\"time\\": {\\n \\"delay\\": \\"120s\\",\\n \\"field\\": \\"@timestamp\\"\\n }\\n },\\n \\"settings\\": {\\n \\"max_page_search_size\\": 
1000\\n }\\n}\\n```\\n\\nOnce the transform is created and running, we need a Kibana Data View for the index: cloud-costs. For the transaction, use the custom formatter inside Kibana and set its format to “Duration” in “microseconds.”\\n\\n![cloud costs](/assets/images/optimize-cloud-resources-apm-observability/elastic-blog-2-cloud-costs.png)\\n\\nWith that, everything is arranged and ready to go.\\n\\n## Observing infrastructure changes\\n\\nBelow I created a dashboard that allows us to identify:\\n\\n- How much costs a certain service creates\\n- CPU usage\\n- Memory usage\\n- Transaction duration\\n- Identify cost-saving potential\\n\\n![graphs](/assets/images/optimize-cloud-resources-apm-observability/elastic-blog-3-graphs.png)\\n\\nFrom left to right, we want to focus on the very first chart. We have the bars representing the CPU as average in green and 95th percentile in blue on top. It goes from 0 to 100% and is normalized, meaning that even with 8 CPU cores, it will still read 100% usage and not 800%. The line graph represents the transaction duration, the average being in red, and the 95th percentile in purple. Last, we have the orange area at the bottom, which is the average memory usage on that host.\\n\\nWe immediately realize that our calculator does not need a lot of memory. Hovering over the graph reveals 2.89% memory usage. The e2-standard-8 machine that we are using has 32 GB of memory. We occasionally spike to 100% CPU in the 95th percentile. When this happens, we see that the average transaction duration spikes to 2.5 milliseconds. However, every hour this machine costs us a rounded 30 cents. Using this information, we can now downsize to a better fit. The average CPU usage is around 11-13%, and the 95th percentile is not that far away.\\n\\nBecause we are using 8 CPUs, one could now say that 12.5% represents a full core, but that is just an assumption on a piece of paper. Nonetheless, we know there is a lot of headroom, and we can downscale quite a bit. In this case, I decided to go to 2 CPUs and 2 GB of RAM, known as e2-highcpu2. This should fit my calculator application better. We barely touched the RAM, 2.89% out of 32GB are roughly 1GB of use. After the change and reboot of the calculator machine, I started the same Locust test to identify my CPU usage and, more importantly, if my transactions get slower, and if so, by how much. Ultimately, I want to decide whether 1 millisecond more latency is worth 10 more cents per hour. I added the change as an annotation in Lens.\\n\\nAfter letting it run for a bit, we can now identify the smaller hosts\' impact. In this case, we can see that the average did not change. However, the 95th percentile — as in 95% of all transactions are below this value — did spike up. Again, it looks bad at first, but checking in, it went from ~1.5 milliseconds to ~2.10 milliseconds, a ~0.6 millisecond increase. Now, you can decide whether that 0.6 millisecond increase is worth paying ~180$ more per month or if the current latency is good enough.\\n\\n## Conclusion\\n\\nObservability is more than just collecting logs, metrics, and traces. Linking user experience to cloud costs allows your business to identify areas where you can save money. Having the right tools at your disposal will help you generate those insights quickly. 
Making informed decisions about how to optimize your cloud cost and ultimately improve the user experience is the bottom-line goal.\\n\\nThe dashboard and data view can be found in my [GitHub repository](https://github.com/philippkahr/blogs/tree/main/apm-cost-optimisation). You can download the .ndjson file and import it using Saved Objects inside Stack Management in Kibana.\\n\\n## Caveats\\n\\nPricing is only for base machines, without any disks, static public IP addresses, or any other additional costs, such as licenses for operating systems. Furthermore, it excludes spot pricing, discounts, or free credits. Additionally, data transfer costs between services are also not included. We only calculate it based on the minute rate of the service running — we are not checking billing intervals from Google Cloud. In our case, we would bill per minute, regardless of what Google Cloud has. Using the count for unique instance.ids works as intended. However, if a machine is only running for one minute, we calculate it based on the hourly rate. So, a machine running for one minute will cost the same as one running for 50 minutes — at least how we calculate it. The transform uses calendar hour intervals; therefore, it\'s 8 am-9 am, 9 am-10 am, and so on.\\n\\n_The release and timing of any features or functionality described in this post remain at Elastic\'s sole discretion. Any features or functionality not currently available may not be delivered on time or at all._\\n","code":"var Component=(()=>{var u=Object.create;var o=Object.defineProperty;var m=Object.getOwnPropertyDescriptor;var p=Object.getOwnPropertyNames;var g=Object.getPrototypeOf,f=Object.prototype.hasOwnProperty;var w=(t,e)=>()=>(e||t((e={exports:{}}).exports,e),e.exports),y=(t,e)=>{for(var i in e)o(t,i,{get:e[i],enumerable:!0})},s=(t,e,i,r)=>{if(e&&typeof e==\\"object\\"||typeof e==\\"function\\")for(let a of p(e))!f.call(t,a)&&a!==i&&o(t,a,{get:()=>e[a],enumerable:!(r=m(e,a))||r.enumerable});return t};var b=(t,e,i)=>(i=t!=null?u(g(t)):{},s(e||!t||!t.__esModule?o(i,\\"default\\",{value:t,enumerable:!0}):i,t)),v=t=>s(o({},\\"__esModule\\",{value:!0}),t);var l=w((k,c)=>{c.exports=_jsx_runtime});var x={};y(x,{default:()=>d,frontmatter:()=>_});var n=b(l()),_={title:\\"Optimizing cloud resources and cost with APM metadata in Elastic Observability\\",slug:\\"optimize-cloud-resources-apm-observability\\",date:\\"2023-08-16\\",description:\\"Optimize cloud costs with Elastic APM. Learn how to leverage cloud metadata, calculate pricing, and make smarter decisions for better performance.\\",author:[{slug:\\"philipp-kahr\\"},{slug:\\"nathan-smith\\"}],image:\\"illustration-out-of-box-data-vis-1680x980.png\\",tags:[{slug:\\"cloud-monitoring\\"},{slug:\\"google-cloud\\"},{slug:\\"cost\\"},{slug:\\"metrics\\"}]};function h(t){let e={a:\\"a\\",code:\\"code\\",em:\\"em\\",h2:\\"h2\\",img:\\"img\\",li:\\"li\\",p:\\"p\\",pre:\\"pre\\",ul:\\"ul\\",...t.components};return(0,n.jsxs)(n.Fragment,{children:[(0,n.jsx)(e.p,{children:\\"Application performance monitoring (APM) is much more than capturing and tracking errors and stack traces. Today\\\\u2019s cloud-based businesses deploy applications across various regions and even cloud providers. So, harnessing the power of metadata provided by the Elastic APM agents becomes more critical. Leveraging the metadata, including crucial information like cloud region, provider, and machine type, allows us to track costs across the application stack. 
In this blog post, we look at how we can use cloud metadata to empower businesses to make smarter and cost-effective decisions, all while improving resource utilization and the user experience.\\"}),`\\n`,(0,n.jsx)(e.p,{children:\\"First, we need an example application that allows us to monitor infrastructure changes effectively. We use a Python Flask application with the Elastic Python APM agent. The application is a simple calculator taking the numbers as a REST request. We utilize Locust \\\\u2014 a simple load-testing tool to evaluate performance under varying workloads.\\"}),`\\n`,(0,n.jsx)(e.p,{children:\\"The next step includes obtaining the pricing information associated with the cloud services. Every cloud provider is different. Most of them offer an option to retrieve pricing through an API. But today, we will focus on Google Cloud and will leverage their pricing calculator to retrieve relevant cost information.\\"}),`\\n`,(0,n.jsx)(e.h2,{id:\\"the-calculator-and-google-cloud-pricing\\",children:\\"The calculator and Google Cloud pricing\\"}),`\\n`,(0,n.jsxs)(e.p,{children:[\\"To perform a cost analysis, we need to know the cost of the machines in use. Google provides a billing \\",(0,n.jsx)(e.a,{href:\\"https://cloud.google.com/billing/v1/how-tos/catalog-api\\",rel:\\"nofollow\\",children:\\"API\\"}),\\" and \\",(0,n.jsx)(e.a,{href:\\"https://cloud.google.com/billing/docs/reference/libraries#client-libraries-install-python\\",rel:\\"nofollow\\",children:\\"Client Library\\"}),\\" to fetch the necessary data programmatically. In this blog, we are not covering the API approach. Instead, the \\",(0,n.jsx)(e.a,{href:\\"https://cloud.google.com/products/calculator\\",rel:\\"nofollow\\",children:\\"Google Cloud Pricing Calculator\\"}),\\" is enough. Select the machine type and region in the calculator and set the count 1 instance. It will then report the total estimated cost for this machine. Doing this for an e2-standard-4 machine type results in 107.7071784 US$ for a runtime of 730 hours.\\"]}),`\\n`,(0,n.jsx)(e.p,{children:\\"Now, let\\\\u2019s go to our Kibana\\\\xAE where we will create a new index inside Dev Tools. Since we don\\\\u2019t want to analyze text, we will tell Elasticsearch\\\\xAE to treat every text as a keyword. The index name is cloud-billing. I might want to do the same for Azure and AWS, then I can append it to the same index.\\"}),`\\n`,(0,n.jsx)(e.pre,{children:(0,n.jsx)(e.code,{className:\\"language-bash\\",children:`PUT cloud-billing\\n{\\n \\"mappings\\": {\\n \\"dynamic_templates\\": [\\n {\\n \\"stringsaskeywords\\": {\\n \\"match\\": \\"*\\",\\n \\"match_mapping_type\\": \\"string\\",\\n \\"mapping\\": {\\n \\"type\\": \\"keyword\\"\\n }\\n }\\n }\\n ]\\n }\\n}\\n`})}),`\\n`,(0,n.jsx)(e.p,{children:\\"Next up is crafting our billing document:\\"}),`\\n`,(0,n.jsx)(e.pre,{children:(0,n.jsx)(e.code,{className:\\"language-bash\\",children:`POST cloud-billing/_doc/e2-standard-4_europe-west4\\n{\\n \\"machine\\": {\\n \\"enrichment\\": \\"e2-standard-4_europe-west4\\"\\n },\\n \\"cloud\\": {\\n \\"machine\\": {\\n \\"type\\": \\"e2-standard-4\\"\\n },\\n \\"region\\": \\"europe-west4\\",\\n \\"provider\\": \\"google\\"\\n },\\n \\"stats\\": {\\n \\"cpu\\": 4,\\n \\"memory\\": 8\\n },\\n \\"price\\": {\\n \\"minute\\": 0.002459068,\\n \\"hour\\": 0.14754408,\\n \\"month\\": 107.7071784\\n }\\n}\\n`})}),`\\n`,(0,n.jsx)(e.p,{children:\\"We create a document and set a custom ID. 
This ID matches the instance name and the region since the machines\' costs may differ in each region. Automatic IDs could be problematic because I might want to update what a machine costs regularly. I could use a timestamped index for that and only ever use the latest document matching. But this way, I can update and don\\\\u2019t have to worry about it. I calculated the price down to minute and hour prices as well. The most important thing is the machine.enrichment field, which is the same as the ID. The same instance type can exist in multiple regions, but our enrichment processor is limited to match or range. We create a matching name that can explicitly match as in e2-standard-4_europe-west4. It\\\\u2019s up to you to decide whether you want the cloud provider in there and make it google_e2-standard-4_europ-west-4.\\"}),`\\n`,(0,n.jsx)(e.h2,{id:\\"calculating-the-cost\\",children:\\"Calculating the cost\\"}),`\\n`,(0,n.jsx)(e.p,{children:\\"There are multiple ways of achieving this in the Elastic Stack. In this case, we will use an enrich policy, ingest pipeline, and transform.\\"}),`\\n`,(0,n.jsx)(e.p,{children:\\"The enrich policy is rather easy to setup:\\"}),`\\n`,(0,n.jsx)(e.pre,{children:(0,n.jsx)(e.code,{className:\\"language-bash\\",children:`PUT _enrich/policy/cloud-billing\\n{\\n \\"match\\": {\\n \\"indices\\": \\"cloud-billing\\",\\n \\"match_field\\": \\"machine.enrichment\\",\\n \\"enrich_fields\\": [\\"price.minute\\", \\"price.hour\\", \\"price.month\\"]\\n }\\n}\\n\\nPOST _enrich/policy/cloud-billing/_execute\\n`})}),`\\n`,(0,n.jsx)(e.p,{children:\\"Don\\\\u2019t forget to run the _execute at the end of it. This is necessary to make the internal indices used by the enrichment in the ingest pipeline. The ingest pipeline is rather minimalistic \\\\u2014 it calls the enrichment and renames a field. This is where our machine.enrichment field comes in. One caveat around enrichment is that when you add new documents to the cloud-billing index, you need to rerun the _execute statement. The last bit calculates the total cost with the count of unique machines seen.\\"}),`\\n`,(0,n.jsx)(e.pre,{children:(0,n.jsx)(e.code,{className:\\"language-bash\\",children:`PUT _ingest/pipeline/cloud-billing\\n{\\n \\"processors\\": [\\n {\\n \\"set\\": {\\n \\"field\\": \\"_temp.machine_type\\",\\n \\"value\\": \\"{{cloud.machine.type}}_{{cloud.region}}\\"\\n }\\n },\\n {\\n \\"enrich\\": {\\n \\"policy_name\\": \\"cloud-billing\\",\\n \\"field\\": \\"_temp.machine_type\\",\\n \\"target_field\\": \\"enrichment\\"\\n }\\n },\\n {\\n \\"rename\\": {\\n \\"field\\": \\"enrichment.price\\",\\n \\"target_field\\": \\"price\\"\\n }\\n },\\n {\\n \\"remove\\": {\\n \\"field\\": [\\n \\"_temp\\",\\n \\"enrichment\\"\\n ]\\n }\\n },\\n {\\n \\"script\\": {\\n \\"source\\": \\"ctx.total_price=ctx.count_machines*ctx.price.hour\\"\\n }\\n }\\n ]\\n}\\n`})}),`\\n`,(0,n.jsx)(e.p,{children:\\"Since this is all configured now, we are ready for our Transform. For this, we need a data view that matches the APM data_streams. This is traces-apm*, metrics-apm.*, logs-apm.*. 
For the Transform, go to the Transform UI in Kibana and configure it in the following way:\\"}),`\\n`,(0,n.jsx)(e.p,{children:(0,n.jsx)(e.img,{src:\\"/assets/images/optimize-cloud-resources-apm-observability/elastic-blog-1-transform-configuration.png\\",alt:\\"transform configuration\\",width:\\"1999\\",height:\\"1309\\"})}),`\\n`,(0,n.jsx)(e.p,{children:\\"We are doing an hourly breakdown; therefore, I get a document per service, per hour, per machine type. The interesting bit is the aggregations. I want to see the average CPU usage and the 75th, 95th, and 99th percentiles, to view the CPU usage on an hourly basis, allowing me to identify the CPU usage across an hour. At the bottom, give the transform a name, select the index cloud-costs, and select the cloud-billing ingest pipeline.\\"}),`\\n`,(0,n.jsx)(e.p,{children:\\"Here is the entire transform as a JSON document:\\"}),`\\n`,(0,n.jsx)(e.pre,{children:(0,n.jsx)(e.code,{className:\\"language-bash\\",children:`PUT _transform/cloud-billing\\n{\\n \\"source\\": {\\n \\"index\\": [\\n \\"traces-apm*\\",\\n \\"metrics-apm.*\\",\\n \\"logs-apm.*\\"\\n ],\\n \\"query\\": {\\n \\"bool\\": {\\n \\"filter\\": [\\n {\\n \\"bool\\": {\\n \\"should\\": [\\n {\\n \\"exists\\": {\\n \\"field\\": \\"cloud.provider\\"\\n }\\n }\\n ],\\n \\"minimum_should_match\\": 1\\n }\\n }\\n ]\\n }\\n }\\n },\\n \\"pivot\\": {\\n \\"group_by\\": {\\n \\"@timestamp\\": {\\n \\"date_histogram\\": {\\n \\"field\\": \\"@timestamp\\",\\n \\"calendar_interval\\": \\"1h\\"\\n }\\n },\\n \\"cloud.provider\\": {\\n \\"terms\\": {\\n \\"field\\": \\"cloud.provider\\"\\n }\\n },\\n \\"cloud.region\\": {\\n \\"terms\\": {\\n \\"field\\": \\"cloud.region\\"\\n }\\n },\\n \\"cloud.machine.type\\": {\\n \\"terms\\": {\\n \\"field\\": \\"cloud.machine.type\\"\\n }\\n },\\n \\"service.name\\": {\\n \\"terms\\": {\\n \\"field\\": \\"service.name\\"\\n }\\n }\\n },\\n \\"aggregations\\": {\\n \\"avg_cpu\\": {\\n \\"avg\\": {\\n \\"field\\": \\"system.cpu.total.norm.pct\\"\\n }\\n },\\n \\"percentiles_cpu\\": {\\n \\"percentiles\\": {\\n \\"field\\": \\"system.cpu.total.norm.pct\\",\\n \\"percents\\": [\\n 75,\\n 95,\\n 99\\n ]\\n }\\n },\\n \\"avg_transaction_duration\\": {\\n \\"avg\\": {\\n \\"field\\": \\"transaction.duration.us\\"\\n }\\n },\\n \\"percentiles_transaction_duration\\": {\\n \\"percentiles\\": {\\n \\"field\\": \\"transaction.duration.us\\",\\n \\"percents\\": [\\n 75,\\n 95,\\n 99\\n ]\\n }\\n },\\n \\"count_machines\\": {\\n \\"cardinality\\": {\\n \\"field\\": \\"cloud.instance.id\\"\\n }\\n }\\n }\\n },\\n \\"dest\\": {\\n \\"index\\": \\"cloud-costs\\",\\n \\"pipeline\\": \\"cloud-billing\\"\\n },\\n \\"sync\\": {\\n \\"time\\": {\\n \\"delay\\": \\"120s\\",\\n \\"field\\": \\"@timestamp\\"\\n }\\n },\\n \\"settings\\": {\\n \\"max_page_search_size\\": 1000\\n }\\n}\\n`})}),`\\n`,(0,n.jsx)(e.p,{children:\\"Once the transform is created and running, we need a Kibana Data View for the index: cloud-costs. 
For the transaction, use the custom formatter inside Kibana and set its format to \\\\u201CDuration\\\\u201D in \\\\u201Cmicroseconds.\\\\u201D\\"}),`\\n`,(0,n.jsx)(e.p,{children:(0,n.jsx)(e.img,{src:\\"/assets/images/optimize-cloud-resources-apm-observability/elastic-blog-2-cloud-costs.png\\",alt:\\"cloud costs\\",width:\\"1999\\",height:\\"1473\\"})}),`\\n`,(0,n.jsx)(e.p,{children:\\"With that, everything is arranged and ready to go.\\"}),`\\n`,(0,n.jsx)(e.h2,{id:\\"observing-infrastructure-changes\\",children:\\"Observing infrastructure changes\\"}),`\\n`,(0,n.jsx)(e.p,{children:\\"Below I created a dashboard that allows us to identify:\\"}),`\\n`,(0,n.jsxs)(e.ul,{children:[`\\n`,(0,n.jsx)(e.li,{children:\\"How much costs a certain service creates\\"}),`\\n`,(0,n.jsx)(e.li,{children:\\"CPU usage\\"}),`\\n`,(0,n.jsx)(e.li,{children:\\"Memory usage\\"}),`\\n`,(0,n.jsx)(e.li,{children:\\"Transaction duration\\"}),`\\n`,(0,n.jsx)(e.li,{children:\\"Identify cost-saving potential\\"}),`\\n`]}),`\\n`,(0,n.jsx)(e.p,{children:(0,n.jsx)(e.img,{src:\\"/assets/images/optimize-cloud-resources-apm-observability/elastic-blog-3-graphs.png\\",alt:\\"graphs\\",width:\\"1999\\",height:\\"1329\\"})}),`\\n`,(0,n.jsx)(e.p,{children:\\"From left to right, we want to focus on the very first chart. We have the bars representing the CPU as average in green and 95th percentile in blue on top. It goes from 0 to 100% and is normalized, meaning that even with 8 CPU cores, it will still read 100% usage and not 800%. The line graph represents the transaction duration, the average being in red, and the 95th percentile in purple. Last, we have the orange area at the bottom, which is the average memory usage on that host.\\"}),`\\n`,(0,n.jsx)(e.p,{children:\\"We immediately realize that our calculator does not need a lot of memory. Hovering over the graph reveals 2.89% memory usage. The e2-standard-8 machine that we are using has 32 GB of memory. We occasionally spike to 100% CPU in the 95th percentile. When this happens, we see that the average transaction duration spikes to 2.5 milliseconds. However, every hour this machine costs us a rounded 30 cents. Using this information, we can now downsize to a better fit. The average CPU usage is around 11-13%, and the 95th percentile is not that far away.\\"}),`\\n`,(0,n.jsx)(e.p,{children:\\"Because we are using 8 CPUs, one could now say that 12.5% represents a full core, but that is just an assumption on a piece of paper. Nonetheless, we know there is a lot of headroom, and we can downscale quite a bit. In this case, I decided to go to 2 CPUs and 2 GB of RAM, known as e2-highcpu2. This should fit my calculator application better. We barely touched the RAM, 2.89% out of 32GB are roughly 1GB of use. After the change and reboot of the calculator machine, I started the same Locust test to identify my CPU usage and, more importantly, if my transactions get slower, and if so, by how much. Ultimately, I want to decide whether 1 millisecond more latency is worth 10 more cents per hour. I added the change as an annotation in Lens.\\"}),`\\n`,(0,n.jsx)(e.p,{children:\\"After letting it run for a bit, we can now identify the smaller hosts\' impact. In this case, we can see that the average did not change. However, the 95th percentile \\\\u2014 as in 95% of all transactions are below this value \\\\u2014 did spike up. Again, it looks bad at first, but checking in, it went from ~1.5 milliseconds to ~2.10 milliseconds, a ~0.6 millisecond increase. 
Now, you can decide whether that 0.6 millisecond increase is worth paying ~180$ more per month or if the current latency is good enough.\\"}),`\\n`,(0,n.jsx)(e.h2,{id:\\"conclusion\\",children:\\"Conclusion\\"}),`\\n`,(0,n.jsx)(e.p,{children:\\"Observability is more than just collecting logs, metrics, and traces. Linking user experience to cloud costs allows your business to identify areas where you can save money. Having the right tools at your disposal will help you generate those insights quickly. Making informed decisions about how to optimize your cloud cost and ultimately improve the user experience is the bottom-line goal.\\"}),`\\n`,(0,n.jsxs)(e.p,{children:[\\"The dashboard and data view can be found in my \\",(0,n.jsx)(e.a,{href:\\"https://github.com/philippkahr/blogs/tree/main/apm-cost-optimisation\\",rel:\\"nofollow\\",children:\\"GitHub repository\\"}),\\". You can download the .ndjson file and import it using the Saved Objects inside Stack Management in Kibana.\\"]}),`\\n`,(0,n.jsx)(e.h2,{id:\\"caveats\\",children:\\"Caveats\\"}),`\\n`,(0,n.jsx)(e.p,{children:\\"Pricing is only for base machines without any disk information, static public IP addresses, and any other additional cost, such as licenses for operating systems. Furthermore, it excludes spot pricing, discounts, or free credits. Additionally, data transfer costs between services are also not included. We only calculate it based on the minute rate of the service running \\\\u2014 we are not checking billing intervals from Google Cloud. In our case, we would bill per minute, regardless of what Google Cloud has. Using the count for unique instance.ids work as intended. However, if a machine is only running for one minute, we calculate it based on the hourly rate. So, a machine running for one minute, will cost the same as running for 50 minutes \\\\u2014 at least how we calculate it. The transform uses calendar hour intervals; therefore, it\'s 8 am-9 am, 9 am-10 am, and so on.\\"}),`\\n`,(0,n.jsx)(e.p,{children:(0,n.jsx)(e.em,{children:\\"The release and timing of any features or functionality described in this post remain at Elastic\'s sole discretion. 
Any features or functionality not currently available may not be delivered on time or at all.\\"})})]})}function d(t={}){let{wrapper:e}=t.components||{};return e?(0,n.jsx)(e,{...t,children:(0,n.jsx)(h,{...t})}):h(t)}return v(x);})();\\n;return Component;"},"_id":"articles/optimize-cloud-resources-cost-apm-metadata-elastic-observability.mdx","_raw":{"sourceFilePath":"articles/optimize-cloud-resources-cost-apm-metadata-elastic-observability.mdx","sourceFileName":"optimize-cloud-resources-cost-apm-metadata-elastic-observability.mdx","sourceFileDir":"articles","contentType":"mdx","flattenedPath":"articles/optimize-cloud-resources-cost-apm-metadata-elastic-observability"},"type":"Article","imageUrl":"/assets/images/optimize-cloud-resources-apm-observability/illustration-out-of-box-data-vis-1680x980.png","readingTime":"16 min read","url":"/optimize-cloud-resources-apm-observability","headings":[{"level":2,"title":"The calculator and Google Cloud pricing","href":"#the-calculator-and-google-cloud-pricing"},{"level":2,"title":"Calculating the cost","href":"#calculating-the-cost"},{"level":2,"title":"Observing infrastructure changes","href":"#observing-infrastructure-changes"},{"level":2,"title":"Conclusion","href":"#conclusion"},{"level":2,"title":"Caveats","href":"#caveats"}]},{"title":"Using NLP and Pattern Matching to Detect, Assess, and Redact PII in Logs - Part 1","slug":"pii-ner-regex-assess-redact-part-1","date":"2024-09-25","description":"How to detect and assess PII in your logs using Elasticsearch and NLP","image":"pii-ner-regex-assess-redact-part-1.png","author":[{"slug":"stephen-brown","type":"Author","_raw":{}}],"tags":[{"slug":"log-analytics","type":"Tag","_raw":{}}],"body":{"raw":"\\n## Introduction:\\n\\nThe prevalence of high-entropy logs in distributed systems has significantly raised the risk of PII (Personally Identifiable Information) seeping into our logs, which can result in security and compliance issues. This 2-part blog delves into the crucial task of identifying and managing this issue using the Elastic Stack. We will explore using NLP (Natural Language Processing) and pattern matching to detect, assess, and, where feasible, redact PII from logs that are being ingested into Elasticsearch.\\n\\nIn **Part 1** of this blog, we will cover the following:\\n\\n* Review the techniques and tools we have available to manage PII in our logs\\n* Understand the roles of NLP / NER in PII detection\\n* Build a composable processing pipeline to detect and assess PII\\n* Sample logs and run them through the NER Model\\n* Assess the results of the NER Model \\n\\nIn [Part 2](https://www.elastic.co/observability-labs/blog/pii-ner-regex-assess-redact-part-2) of this blog, we will cover the following:\\n\\n* Redact PII using NER and the redact processor\\n* Apply field-level security to control access to the un-redacted data\\n* Enhance the dashboards and alerts\\n* Production considerations and scaling\\n* How to run these processes on incoming or historical data\\n\\nHere is the overall flow we will construct over the 2 blogs:\\n\\n![PII Overall Flow](/assets/images/pii-ner-regex-assess-redact-part-1/pii-overall-flow.png)\\n\\nAll code for this exercise can be found at:\\n[https://github.com/bvader/elastic-pii](https://github.com/bvader/elastic-pii). \\n\\n## Tools and Techniques\\n\\nThere are four general capabilities that we will use for this exercise. 
\\n\\n* Named Entity Recognition Detection (NER)\\n* Pattern Matching Detection\\n* Log Sampling\\n* Ingest Pipelines as Composable Processing \\n\\n\\n#### Named Entity Recognition (NER) Detection\\n\\nNER is a sub-task of Natural Language Processing (NLP) that involves identifying and categorizing named entities in unstructured text into predefined categories such as:\\n\\n* Person: Names of individuals, including celebrities, politicians, and historical figures.\\n* Organization: Names of companies, institutions, and organizations.\\n* Location: Geographic locations, including cities, countries, and landmarks.\\n* Event: Names of events, including conferences, meetings, and festivals.\\n\\nFor our PII use case, we will choose the base BERT NER model [bert-base-NER](https://huggingface.co/dslim/bert-base-NER) that can be downloaded from [Hugging Face](https://huggingface.co) and loaded into Elasticsearch as a trained model.\\n\\n\\n**Important Note:** NER / NLP Models are CPU-intensive and expensive to run at scale; thus, we will want to employ a sampling technique to understand the risk in our logs without sending the full logs volume through the NER Model. We will discuss the performance and scaling of the NER model in part 2 of the blog. \\n\\n#### Pattern Matching Detection \\n\\nIn addition to using NER, regex pattern matching is a powerful tool for detecting and redacting PII based on common patterns. The Elasticsearch [redact](https://www.elastic.co/guide/en/elasticsearch/reference/current/redact-processor.html) processor is built for this use case.\\n\\n\\n#### Log Sampling\\n\\nConsidering the performance implications of NER and the fact that we may be ingesting a large volume of logs into Elasticsearch, it makes sense to sample our incoming logs. We will build a simple log sampler to accomplish this. \\n\\n\\n#### Ingest Pipelines as Composable Processing \\n\\nWe will create several pipelines, each focusing on a specific capability, and a main ingest pipeline to orchestrate the overall process. \\n\\n## Building the Processing Flow \\n\\n#### Logs Sampling + Composable Ingest Pipelines\\n\\nThe first thing we will do is set up a sampler to sample our logs. This ingest pipeline simply takes a sampling rate between 0 (no logs) and 10000 (all logs), which allows a sampling rate as low as ~0.01%, and marks the sampled logs with `sample.sampled: true`. Further processing on the logs will be driven by the value of `sample.sampled`. The `sample.sample_rate` can be set here or \\"passed in\\" from the orchestration pipeline.\\n\\nThe commands should be run from Kibana -> Dev Tools.\\n\\n[The code can be found here](https://github.com/bvader/elastic-pii/blob/main/elastic/blog-part-1/logs-sampler-composable-pipelines-part-1.json) for the following three sections of code. \\n\\n\\n\\n logs-sampler pipeline code - click to open/close
\\n\\n```bash\\n# logs-sampler pipeline - part 1\\nDELETE _ingest/pipeline/logs-sampler\\nPUT _ingest/pipeline/logs-sampler\\n{\\n \\"processors\\": [\\n {\\n \\"set\\": {\\n \\"description\\": \\"Set Sampling Rate 0 None 10000 all allows for 0.01% precision\\",\\n \\"if\\": \\"ctx.sample.sample_rate == null\\",\\n \\"field\\": \\"sample.sample_rate\\",\\n \\"value\\": 10000\\n }\\n },\\n {\\n \\"set\\": {\\n \\"description\\": \\"Determine if keeping unsampled docs\\",\\n \\"if\\": \\"ctx.sample.keep_unsampled == null\\",\\n \\"field\\": \\"sample.keep_unsampled\\",\\n \\"value\\": true\\n }\\n },\\n {\\n \\"set\\": {\\n \\"field\\": \\"sample.sampled\\",\\n \\"value\\": false\\n }\\n },\\n {\\n \\"script\\": {\\n \\"source\\": \\"\\"\\" Random r = new Random();\\n ctx.sample.random = r.nextInt(params.max); \\"\\"\\",\\n \\"params\\": {\\n \\"max\\": 10000\\n }\\n }\\n },\\n {\\n \\"set\\": {\\n \\"if\\": \\"ctx.sample.random <= ctx.sample.sample_rate\\",\\n \\"field\\": \\"sample.sampled\\",\\n \\"value\\": true\\n }\\n },\\n {\\n \\"drop\\": {\\n \\"description\\": \\"Drop unsampled document if applicable\\",\\n \\"if\\": \\"ctx.sample.keep_unsampled == false && ctx.sample.sampled == false\\"\\n }\\n }\\n ]\\n}\\n```\\n \\n\\nNow, let\'s test the logs sampler. We will build the first part of the composable pipeline. We will be sending logs to the logs-generic-default data stream. With that in mind, we will create the `logs@custom` ingest pipeline that will be automatically called using the logs [data stream framework](https://www.elastic.co/guide/en/fleet/current/data-streams.html#data-streams-pipelines) for customization. We will add one additional level of abstraction so that you can apply this PII processing to other data streams.\\n\\nNext, we will create the `process-pii` pipeline. This is the core processing pipeline where we will orchestrate the PII processing component pipelines. In this first step, we will simply apply the sampling logic. Note that we are setting the sampling rate to 1000, which is equivalent to 10% of the logs.\\n\\n\\n process-pii pipeline code - click to open/close
\\n\\n```bash\\n# Process PII pipeline - part 1\\nDELETE _ingest/pipeline/process-pii\\nPUT _ingest/pipeline/process-pii\\n{\\n \\"processors\\": [\\n {\\n \\"set\\": {\\n \\"description\\": \\"Set true if enabling sampling, otherwise false\\",\\n \\"field\\": \\"sample.enabled\\",\\n \\"value\\": true\\n }\\n },\\n {\\n \\"set\\": {\\n \\"description\\": \\"Set Sampling Rate 0 None 10000 all allows for 0.01% precision\\",\\n \\"field\\": \\"sample.sample_rate\\",\\n \\"value\\": 1000\\n }\\n },\\n {\\n \\"set\\": {\\n \\"description\\": \\"Set to false if you want to drop unsampled data, handy for reindexing historical data\\",\\n \\"field\\": \\"sample.keep_unsampled\\",\\n \\"value\\": true\\n }\\n },\\n {\\n \\"pipeline\\": {\\n \\"if\\": \\"ctx.sample.enabled == true\\",\\n \\"name\\": \\"logs-sampler\\",\\n \\"ignore_failure\\": true\\n }\\n }\\n ]\\n}\\n```\\n \\n\\nFinally, we create the `logs@custom` pipeline, which will simply call our `process-pii` pipeline based on the correct `data_stream.dataset`.\\n\\n\\n logs@custom pipeline code - click to open/close
\\n\\n```bash\\n# logs@custom pipeline - part 1\\nDELETE _ingest/pipeline/logs@custom\\nPUT _ingest/pipeline/logs@custom\\n{\\n \\"processors\\": [\\n {\\n \\"set\\": {\\n \\"field\\": \\"pipelinetoplevel\\",\\n \\"value\\": \\"logs@custom\\"\\n }\\n },\\n {\\n \\"set\\": {\\n \\"field\\": \\"pipelinetoplevelinfo\\",\\n \\"value\\": \\"{{{data_stream.dataset}}}\\"\\n }\\n },\\n {\\n \\"pipeline\\": {\\n \\"description\\" : \\"Call the process_pii pipeline on the correct dataset\\",\\n \\"if\\": \\"ctx?.data_stream?.dataset == \'pii\'\\", \\n \\"name\\": \\"process-pii\\"\\n }\\n }\\n ]\\n}\\n```\\n \\n\\nNow, let\'s test to see the sampling at work.\\n\\nLoad the data as described in the [Data Loading Appendix](#data-loading-appendix). Let\'s use the sample data first, and we will talk about how to test with your incoming or historical logs at the end of this blog. \\n\\nIf you look at Observability -> Logs -> Logs Explorer with the KQL filter `data_stream.dataset : pii` and break down by sample.sampled, you should see a breakdown of approximately 10%.\\n\\n![PII Discover 1](/assets/images/pii-ner-regex-assess-redact-part-1/pii-discover-1-part-1.png)\\n\\n\\nAt this point, we have a composable ingest pipeline that is \\"sampling\\" logs. As a bonus, you can use this logs sampler for any other use cases you have as well. \\n\\n#### Loading, Configuration, and Execution of the NER Pipeline\\n\\n\\n##### Loading the NER Model\\n\\n\\nYou will need a Machine Learning node to run the NER model on. In this exercise, we are using [Elastic Cloud Hosted Deployment ](https://www.elastic.co/guide/en/cloud/current/ec-getting-started.html)on AWS with the [CPU Optimized (ARM)](https://www.elastic.co/guide/en/cloud/current/ec_selecting_the_right_configuration_for_you.html) architecture. The NER inference will run on a Machine Learning AWS c5d node. There will be GPU options in the future, but today, we will stick with CPU architecture. \\n\\nThis exercise will use a single c5d with 8 GB of RAM and 4.2 vCPU, burstable up to 8.4 vCPU.\\n\\n![ML Node](/assets/images/pii-ner-regex-assess-redact-part-1/pii-ml-node-part-1.png)\\n\\nPlease refer to the official documentation on [how to import an NLP-trained model into Elasticsearch](https://www.elastic.co/guide/en/machine-learning/current/ml-nlp-import-model.html) for complete instructions on uploading, configuring, and deploying the model.\\n\\nThe quickest way to get the model is using the Eland Docker method. \\n \\nThe following command will load the model into Elasticsearch but will not start it. We will do that in the next step. \\n\\n```bash\\ndocker run -it --rm --network host docker.elastic.co/eland/eland \\\\\\n eland_import_hub_model \\\\\\n --url https://mydeployment.es.us-west-1.aws.found.io:443/ \\\\\\n -u elastic -p password \\\\\\n --hub-model-id dslim/bert-base-NER --task-type ner\\n\\n```\\n\\n##### Deploy and Start the NER Model\\n\\nIn general, to improve ingest performance, increase throughput by adding more allocations to the deployment. For improved search speed, increase the number of threads per allocation.\\n\\nTo scale ingest, we will focus on scaling the allocations for the deployed model. More information on this topic is available [here](https://www.elastic.co/guide/en/machine-learning/current/ml-nlp-deploy-model.html). The number of allocations must be less than the available allocated processors (cores, not vCPUs) per node.\\n\\nTo deploy and start the NER model, 
we will use the [Start trained model deployment API](https://www.elastic.co/guide/en/elasticsearch/reference/8.15/start-trained-model-deployment.html).\\n\\nWe will configure the following:\\n\\n* 4 Allocations to allow for more parallel ingestion\\n* 1 Thread per Allocation\\n* 0 bytes cache, as we expect a low cache hit rate \\n* 8192 Queue\\n\\n\\n```\\n# Start the model with 4 allocations x 1 thread, no cache, and 8192 queue\\nPOST _ml/trained_models/dslim__bert-base-ner/deployment/_start?cache_size=0b&number_of_allocations=4&threads_per_allocation=1&queue_capacity=8192\\n\\n```\\n\\nYou should get a response that looks something like this.\\n\\n```bash\\n{\\n \\"assignment\\": {\\n \\"task_parameters\\": {\\n \\"model_id\\": \\"dslim__bert-base-ner\\",\\n \\"deployment_id\\": \\"dslim__bert-base-ner\\",\\n \\"model_bytes\\": 430974836,\\n \\"threads_per_allocation\\": 1,\\n \\"number_of_allocations\\": 4,\\n \\"queue_capacity\\": 8192,\\n \\"cache_size\\": \\"0\\",\\n \\"priority\\": \\"normal\\",\\n \\"per_deployment_memory_bytes\\": 430914596,\\n \\"per_allocation_memory_bytes\\": 629366952\\n },\\n...\\n \\"assignment_state\\": \\"started\\",\\n \\"start_time\\": \\"2024-09-23T21:39:18.476066615Z\\",\\n \\"max_assigned_allocations\\": 4\\n }\\n}\\n```\\n\\nThe NER model has been deployed and started and is ready to be used.\\n\\n\\nThe following ingest pipeline implements the NER model via the [inference](https://www.elastic.co/guide/en/elasticsearch/reference/current/inference-processor.html) processor. \\n\\nThere is a significant amount of code here, but only two items are of interest right now. The rest of the code is conditional logic to drive some additional specific behavior that we will look at more closely later. \\n\\n\\n\\n1. The inference processor calls the NER model by ID, which we loaded previously, and passes the text to be analyzed. In this case, the message field is the text_field we want the NER model to analyze for PII.\\n\\n2. The script processor loops through the message field and uses the data generated by the NER model to replace the identified PII with redacted placeholders. This looks more complex than it really is, as it simply loops through the array of ML predictions, replaces them in the message string with constants, and stores the results in a new field `redact.message`. We will look at this a little closer in the following steps. \\n\\n\\n[The code can be found here](https://github.com/bvader/elastic-pii/blob/main/elastic/blog-part-1/logs-sampler-composable-pipelines-part-2.json) for the following three sections of code. \\n\\n\\nThe NER PII Pipeline\\n\\n\\n logs-ner-pii-processor pipeline code - click to open/close
\\n\\n\\nThe following ingest pipeline implements the NER model via the [inference](https://www.elastic.co/guide/en/elasticsearch/reference/current/inference-processor.html) processor. \\n\\nThere is a significant amount of code here, but for now only two items are of interest. The rest of the code is conditional logic to drive some additional specific behavior that we will look at more closely in the future. \\n\\n\\n\\n1. The inference processor calls the NER model by ID, which we loaded previously, and maps the `message` field to the `text_field` input that the NER model will analyze for PII.\\n\\n2. The script processor loops through the message field and uses the data generated by the NER model to replace the identified PII with redacted placeholders. This looks more complex than it really is: it simply loops through the array of ML predictions, replaces them in the message string with constants, and stores the result in a new field `redact.message`. We will look at this a little closer in the following steps. \\n\\n\\n[The code can be found here](https://github.com/bvader/elastic-pii/blob/main/elastic/blog-part-1/logs-sampler-composable-pipelines-part-2.json) for the following three sections of code. \\n\\n\\nThe NER PII Pipeline\\n\\n\\n logs-ner-pii-processor pipeline code - click to open/close\\n\\n```bash\\n# NER Pipeline\\nDELETE _ingest/pipeline/logs-ner-pii-processor\\nPUT _ingest/pipeline/logs-ner-pii-processor\\n{\\n \\"processors\\": [\\n {\\n \\"set\\": {\\n \\"description\\": \\"Set to true to actually redact, false will run processors but leave original\\",\\n \\"field\\": \\"redact.enable\\",\\n \\"value\\": true\\n }\\n },\\n {\\n \\"set\\": {\\n \\"description\\": \\"Set to true to keep ml results for debugging\\",\\n \\"field\\": \\"redact.ner.keep_result\\",\\n \\"value\\": true\\n }\\n },\\n {\\n \\"set\\": {\\n \\"description\\": \\"Set to PER, LOC, ORG to skip, or NONE to not drop any replacement\\",\\n \\"field\\": \\"redact.ner.skip_entity\\",\\n \\"value\\": \\"NONE\\"\\n }\\n },\\n {\\n \\"set\\": {\\n \\"description\\": \\"Minimum class probability required to apply a replacement; 0 applies all\\",\\n \\"field\\": \\"redact.ner.minimum_score\\",\\n \\"value\\": 0\\n }\\n },\\n {\\n \\"set\\": {\\n \\"if\\": \\"ctx?.redact?.message == null\\",\\n \\"field\\": \\"redact.message\\",\\n \\"copy_from\\": \\"message\\"\\n }\\n },\\n {\\n \\"set\\": {\\n \\"field\\": \\"redact.ner.successful\\",\\n \\"value\\": true\\n }\\n },\\n {\\n \\"set\\": {\\n \\"field\\": \\"redact.ner.found\\",\\n \\"value\\": false\\n }\\n },\\n {\\n \\"inference\\": {\\n \\"model_id\\": \\"dslim__bert-base-ner\\",\\n \\"field_map\\": {\\n \\"message\\": \\"text_field\\"\\n },\\n \\"on_failure\\": [\\n {\\n \\"set\\": {\\n \\"description\\": \\"Set \'error.message\'\\",\\n \\"field\\": \\"failure\\",\\n \\"value\\": \\"REDACT_NER_FAILED\\"\\n }\\n },\\n {\\n \\"set\\": {\\n \\"field\\": \\"redact.ner.successful\\",\\n \\"value\\": false\\n }\\n }\\n ]\\n }\\n },\\n {\\n \\"script\\": {\\n \\"if\\": \\"ctx.failure != \'REDACT_NER_FAILED\'\\",\\n \\"lang\\": \\"painless\\",\\n \\"source\\": \\"\\"\\"String msg = ctx[\'message\'];\\n for (item in ctx[\'ml\'][\'inference\'][\'entities\']) {\\n \\tif ((item[\'class_name\'] != ctx.redact.ner.skip_entity) && \\n \\t (item[\'class_probability\'] >= ctx.redact.ner.minimum_score)) { \\n \\t\\t msg = msg.replace(item[\'entity\'], \'<\' + \\n \\t\\t \'REDACTNER-\'+ item[\'class_name\'] + \'_NER>\')\\n \\t}\\n }\\n ctx.redact.message = msg\\"\\"\\",\\n \\"on_failure\\": [\\n {\\n \\"set\\": {\\n \\"description\\": \\"Set \'error.message\'\\",\\n \\"field\\": \\"failure\\",\\n \\"value\\": \\"REDACT_REPLACEMENT_SCRIPT_FAILED\\",\\n \\"override\\": false\\n }\\n },\\n {\\n \\"set\\": {\\n \\"field\\": \\"redact.successful\\",\\n \\"value\\": false\\n }\\n }\\n ]\\n }\\n },\\n {\\n \\"set\\": {\\n \\"if\\": \\"ctx?.ml?.inference?.entities != null && ctx.ml.inference.entities.size() > 0\\",\\n \\"field\\": \\"redact.ner.found\\",\\n \\"value\\": true,\\n \\"ignore_failure\\": true\\n }\\n },\\n {\\n \\"set\\": {\\n \\"if\\": \\"ctx?.redact?.pii?.found == null\\",\\n \\"field\\": \\"redact.pii.found\\",\\n \\"value\\": false\\n }\\n },\\n {\\n \\"set\\": {\\n \\"if\\": \\"ctx?.redact?.ner?.found == true\\",\\n \\"field\\": \\"redact.pii.found\\",\\n \\"value\\": true\\n }\\n },\\n {\\n \\"remove\\": {\\n \\"if\\": \\"ctx.redact.ner.keep_result != true\\",\\n \\"field\\": [\\n \\"ml\\"\\n ],\\n \\"ignore_missing\\": true,\\n \\"ignore_failure\\": true\\n }\\n }\\n ],\\n \\"on_failure\\": [\\n {\\n \\"set\\": {\\n \\"field\\": \\"failure\\",\\n \\"value\\": \\"GENERAL_FAILURE\\",\\n \\"override\\": false\\n }\\n }\\n ]\\n}\\n```
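\\n\\nIf you would like to see exactly what the inference processor receives back from the model for a single string, you can also call the deployed model directly. The log line below is made up for illustration:\\n\\n```bash\\nPOST _ml/trained_models/dslim__bert-base-ner/_infer\\n{\\n \\"docs\\": [\\n {\\n \\"text_field\\": \\"Payment successful for Jane Doe in South Amyborough\\"\\n }\\n ]\\n}\\n```\\n\\nThe response contains the same `predicted_value` and `entities` array (class names, probabilities, and offsets) that the pipeline will write under `ml.inference`.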
\\n\\nThe updated PII Processor Pipeline, which now calls the NER Pipeline\\n\\n\\n process-pii pipeline code - click to open/close\\n\\n```bash\\n# Updated Process PII pipeline that now calls the NER pipeline\\nDELETE _ingest/pipeline/process-pii\\nPUT _ingest/pipeline/process-pii\\n{\\n \\"processors\\": [\\n {\\n \\"set\\": {\\n \\"description\\": \\"Set true if enabling sampling, otherwise false\\",\\n \\"field\\": \\"sample.enabled\\",\\n \\"value\\": true\\n }\\n },\\n {\\n \\"set\\": {\\n \\"description\\": \\"Set Sampling Rate 0 None 10000 all allows for 0.01% precision\\",\\n \\"field\\": \\"sample.sample_rate\\",\\n \\"value\\": 1000\\n }\\n },\\n {\\n \\"set\\": {\\n \\"description\\": \\"Set to false if you want to drop unsampled data, handy for reindexing historical data\\",\\n \\"field\\": \\"sample.keep_unsampled\\",\\n \\"value\\": true\\n }\\n },\\n {\\n \\"pipeline\\": {\\n \\"if\\": \\"ctx.sample.enabled == true\\",\\n \\"name\\": \\"logs-sampler\\",\\n \\"ignore_failure\\": true\\n }\\n },\\n {\\n \\"pipeline\\": {\\n \\"if\\": \\"ctx.sample.enabled == false || (ctx.sample.enabled == true && ctx.sample.sampled == true)\\",\\n \\"name\\": \\"logs-ner-pii-processor\\"\\n }\\n }\\n ]\\n}\\n\\n```\\n \\n\\nNow reload the data as described in [Reloading the logs](#reloading-the-logs).\\n\\n### Results\\n\\nLet\'s take a look at the results with the NER processing in place. In Logs Explorer, execute the following query in the KQL query bar:\\n`data_stream.dataset : pii and ml.inference.entities.class_name : (\\"PER\\" and \\"LOC\\" and \\"ORG\\" )` \\n\\nLogs Explorer should look something like this; open the top message to see the details.\\n\\n![PII Discover 2](/assets/images/pii-ner-regex-assess-redact-part-1/pii-discover-2-part-1.png)\\n\\n#### NER Model Results \\nLet\'s take a closer look at what these fields mean.\\n\\n**Field:** `ml.inference.entities.class_name`\\\\\\n**Sample Value:** `[PER, PER, LOC, ORG, ORG]`\\\\\\n**Description:** An array of the named entity classes that the NER model has identified.\\n\\n**Field:** `ml.inference.entities.class_probability`\\\\\\n**Sample Value:** `[0.999, 0.972, 0.896, 0.506, 0.595]`\\\\\\n**Description:** The class_probability is a value between 0 and 1 that indicates how likely it is that a given data point belongs to a certain class; the higher the number, the higher the probability that the data point belongs to the named class. **This is important, as in the next blog we will decide on a threshold to use for alerting and redaction.** You can see that in this example the model identified a `LOC` as an `ORG`; we can filter such results out, or find them, by setting a threshold. \\n\\n**Field:** `ml.inference.entities.entity`\\\\\\n**Sample Value:** `[Paul Buck, Steven Glens, South Amyborough, ME, Costco]`\\\\\\n**Description:** The array of entities identified, aligned positionally with `class_name` and `class_probability`.\\n\\n**Field:** `ml.inference.predicted_value`\\\\\\n**Sample Value:** `[2024-09-23T14:32:14.608207-07:00Z] log.level=INFO: Payment successful for order #4594 (user: [Paul Buck](PER&Paul+Buck), david59@burgess.net). Phone: 726-632-0527x520, Address: 3713 [Steven Glens](PER&Steven+Glens), [South Amyborough](LOC&South+Amyborough), [ME](ORG&ME) 93580, Ordered from: [Costco](ORG&Costco)`\\\\\\n**Description:** The predicted value of the model, with the identified entities annotated in place.
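\\n\\nFor example, while settling on that threshold, you can eyeball the low-confidence detections directly in Logs Explorer with a KQL filter along these lines (the 0.75 cut-off is purely illustrative, and since `class_probability` is an array, this matches any document where at least one entity scores below it):\\n\\n`data_stream.dataset : pii and ml.inference.entities.class_probability < 0.75`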
\\n\\n#### PII Assessment Dashboard\\n\\nLet\'s take a quick look at a dashboard built to assess the PII data. \\n\\nTo load the dashboard, go to Kibana -> Stack Management -> Saved Objects and import the `pii-dashboard-part-1.ndjson` file that can be found here: \\n\\nhttps://github.com/bvader/elastic-pii/blob/main/elastic/blog-part-1/pii-dashboard-part-1.ndjson\\n\\nMore complete instructions on Kibana Saved Objects can be found [here](https://www.elastic.co/guide/en/kibana/current/managing-saved-objects.html).\\n\\nAfter loading the dashboard, navigate to it and select an appropriate time range; you should see something like the image below. It shows metrics such as the sample rate, the percentage of logs with NER detections, NER score trends, etc. We will examine the assessment and actions in part 2 of this blog. \\n\\n![PII Dashboard 1](/assets/images/pii-ner-regex-assess-redact-part-1/pii-dashboard-1-part-1.png)\\n\\n\\n## Summary and Next Steps\\n\\nIn this first part of the blog, we have accomplished the following.\\n\\n* Reviewed the techniques and tools we have available for PII detection and assessment\\n* Reviewed the role of NLP / NER in PII detection and assessment\\n* Built the necessary composable ingest pipelines to sample logs and run them through the NER Model\\n* Reviewed the NER results and are ready to move to the second blog\\n\\n\\nIn the upcoming [Part 2 of this blog](https://www.elastic.co/observability-labs/blog/pii-ner-regex-assess-redact-part-2), we will cover the following:\\n\\n* Redact PII using NER and the redact processor\\n* Apply field-level security to control access to the un-redacted data\\n* Enhance the dashboards and alerts\\n* Production considerations and scaling\\n* How to run these processes on incoming or historical data\\n\\n## Data Loading Appendix\\n\\n#### Code\\n\\nThe data loading code can be found here: \\n\\n[https://github.com/bvader/elastic-pii](https://github.com/bvader/elastic-pii)\\n\\n```\\n$ git clone https://github.com/bvader/elastic-pii.git\\n```\\n\\n\\n#### Creating and Loading the Sample Data Set \\n\\n```\\n$ cd elastic-pii\\n$ cd python\\n$ python -m venv .env\\n$ source .env/bin/activate\\n$ pip install elasticsearch\\n$ pip install Faker\\n```\\n\\nRun the log generator: \\n```\\n$ python generate_random_logs.py\\n```\\n\\nIf you do not change any parameters, this will create 10,000 random logs in a file named pii.log with a mix of logs that contain and do not contain PII. \\n\\nEdit `load_logs.py` and set the following: \\n\\n```\\n# The Elastic User \\nELASTIC_USER = \\"elastic\\"\\n\\n# Password for the \'elastic\' user generated by Elasticsearch\\nELASTIC_PASSWORD = \\"askdjfhasldfkjhasdf\\"\\n\\n# Found in the \'Manage Deployment\' page\\nELASTIC_CLOUD_ID = \\"deployment:sadfjhasfdlkjsdhf3VuZC5pbzo0NDMkYjA0NmQ0YjFiYzg5NDM3ZDgxM2YxM2RhZjQ3OGE3MzIkZGJmNTE0OGEwODEzNGEwN2E3M2YwYjcyZjljYTliZWQ=\\"\\n```\\nThen run the following command. \\n\\n\\n```\\n$ python load_logs.py\\n```\\n
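\\nOnce the loader finishes, a quick way to confirm the documents landed is a count from Dev Tools. This assumes the loader\'s defaults of dataset `pii` and namespace `default`, which map to the `logs-pii-default` data stream; adjust the name if you changed those settings:\\n\\n```bash\\nGET logs-pii-default/_count\\n```\\n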
\\n#### Reloading the logs\\n\\n**Note** To reload the logs, you can simply re-run the above command. You can run the command multiple times during this exercise, and the logs will be reloaded (actually, loaded again). The new logs will not collide with previous runs, as there will be a unique `run.id` for each run, which is displayed at the end of the loading process.\\n\\n```\\n$ python load_logs.py\\n```\\n"},"_id":"articles/pii-ner-regex-assess-redact-part-1.mdx","_raw":{"sourceFilePath":"articles/pii-ner-regex-assess-redact-part-1.mdx","sourceFileName":"pii-ner-regex-assess-redact-part-1.mdx","sourceFileDir":"articles","contentType":"mdx","flattenedPath":"articles/pii-ner-regex-assess-redact-part-1"},"type":"Article","imageUrl":"/assets/images/pii-ner-regex-assess-redact-part-1/pii-ner-regex-assess-redact-part-1.png","readingTime":"26 min read","url":"/pii-ner-regex-assess-redact-part-1","headings":[{"level":2,"title":"Introduction:","href":"#introduction"},{"level":2,"title":"Tools and Techniques","href":"#tools-and-techniques"},{"level":4,"title":"Named Entity Recognition (NER) Detection","href":"#named-entity-recognition-ner-detection"},{"level":4,"title":"Pattern Matching Detection ","href":"#pattern-matching-detection-"},{"level":4,"title":"Log Sampling","href":"#log-sampling"},{"level":4,"title":"Ingest Pipelines as Composable Processing ","href":"#ingest-pipelines-as-composable-processing-"},{"level":2,"title":"Building the Processing Flow ","href":"#building-the-processing-flow-"},{"level":4,"title":"Logs Sampling + Composable Ingest Pipelines","href":"#logs-sampling--composable-ingest-pipelines"},{"level":4,"title":"Loading, Configuration, and Execution of the NER Pipeline","href":"#loading-configuration-and-execution-of-the-ner-pipeline"},{"level":5,"title":"Loading the NER Model","href":"#loading-the-ner-model"},{"level":5,"title":"Deploy and Start the NER Model","href":"#deploy-and-start-the-ner-model"},{"level":3,"title":"Results","href":"#results"},{"level":4,"title":"NER Model Results ","href":"#ner-model-results-"},{"level":4,"title":"PII Assessment Dashboard","href":"#pii-assessment-dashboard"},{"level":2,"title":"Summary and Next Steps","href":"#summary-and-next-steps"},{"level":2,"title":"Data Loading Appendix","href":"#data-loading-appendix"},{"level":4,"title":"Code","href":"#code"},{"level":4,"title":"Creating and Loading the Sample Data Set ","href":"#creating-and-loading-the-sample-data-set-"},{"level":4,"title":"Reloading the logs","href":"#reloading-the-logs"}]},{"title":"Using NLP and Pattern Matching to Detect, Assess, and Redact PII in Logs - Part 2","slug":"pii-ner-regex-assess-redact-part-2","date":"2024-10-22","description":"How to detect, assess, and redact PII in your logs using Elasticsearch, NLP and Pattern Matching","image":"pii-ner-regex-assess-redact-part-2.png","author":[{"slug":"stephen-brown","type":"Author","_raw":{}}],"tags":[{"slug":"log-analytics","type":"Tag","_raw":{}}],"body":{"raw":"\\n## Introduction:\\n\\nThe prevalence of high-entropy logs in distributed systems has significantly raised the risk of PII (Personally Identifiable Information) seeping into our logs, which can result in security and compliance issues. This 2-part blog delves into the crucial task of identifying and managing this issue using the Elastic Stack. 
We will explore using NLP (Natural Language Processing) and Pattern matching to detect, assess, and, where feasible, redact PII from logs being ingested into Elasticsearch.\\n\\nIn [Part 1 of this blog](https://www.elastic.co/observability-labs/blog/pii-ner-regex-assess-redact-part-1), we covered the following:\\n\\n* Review the techniques and tools we have available to manage PII in our logs\\n* Understand the roles of NLP / NER in PII detection\\n* Build a composable processing pipeline to detect and assess PII\\n* Sample logs and run them through the NER Model\\n* Assess the results of the NER Model \\n\\nIn **Part 2** of this blog, we will cover the following:\\n\\n* Apply the `redact` regex pattern processor and assess the results\\n* Create Alerts using ES|QL\\n* Apply field-level security to control access to the un-redacted data\\n* Production considerations and scaling\\n* How to run these processes on incoming or historical data\\n\\nAs a reminder, here is the overall flow we will construct over the 2 blogs:\\n\\n![PII Overall Flow](/assets/images/pii-ner-regex-assess-redact-part-1/pii-overall-flow.png)\\n\\nAll code for this exercise can be found at:\\n[https://github.com/bvader/elastic-pii](https://github.com/bvader/elastic-pii). \\n\\n### Part 1 Prerequisites\\nThis blog picks up where [Part 1 of this blog](https://www.elastic.co/observability-labs/blog/pii-ner-regex-assess-redact-part-1) left off. You must have the NER model, ingest pipelines, and dashboard from Part 1 installed and working:\\n\\n* Loaded and configured NER Model \\n* Installed all the composable ingest pipelines from Part 1 of the blog\\n* Installed dashboard\\n\\nYou can access the [complete solution for Blog 1 here](https://github.com/bvader/elastic-pii/blob/main/elastic/blog-part-1/logs-sampler-composable-pipelines-blog-1-complete.json). Don\'t forget to load the dashboard, found [here](https://github.com/bvader/elastic-pii/blob/main/elastic/blog-part-1/pii-dashboard-part-1.ndjson).\\n\\n### Applying the Redact Processor\\n\\nNext, we will apply the [`redact` processor](https://www.elastic.co/guide/en/elasticsearch/reference/current/redact-processor.html). The `redact` processor is a simple regex-based processor that takes a list of regex patterns, looks for them in a field, and replaces any matches with literals. The `redact` processor is reasonably performant and can run at scale; we will discuss this in detail in the [production scaling](#production-scaling) section at the end.\\n\\nElasticsearch comes packaged with a number of useful predefined [patterns](https://github.com/elastic/elasticsearch/blob/8.15/libs/grok/src/main/resources/patterns/ecs-v1) that can be conveniently referenced by the `redact` processor. If one does not suit your needs, create a new pattern with a custom definition. The redact processor replaces every occurrence of a match; if there are multiple matches, they will all be replaced with the pattern name.\\n\\nIn the code below, we leveraged some of the predefined patterns and constructed several custom ones.\\n\\n```bash\\n \\"patterns\\": [\\n \\"%{EMAILADDRESS:EMAIL_REGEX}\\", << Predefined\\n \\"%{IP:IP_ADDRESS_REGEX}\\", << Predefined\\n \\"%{CREDIT_CARD:CREDIT_CARD_REGEX}\\", << Custom\\n \\"%{SSN:SSN_REGEX}\\", << Custom\\n \\"%{PHONE:PHONE_REGEX}\\" << Custom\\n ]\\n```\\n\\nWe also replaced the PII with easily identifiable patterns we can use for assessment. 
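\\n\\nTo get a feel for the mechanics before installing anything, you can try the `redact` processor in isolation with the simulate pipeline API. This is a minimal sketch using one predefined and one custom pattern from the list above; the log line and its email address and SSN are fabricated:\\n\\n```bash\\nPOST _ingest/pipeline/_simulate\\n{\\n \\"pipeline\\": {\\n \\"processors\\": [\\n {\\n \\"redact\\": {\\n \\"field\\": \\"message\\",\\n \\"patterns\\": [\\n \\"%{EMAILADDRESS:EMAIL_REGEX}\\",\\n \\"%{SSN:SSN_REGEX}\\"\\n ],\\n \\"pattern_definitions\\": {\\n \\"SSN\\": \\"\\"\\"\\\\d{3}-\\\\d{2}-\\\\d{4}\\"\\"\\"\\n }\\n }\\n }\\n ]\\n },\\n \\"docs\\": [\\n {\\n \\"_source\\": {\\n \\"message\\": \\"User jane@example.com SSN 123-45-6789\\"\\n }\\n }\\n ]\\n}\\n```\\n\\nWith the processor\'s default delimiters, the simulated document comes back with `message: User <EMAIL_REGEX> SSN <SSN_REGEX>`.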
\\n\\nIn addition, it is worth noting that since the redact processor is a simple regex find-and-replace, it can be used against many \\"secrets\\" patterns, not just PII. There are many public references for secrets regex patterns, so you can reuse this capability to detect secrets in your logs.\\n\\nThe code for the following two sections [can be found here](https://github.com/bvader/elastic-pii/blob/main/elastic/blog-part-2/pii-redact-composable-pipelines-blog-2-redact-processor-1.json). \\n\\n\\n\\n logs-pii-redact-processor pipeline code - click to open/close
\\n\\n```bash\\n# Add the PII redact processor pipeline\\nDELETE _ingest/pipeline/logs-pii-redact-processor\\nPUT _ingest/pipeline/logs-pii-redact-processor\\n{\\n \\"processors\\": [\\n {\\n \\"set\\": {\\n \\"field\\": \\"redact.proc.successful\\",\\n \\"value\\": true\\n }\\n },\\n {\\n \\"set\\": {\\n \\"field\\": \\"redact.proc.found\\",\\n \\"value\\": false\\n }\\n },\\n {\\n \\"set\\": {\\n \\"if\\": \\"ctx?.redact?.message == null\\",\\n \\"field\\": \\"redact.message\\",\\n \\"copy_from\\": \\"message\\"\\n }\\n },\\n {\\n \\"redact\\": {\\n \\"field\\": \\"redact.message\\",\\n \\"prefix\\": \\"<REDACTPROC-\\",\\n \\"patterns\\": [\\n \\"%{EMAILADDRESS:EMAIL_REGEX}\\",\\n \\"%{IP:IP_ADDRESS_REGEX}\\",\\n \\"%{CREDIT_CARD:CREDIT_CARD_REGEX}\\",\\n \\"%{SSN:SSN_REGEX}\\",\\n \\"%{PHONE:PHONE_REGEX}\\"\\n ],\\n \\"pattern_definitions\\": {\\n \\"CREDIT_CARD\\": \\"\\"\\"\\\\d{4}[ -]\\\\d{4}[ -]\\\\d{4}[ -]\\\\d{4}\\"\\"\\",\\n \\"SSN\\": \\"\\"\\"\\\\d{3}-\\\\d{2}-\\\\d{4}\\"\\"\\",\\n \\"PHONE\\": \\"\\"\\"(\\\\+\\\\d{1,2}\\\\s?)?1?\\\\-?\\\\.?\\\\s?\\\\(?\\\\d{3}\\\\)?[\\\\s.-]?\\\\d{3}[\\\\s.-]?\\\\d{4}\\"\\"\\"\\n },\\n \\"on_failure\\": [\\n {\\n \\"set\\": {\\n \\"description\\": \\"Set \'error.message\'\\",\\n \\"field\\": \\"failure\\",\\n \\"value\\": \\"REDACT_PROCESSOR_FAILED\\",\\n \\"override\\": false\\n }\\n },\\n {\\n \\"set\\": {\\n \\"field\\": \\"redact.proc.successful\\",\\n \\"value\\": false\\n }\\n }\\n ]\\n }\\n },\\n {\\n \\"set\\": {\\n \\"if\\": \\"ctx?.redact?.message.contains(\'REDACTPROC\')\\",\\n \\"field\\": \\"redact.proc.found\\",\\n \\"value\\": true\\n }\\n },\\n {\\n \\"set\\": {\\n \\"if\\": \\"ctx?.redact?.pii?.found == null\\",\\n \\"field\\": \\"redact.pii.found\\",\\n \\"value\\": false\\n }\\n },\\n {\\n \\"set\\": {\\n \\"if\\": \\"ctx?.redact?.proc?.found == true\\",\\n \\"field\\": \\"redact.pii.found\\",\\n \\"value\\": true\\n }\\n }\\n ],\\n \\"on_failure\\": [\\n {\\n \\"set\\": {\\n \\"field\\": \\"failure\\",\\n \\"value\\": \\"GENERAL_FAILURE\\",\\n \\"override\\": false\\n }\\n }\\n ]\\n}\\n```\\n \\n\\nAnd now, we will add the `logs-pii-redact-processor` pipeline to the overall `process-pii` pipeline. \\n\\n process-pii pipeline code - click to open/close
\\n\\n```bash\\n# Updated Process PII pipeline that now calls the NER and Redact Processor pipelines\\nDELETE _ingest/pipeline/process-pii\\nPUT _ingest/pipeline/process-pii\\n{\\n \\"processors\\": [\\n {\\n \\"set\\": {\\n \\"description\\": \\"Set true if enabling sampling, otherwise false\\",\\n \\"field\\": \\"sample.enabled\\",\\n \\"value\\": true\\n }\\n },\\n {\\n \\"set\\": {\\n \\"description\\": \\"Set Sampling Rate 0 None 10000 all allows for 0.01% precision\\",\\n \\"field\\": \\"sample.sample_rate\\",\\n \\"value\\": 1000\\n }\\n },\\n {\\n \\"set\\": {\\n \\"description\\": \\"Set to false if you want to drop unsampled data, handy for reindexing historical data\\",\\n \\"field\\": \\"sample.keep_unsampled\\",\\n \\"value\\": true\\n }\\n },\\n {\\n \\"pipeline\\": {\\n \\"if\\": \\"ctx.sample.enabled == true\\",\\n \\"name\\": \\"logs-sampler\\",\\n \\"ignore_failure\\": true\\n }\\n },\\n {\\n \\"pipeline\\": {\\n \\"if\\": \\"ctx.sample.enabled == false || (ctx.sample.enabled == true && ctx.sample.sampled == true)\\",\\n \\"name\\": \\"logs-ner-pii-processor\\"\\n }\\n },\\n {\\n \\"pipeline\\": {\\n \\"if\\": \\"ctx.sample.enabled == false || (ctx.sample.enabled == true && ctx.sample.sampled == true)\\",\\n \\"name\\": \\"logs-pii-redact-processor\\"\\n }\\n }\\n ]\\n}\\n```\\n \\n\\nReload the data as described in [Reloading the logs](#reloading-the-logs). If you have not generated the logs the first time, follow the instructions in the [Data Loading Appendix](#data-loading-appendix).\\n\\nGo to Discover, enter the following into the KQL bar, `sample.sampled : true and redact.message: REDACTPROC`, and add the `redact.message` field to the table; you should see something like this.\\n\\n![PII Discover Blog 2 Part 1](/assets/images/pii-ner-regex-assess-redact-part-2/pii-discover-1-part-2.png)\\n\\n\\nIf you did not already load the dashboard from Part 1, load it now via Kibana -> Stack Management -> Saved Objects -> Import; it can be found [here](https://github.com/bvader/elastic-pii/blob/main/elastic/blog-part-1/pii-dashboard-part-1.ndjson). \\n\\nIt should look something like this now. Note that the REGEX portions of the dashboard are now active.\\n\\n![PII Dashboards Blog 2 Part 1](/assets/images/pii-ner-regex-assess-redact-part-2/pii-dashboard-1-part-2.png)\\n\\n## Checkpoint \\nAt this point, we have the following capabilities:\\n\\n* Ability to sample incoming logs and apply this PII redaction \\n* Detect and assess PII with NER/NLP and pattern matching\\n* Assess the amount, type, and quality of the PII detections (see the ES|QL sketch below)
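\\n\\nSince `redact.ner.found` and `redact.proc.found` mark which technique fired, an ES|QL query along these lines summarizes the detections by type. This assumes the sample data landed in the `logs-pii-default` data stream; adjust the `FROM` clause to match yours, and run it from Discover\'s ES|QL mode:\\n\\n```\\nFROM logs-pii-default\\n| WHERE sample.sampled == true\\n| STATS detections = COUNT(*) BY redact.ner.found, redact.proc.found\\n```\\n\\nWe will build on queries like this when we create alerts later.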
\\n\\n**NOTE:** Of course, you can change this behavior if you want to delete the unredacted data entirely; in this exercise, we will keep it and protect it.\\n\\nIn addition, we set `redact.cleanup: true` to clean up the NLP working data.\\n\\nTogether, these fields give you a lot of control over which data you decide to keep and analyze.\\n\\n[The code can be found here](https://github.com/bvader/elastic-pii/blob/main/elastic/blog-part-2/pii-redact-composable-pipelines-blog-2-redact-processor-2.json) for the following section of code.
\\n\\n```bash\\n# Updated process-pii pipeline that calls the NER and redact processor pipelines and cleans up the working data\\nDELETE _ingest/pipeline/process-pii\\nPUT _ingest/pipeline/process-pii\\n{\\n \\"processors\\": [\\n {\\n \\"set\\": {\\n \\"description\\": \\"Set true if enabling sampling, otherwise false\\",\\n \\"field\\": \\"sample.enabled\\",\\n \\"value\\": true\\n }\\n },\\n {\\n \\"set\\": {\\n \\"description\\": \\"Set the sampling rate: 0 = none, 10000 = all (allows 0.01% precision)\\",\\n \\"field\\": \\"sample.sample_rate\\",\\n \\"value\\": 1000\\n }\\n },\\n {\\n \\"set\\": {\\n \\"description\\": \\"Set to false if you want to drop unsampled data, handy for reindexing historical data\\",\\n \\"field\\": \\"sample.keep_unsampled\\",\\n \\"value\\": true\\n }\\n },\\n {\\n \\"pipeline\\": {\\n \\"if\\": \\"ctx.sample.enabled == true\\",\\n \\"name\\": \\"logs-sampler\\",\\n \\"ignore_failure\\": true\\n }\\n },\\n {\\n \\"pipeline\\": {\\n \\"if\\": \\"ctx.sample.enabled == false || (ctx.sample.enabled == true && ctx.sample.sampled == true)\\",\\n \\"name\\": \\"logs-ner-pii-processor\\"\\n }\\n },\\n {\\n \\"pipeline\\": {\\n \\"if\\": \\"ctx.sample.enabled == false || (ctx.sample.enabled == true && ctx.sample.sampled == true)\\",\\n \\"name\\": \\"logs-pii-redact-processor\\"\\n }\\n },\\n {\\n \\"set\\": {\\n \\"description\\": \\"Set to true to actually redact; false runs the processors but leaves the original\\",\\n \\"field\\": \\"redact.enable\\",\\n \\"value\\": true\\n }\\n },\\n {\\n \\"rename\\": {\\n \\"if\\": \\"ctx?.redact?.pii?.found == true && ctx?.redact?.enable == true\\",\\n \\"field\\": \\"message\\",\\n \\"target_field\\": \\"raw.message\\"\\n }\\n },\\n {\\n \\"rename\\": {\\n \\"if\\": \\"ctx?.redact?.pii?.found == true && ctx?.redact?.enable == true\\",\\n \\"field\\": \\"redact.message\\",\\n \\"target_field\\": \\"message\\"\\n }\\n },\\n {\\n \\"set\\": {\\n \\"description\\": \\"Set to true to clean up the working data\\",\\n \\"field\\": \\"redact.cleanup\\",\\n \\"value\\": true\\n }\\n },\\n {\\n \\"remove\\": {\\n \\"if\\": \\"ctx?.redact?.cleanup == true\\",\\n \\"field\\": [\\n \\"ml\\"\\n ],\\n \\"ignore_failure\\": true\\n }\\n }\\n ]\\n}\\n```\\n\\nReload the data as described in [Reloading the logs](#reloading-the-logs).\\n\\nGo to Discover and enter the following into the KQL bar: `sample.sampled : true and redact.pii.found: true`. Then add the following fields to the table:\\n\\n`message`, `raw.message`, `redact.ner.found`, `redact.proc.found`, `redact.pii.found`\\n\\nYou should see something like this\\n![PII Discover Part 2 Blog 2](/assets/images/pii-ner-regex-assess-redact-part-2/pii-discover-2-part-2.png)\\n\\nWe now have everything we need to move forward with protecting the PII and alerting on it. 
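\\n\\nIf you prefer the API to Discover, a quick spot-check of the rename logic might look like the following; the field list is just the fields used above.\\n\\n```bash\\n# Spot-check one redacted document; message should be redacted and raw.message should hold the original\\nGET logs-pii-default/_search\\n{\\n \\"size\\": 1,\\n \\"query\\": { \\"term\\": { \\"redact.pii.found\\": true } },\\n \\"_source\\": [ \\"message\\", \\"raw.message\\", \\"redact.pii.found\\" ]\\n}\\n```\\n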
\\nNext, load the new dashboard that works on the cleaned-up data. Go to Kibana -> Stack Management -> Saved Objects and import the `pii-dashboard-part-2.ndjson` file that can be found [here](https://github.com/bvader/elastic-pii/blob/main/elastic/blog-part-2/pii-dashboard-part-2.ndjson).\\n\\nThe new dashboard should look like this. Note: it uses different fields under the covers, since we have cleaned up the underlying data.\\n\\n![PII Dashboard Part 2 Blog 2](/assets/images/pii-ner-regex-assess-redact-part-2/pii-dashboard-2-part-2.png)\\n\\n### Apply Role Based Access Control to protect the raw unredacted data\\n\\nElasticsearch natively supports role-based access control, including field- and document-level access control; this dramatically reduces the operational and maintenance complexity required to secure our application.\\n\\nWe will create a role that does not allow access to the `raw.message` field, then create a user and assign that user the role. With that role, the user will only be able to see the redacted message, which is now in the `message` field, and will not be able to access the protected `raw.message` field.\\n\\n**NOTE:** Since we only sampled 10% of the data in this exercise, the unsampled `message` fields are not moved to `raw.message` and are still viewable, but this shows the capability you can apply in a production system.\\n\\n[The code can be found here](https://github.com/bvader/elastic-pii/blob/main/elastic/blog-part-2/pii-redact-composable-pipelines-blog-2-rbac.json) for the following section of code.
\\n\\n```bash\\n# Create a role with no access to the raw.message field\\nGET _security/role/protect-pii\\nDELETE _security/role/protect-pii\\nPUT _security/role/protect-pii\\n{\\n \\"cluster\\": [],\\n \\"indices\\": [\\n {\\n \\"names\\": [\\n \\"logs-*\\"\\n ],\\n \\"privileges\\": [\\n \\"read\\",\\n \\"view_index_metadata\\"\\n ],\\n \\"field_security\\": {\\n \\"grant\\": [\\n \\"*\\"\\n ],\\n \\"except\\": [\\n \\"raw.message\\"\\n ]\\n },\\n \\"allow_restricted_indices\\": false\\n }\\n ],\\n \\"applications\\": [\\n {\\n \\"application\\": \\"kibana-.kibana\\",\\n \\"privileges\\": [\\n \\"all\\"\\n ],\\n \\"resources\\": [\\n \\"*\\"\\n ]\\n }\\n ],\\n \\"run_as\\": [],\\n \\"metadata\\": {},\\n \\"transient_metadata\\": {\\n \\"enabled\\": true\\n }\\n}\\n\\n# Create user stephen with the protect-pii role\\nGET _security/user/stephen\\nDELETE /_security/user/stephen\\nPOST /_security/user/stephen\\n{\\n \\"password\\" : \\"mypassword\\",\\n \\"roles\\" : [ \\"protect-pii\\" ],\\n \\"full_name\\" : \\"Stephen Brown\\"\\n}\\n\\n```\\n\\nNow log in in a separate window as the new user `stephen`, who has the `protect-pii` role. Go to Discover, put `redact.pii.found : true` in the KQL bar, and add the `message` field to the table. Notice that `raw.message` is not available.\\n\\nYou should see something like this\\n![PII Discover Part 3 Blog 2](/assets/images/pii-ner-regex-assess-redact-part-2/pii-discover-3-part-2.png)\\n\\n### Create an Alert when PII is Detected\\n\\nNow, with the pipeline processing in place, creating an alert when PII is detected is easy. Review [Alerting in Kibana](https://www.elastic.co/guide/en/kibana/current/alerting-getting-started.html) if you need a refresher.\\n\\nNOTE: [Reload](#reloading-the-logs) the data if needed so that you have recent data.\\n\\nFirst, we will create a simple [ES|QL query](https://www.elastic.co/guide/en/elasticsearch/reference/current/esql.html) in Discover.\\n\\n[The code can be found here.](https://github.com/bvader/elastic-pii/blob/main/elastic/blog-part-2/pii-redact-esql-alert-blog-2.txt)\\n\\n```\\nFROM logs-pii-default\\n| WHERE redact.pii.found == true\\n| STATS pii_count = count(*)\\n| WHERE pii_count > 0\\n```\\n\\nWhen you run this, you should see something like the following.\\n\\n![PII ESQL Part 1 Blog 2](/assets/images/pii-ner-regex-assess-redact-part-2/pii-esql-1-part-2.png)\\n
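\\nIf you also want to see whether detections came from the NER model, the regex processor, or both, a small variation of the same query breaks the counts down; this is our own illustrative addition, using the flag fields set by the pipelines above.\\n\\n```\\nFROM logs-pii-default\\n| WHERE redact.pii.found == true\\n| STATS pii_count = count(*) BY redact.ner.found, redact.proc.found\\n```\\n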
\\nNow click the Alerts menu, select `Create search threshold rule`, and create a rule that alerts us when PII is found.\\n\\n**Select a time field: @timestamp\\nSet the time window: 5 minutes**\\n\\nAssuming you loaded the data recently, when you run **Test** you should see something like\\n\\npii_count : `343`\\nAlerts generated `query matched`\\n\\nAdd an action for when the alert is Active.\\n\\n**For each alert: `On status changes`\\nRun when: `Query matched`**\\n\\n```\\nElasticsearch query rule {{rule.name}} is active:\\n\\n- PII Found: true\\n- PII Count: {{#context.hits}} {{_source.pii_count}}{{/context.hits}}\\n- Conditions Met: {{context.conditions}} over {{rule.params.timeWindowSize}}{{rule.params.timeWindowUnit}}\\n- Timestamp: {{context.date}}\\n- Link: {{context.link}}\\n```\\n\\nAdd an action for when the alert is Recovered.\\n\\n**For each alert: `On status changes`\\nRun when: `Recovered`**\\n\\n```\\nElasticsearch query rule {{rule.name}} is Recovered:\\n\\n- PII Found: false\\n- Conditions Not Met: {{context.conditions}} over {{rule.params.timeWindowSize}}{{rule.params.timeWindowUnit}}\\n- Timestamp: {{context.date}}\\n- Link: {{context.link}}\\n```\\n\\nWhen everything is set up, it should look like this; then `Save`.\\n\\n![Alert Setup](/assets/images/pii-ner-regex-assess-redact-part-2/pii-alert-1-part2.png)\\\\\\n![Action Alert](/assets/images/pii-ner-regex-assess-redact-part-2/pii-alert-2-part2.png)\\\\\\n![Action Alert](/assets/images/pii-ner-regex-assess-redact-part-2/pii-alert-3-part2.png)\\n\\nIf you have recent data, you should get an Active alert that looks like this. I sent mine to Slack.\\n\\n```\\nElasticsearch query rule pii-found-esql is active:\\n- PII Found: true\\n- PII Count: 374\\n- Conditions Met: Query matched documents over 5m\\n- Timestamp: 2024-10-15T02:44:52.795Z\\n- Link: https://mydeployment123.aws.found.io:9243/app/management/insightsAndAlerting/triggersActions/rule/7d6faecf-964e-46da-aaba-8a2f89f33989\\n```\\n\\nIf you then wait, you will get a Recovered alert that looks like this.\\n\\n```\\nElasticsearch query rule pii-found-esql is Recovered:\\n- PII Found: false\\n- Conditions Not Met: Query did NOT match documents over 5m\\n- Timestamp: 2024-10-15T02:49:04.815Z\\n- Link: https://mydeployment123.kb.us-west-1.aws.found.io:9243/app/management/insightsAndAlerting/triggersActions/rule/7d6faecf-964e-46da-aaba-8a2f89f33989\\n```\\n\\n### Production Scaling\\n\\n#### NER Scaling\\n\\nAs we mentioned in [Part 1 of this blog](https://www.elastic.co/observability-labs/blog/pii-ner-regex-assess-redact-part-1#named-entity-recognition-ner-detection), NER / NLP models are CPU-intensive and expensive to run at scale; thus, we employed a sampling technique to understand the risk in our logs without sending the full log volume through the NER model.\\n\\nPlease review [the setup and configuration of the NER model](https://www.elastic.co/observability-labs/blog/pii-ner-regex-assess-redact-part-1#loading-configuration-and-execution-of-the-ner-pipeline) from Part 1 of the blog.\\n\\nWe chose the base BERT NER model [bert-base-NER](https://huggingface.co/dslim/bert-base-NER) for our PII case.\\n\\nTo scale ingest, we will focus on scaling the allocations for the deployed model. More information on this topic is available [here](https://www.elastic.co/guide/en/machine-learning/current/ml-nlp-deploy-model.html). The number of allocations must be less than the available allocated processors (cores, not vCPUs) per node.\\n\\nThe metrics below are related to the model and configuration from Part 1 of the blog. 
\\n\\n* 4 allocations to allow for more parallel ingestion\\n* 1 thread per allocation\\n* 0 byte cache, as we expect a low cache hit rate. **Note**: if there are many repeated logs, a cache can help, but with timestamps and other variations a cache will not help and can even slow down the process\\n* 8192 queue\\n\\n```bash\\nGET _ml/trained_models/dslim__bert-base-ner/_stats\\n.....\\n \\"node\\": {\\n \\"0m4tq7tMRC2H5p5eeZoQig\\": {\\n.....\\n \\"attributes\\": {\\n \\"xpack.installed\\": \\"true\\",\\n \\"region\\": \\"us-west-1\\",\\n \\"ml.allocated_processors\\": \\"5\\", << HERE \\n.....\\n },\\n \\"inference_count\\": 5040,\\n \\"average_inference_time_ms\\": 138.44285714285715, << HERE \\n \\"average_inference_time_ms_excluding_cache_hits\\": 138.44285714285715,\\n \\"inference_cache_hit_count\\": 0,\\n.....\\n \\"threads_per_allocation\\": 1,\\n \\"number_of_allocations\\": 4, <<< HERE\\n \\"peak_throughput_per_minute\\": 1550,\\n \\"throughput_last_minute\\": 1373,\\n \\"average_inference_time_ms_last_minute\\": 137.55280407865988,\\n \\"inference_cache_hit_count_last_minute\\": 0\\n }\\n ]\\n }\\n }\\n```\\n\\nThere are 3 key pieces of information above:\\n\\n* `\\"ml.allocated_processors\\": \\"5\\"`\\nThe number of physical cores / processors available\\n\\n* `\\"number_of_allocations\\": 4`\\nThe number of allocations, which is at most 1 per physical core. **Note**: we could have used 5 allocations, but we only allocated 4 for this exercise\\n\\n* `\\"average_inference_time_ms\\": 138.44285714285715`\\nThe average inference time per document\\n\\nThe throughput math for Inferences per Minute (IPM) per allocation (1 allocation per physical core) is straightforward, since an inference uses a single core and a single thread.\\n\\nThe Inferences per Minute per allocation is simply:\\n\\n`IPM per allocation = 60,000 ms (in a minute) / 138 ms per inference = 435`\\n\\nThis then lines up with the total Inferences per Minute:\\n\\n`Total IPM = 435 IPM per allocation * 4 allocations = ~1740`\\n\\nSuppose we want to do 10,000 IPM; how many allocations (cores) would we need?\\n\\n`Allocations = 10,000 IPM / 435 IPM per allocation = 23 allocations (cores rounded up)`\\n\\nOr perhaps logs are coming in at 5000 EPS and you want to do 1% sampling:\\n\\n`IPM = 5000 EPS * 60 sec * 0.01 sampling = 3000 IPM sampled`\\n\\nThen\\n\\n`Number of allocations = 3000 IPM / 435 IPM per allocation = 7 allocations (cores rounded up)`\\n\\n**Want to go faster?** It turns out there is a more lightweight NER model, [distilbert-NER](https://huggingface.co/dslim/distilbert-NER), that is faster, but the tradeoff is slightly lower accuracy.\\n\\nRunning the logs through this model results in an inference time roughly twice as fast:\\n\\n`\\"average_inference_time_ms\\": 66.0263959390863`\\n\\nHere is some quick math:\\n`IPM per allocation = 60,000 ms (in a minute) / 66 ms per inference = 909`\\n\\nSuppose we want to do 25,000 IPM; how many allocations (cores) would we need?\\n\\n`Allocations = 25,000 IPM / 909 IPM per allocation = 28 allocations (cores rounded up)`\\n\\n**Now you can apply this math to determine the correct sampling and NER scaling to support your logging use case.**\\n
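\\nIf you want to script this sizing arithmetic, here is a minimal sketch; the variable names are ours, and the example numbers come from the stats above.\\n\\n```bash\\n# allocations = ceil(target_ipm / (60000 / avg_inference_ms)) = ceil(target_ipm * avg_inference_ms / 60000)\\nAVG_MS=138       # average_inference_time_ms from the model _stats\\nTARGET_IPM=10000 # desired inferences per minute\\necho $(( (TARGET_IPM * AVG_MS + 59999) / 60000 )) # integer ceiling -> 23 allocations\\n```\\n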
\\n#### Redact Processor Scaling\\n\\nIn short, the `redact` processor should scale to production loads as long as you are using appropriately sized and configured nodes and have well-constructed regex patterns.\\n\\n### Assessing incoming logs\\n\\nIf you want to assess incoming log data in a data stream, all you need to do is change the conditional in the `logs@custom` pipeline to apply `process-pii` to the dataset you want. You can use any conditional that fits your use case.\\n\\nNote: just make sure that you have accounted for the proper scaling of the NER and redact processors, as described above in [Production Scaling](#production-scaling).\\n\\n```bash\\n {\\n \\"pipeline\\": {\\n \\"description\\" : \\"Call the process_pii pipeline on the correct dataset\\",\\n \\"if\\": \\"ctx?.data_stream?.dataset == \'pii\'\\", <<< HERE\\n \\"name\\": \\"process-pii\\"\\n }\\n }\\n```\\n\\nSo if, for example, your logs are coming into `logs-mycustomapp-default`, you would just change the conditional to\\n\\n```\\n \\"if\\": \\"ctx?.data_stream?.dataset == \'mycustomapp\'\\",\\n```\\n
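\\nFor reference, this conditional lives inside the `logs@custom` pipeline that was set up in Part 1; a minimal sketch of the full wrapper is below (if your `logs@custom` already contains other processors, add just the `pipeline` processor to it).\\n\\n```bash\\n# logs@custom is invoked for logs data streams; route only the desired dataset to process-pii\\nPUT _ingest/pipeline/logs@custom\\n{\\n \\"processors\\": [\\n {\\n \\"pipeline\\": {\\n \\"description\\" : \\"Call the process_pii pipeline on the correct dataset\\",\\n \\"if\\": \\"ctx?.data_stream?.dataset == \'mycustomapp\'\\",\\n \\"name\\": \\"process-pii\\"\\n }\\n }\\n ]\\n}\\n```\\n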
\\n### Assessing historical data\\n\\nIf you have a historical (already ingested) data stream or index, you can run the assessment over it using the `_reindex` API.\\n\\nNote: just make sure that you have accounted for the proper scaling of the NER and redact processors, as described above in [Production Scaling](#production-scaling).\\n\\nThere are a couple of extra steps:\\n[The code can be found here.](https://github.com/bvader/elastic-pii/blob/main/elastic/blog-part-2/pii-redact-historical-data-blog-2.json)\\n\\n1) First, we can set the parameters to ONLY keep the sampled data, as there is no reason to make a copy of all the unsampled data. In the `process-pii` pipeline, there is a setting `sample.keep_unsampled`, which we can set to `false`, which will then keep only the sampled data.\\n\\n```bash\\n {\\n \\"set\\": {\\n \\"description\\": \\"Set to false if you want to drop unsampled data, handy for reindexing historical data\\",\\n \\"field\\": \\"sample.keep_unsampled\\",\\n \\"value\\": false <<< SET TO false\\n }\\n },\\n```\\n\\n2) Second, we will create a pipeline that reroutes the data to the correct data stream to run through all the PII assessment/detection pipelines. It also sets the correct `dataset` and `namespace`.\\n\\n```bash\\nDELETE _ingest/pipeline/sendtopii\\nPUT _ingest/pipeline/sendtopii\\n{\\n \\"processors\\": [\\n {\\n \\"set\\": {\\n \\"field\\": \\"data_stream.dataset\\",\\n \\"value\\": \\"pii\\"\\n }\\n },\\n {\\n \\"set\\": {\\n \\"field\\": \\"data_stream.namespace\\",\\n \\"value\\": \\"default\\"\\n }\\n },\\n {\\n \\"reroute\\" : \\n {\\n \\"dataset\\" : \\"{{data_stream.dataset}}\\",\\n \\"namespace\\": \\"{{data_stream.namespace}}\\"\\n }\\n }\\n ]\\n}\\n```\\n\\n3) Finally, we can run a `_reindex` to select the data we want to test/assess. It is recommended to review the [_reindex](https://www.elastic.co/guide/en/elasticsearch/reference/current/docs-reindex.html) documentation before trying this. First, select the source data stream you want to assess; in this example, it is the `logs-generic-default` logs data stream. Note: I also added a `range` filter to select a specific time range. There is a bit of a \\"trick\\" that we need to use, since we are re-routing the data to the data stream `logs-pii-default`. To do this, we just set `\\"index\\": \\"logs-tmp-default\\"` in the `_reindex`, as the correct data stream will be set in the pipeline. We must do that because `reroute` is a `noop` if it is called from/to the same data stream.\\n\\n```bash\\nPOST _reindex?wait_for_completion=false\\n{\\n \\"source\\": {\\n \\"index\\": \\"logs-generic-default\\",\\n \\"query\\": {\\n \\"bool\\": {\\n \\"filter\\": [\\n {\\n \\"range\\": {\\n \\"@timestamp\\": {\\n \\"gte\\": \\"now-1h/h\\",\\n \\"lt\\": \\"now\\"\\n }\\n }\\n }\\n ]\\n }\\n }\\n },\\n \\"dest\\": {\\n \\"op_type\\": \\"create\\",\\n \\"index\\": \\"logs-tmp-default\\",\\n \\"pipeline\\": \\"sendtopii\\"\\n }\\n}\\n```\\n
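\\nSince the reindex runs with `wait_for_completion=false`, it returns a task id that you can poll until the job finishes; the id below is just a placeholder for whatever your call returns.\\n\\n```bash\\n# Check on the background reindex; substitute the task id returned by the _reindex call\\nGET _tasks/oTUltX4IQMOUUVeiohTt8A:12345\\n```\\n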
\\n## Summary\\n\\nAt this point, you have the tools and processes needed to assess, detect, analyze, alert on, and protect PII in your logs.\\n\\n[The end-state solution can be found here](https://github.com/bvader/elastic-pii/tree/main/elastic/blog-complete-end-solution).\\n\\nIn [Part 1 of this blog](https://www.elastic.co/observability-labs/blog/pii-ner-regex-assess-redact-part-1), we accomplished the following:\\n\\n* Reviewed the techniques and tools we have available for PII detection and assessment\\n* Reviewed the role of NLP / NER in PII detection and assessment\\n* Built the necessary composable ingest pipelines to sample logs and run them through the NER model\\n* Reviewed the NER results and prepared to move to the second blog\\n\\nIn **Part 2** of this blog, we covered the following:\\n\\n* Redacted PII using the NER model and the redact processor\\n* Applied field-level security to control access to the un-redacted data\\n* Enhanced the dashboards and alerts\\n* Production considerations and scaling\\n* How to run these processes on incoming or historical data\\n\\n***So get to work and reduce risk in your logs!***\\n\\n## Data Loading Appendix\\n\\n#### Code\\n\\nThe data loading code can be found here:\\n\\n[https://github.com/bvader/elastic-pii](https://github.com/bvader/elastic-pii)\\n\\n```\\n$ git clone https://github.com/bvader/elastic-pii.git\\n```\\n\\n#### Creating and Loading the Sample Data Set\\n\\n```\\n$ cd elastic-pii\\n$ cd python\\n$ python -m venv .env\\n$ source .env/bin/activate\\n$ pip install elasticsearch\\n$ pip install Faker\\n```\\n\\nRun the log generator:\\n\\n```\\n$ python generate_random_logs.py\\n```\\n\\nIf you do not change any parameters, this will create 10000 random logs in a file named pii.log, with a mix of logs that do and do not contain PII.\\n\\nEdit `load_logs.py` and set the following:\\n\\n```\\n# The Elastic User \\nELASTIC_USER = \\"elastic\\"\\n\\n# Password for the \'elastic\' user generated by Elasticsearch\\nELASTIC_PASSWORD = \\"askdjfhasldfkjhasdf\\"\\n\\n# Found in the \'Manage Deployment\' page\\nELASTIC_CLOUD_ID = \\"deployment:sadfjhasfdlkjsdhf3VuZC5pbzo0NDMkYjA0NmQ0YjFiYzg5NDM3ZDgxM2YxM2RhZjQ3OGE3MzIkZGJmNTE0OGEwODEzNGEwN2E3M2YwYjcyZjljYTliZWQ=\\"\\n```\\n\\nThen run the following command.\\n\\n```\\n$ python load_logs.py\\n```\\n\\n#### Reloading the logs\\n\\n**Note:** To reload the logs, you can simply re-run the above command. You can run the command multiple times during this exercise, and the logs will be reloaded (actually, loaded again). The new logs will not collide with previous runs, as there is a unique `run.id` for each run, which is displayed at the end of the loading process.\\n\\n```\\n$ python load_logs.py\\n```\\n"
We will explore using NLP (Natural Language Processing) and Pattern matching to detect, assess, and, where feasible, redact PII from logs being ingested into Elasticsearch.\\"}),`\\n`,(0,n.jsxs)(e.p,{children:[\\"In \\",(0,n.jsx)(e.a,{href:\\"https://www.elastic.co/observability-labs/blog/pii-ner-regex-assess-redact-part-1\\",rel:\\"nofollow\\",children:\\"Part 1 of this blog\\"}),\\", we covered the following:\\"]}),`\\n`,(0,n.jsxs)(e.ul,{children:[`\\n`,(0,n.jsx)(e.li,{children:\\"Review the techniques and tools we have available to manage PII in our logs\\"}),`\\n`,(0,n.jsx)(e.li,{children:\\"Understand the roles of NLP / NER in PII detection\\"}),`\\n`,(0,n.jsx)(e.li,{children:\\"Build a composable processing pipeline to detect and assess PII\\"}),`\\n`,(0,n.jsx)(e.li,{children:\\"Sample logs and run them through the NER Model\\"}),`\\n`,(0,n.jsx)(e.li,{children:\\"Assess the results of the NER Model\\"}),`\\n`]}),`\\n`,(0,n.jsxs)(e.p,{children:[\\"In \\",(0,n.jsx)(e.strong,{children:\\"Part 2\\"}),\\" of this blog, we will cover the following:\\"]}),`\\n`,(0,n.jsxs)(e.ul,{children:[`\\n`,(0,n.jsxs)(e.li,{children:[\\"Apply the \\",(0,n.jsx)(e.code,{children:\\"redact\\"}),\\" regex pattern processor and assess the results\\"]}),`\\n`,(0,n.jsx)(e.li,{children:\\"Create Alerts using ESQL\\"}),`\\n`,(0,n.jsx)(e.li,{children:\\"Apply field-level security to control access to the un-redacted data\\"}),`\\n`,(0,n.jsx)(e.li,{children:\\"Production considerations and scaling\\"}),`\\n`,(0,n.jsx)(e.li,{children:\\"How to run these processes on incoming or historical data\\"}),`\\n`]}),`\\n`,(0,n.jsx)(e.p,{children:\\"Reminder of the overall flow we will construct over the 2 blogs:\\"}),`\\n`,(0,n.jsx)(e.p,{children:(0,n.jsx)(e.img,{src:\\"/assets/images/pii-ner-regex-assess-redact-part-1/pii-overall-flow.png\\",alt:\\"PII Overall Flow\\",width:\\"1679\\",height:\\"1110\\"})}),`\\n`,(0,n.jsxs)(e.p,{children:[`All code for this exercise can be found at:\\n`,(0,n.jsx)(e.a,{href:\\"https://github.com/bvader/elastic-pii\\",rel:\\"nofollow\\",children:\\"https://github.com/bvader/elastic-pii\\"}),\\".\\"]}),`\\n`,(0,n.jsx)(e.h3,{id:\\"part-1-prerequisites\\",children:\\"Part 1 Prerequisites\\"}),`\\n`,(0,n.jsxs)(e.p,{children:[\\"This blog picks up where \\",(0,n.jsx)(e.a,{href:\\"https://www.elastic.co/observability-labs/blog/pii-ner-regex-assess-redact-part-1\\",rel:\\"nofollow\\",children:\\"Part 1 of this blog\\"}),\\" left off. You must have the NER model, ingest pipelines, and dashboard from Part 1 installed and working.\\"]}),`\\n`,(0,n.jsxs)(e.ul,{children:[`\\n`,(0,n.jsx)(e.li,{children:\\"Loaded and configured NER Model\\"}),`\\n`,(0,n.jsx)(e.li,{children:\\"Installed all the composable ingest pipelines from Part 1 of the blog\\"}),`\\n`,(0,n.jsx)(e.li,{children:\\"Installed dashboard\\"}),`\\n`]}),`\\n`,(0,n.jsxs)(e.p,{children:[\\"You can access the \\",(0,n.jsx)(e.a,{href:\\"https://github.com/bvader/elastic-pii/blob/main/elastic/blog-part-1/logs-sampler-composable-pipelines-blog-1-complete.json\\",rel:\\"nofollow\\",children:\\"complete solution for Blog 1 here\\"}),\\". 
Don\'t forget to load the dashboard, found \\",(0,n.jsx)(e.a,{href:\\"https://github.com/bvader/elastic-pii/blob/main/elastic/blog-part-1/pii-dashboard-part-1.ndjson\\",rel:\\"nofollow\\",children:\\"here\\"}),\\".\\"]}),`\\n`,(0,n.jsx)(e.h3,{id:\\"applying-the-redact-processor\\",children:\\"Applying the Redact Processor\\"}),`\\n`,(0,n.jsxs)(e.p,{children:[\\"Next, we will apply the \\",(0,n.jsxs)(e.a,{href:\\"https://www.elastic.co/guide/en/elasticsearch/reference/current/redact-processor.html\\",rel:\\"nofollow\\",children:[(0,n.jsx)(e.code,{children:\\"redact\\"}),\\" processor\\"]}),\\". The \\",(0,n.jsx)(e.code,{children:\\"redact\\"}),\\" processor is a simple regex-based processor that takes a list of regex patterns and looks for them in a field and replaces them with literals when found. The \\",(0,n.jsx)(e.code,{children:\\"redact\\"}),\\" processor is reasonably performant and can run at scale. At the end, we will discuss this in detail in the \\",(0,n.jsx)(e.a,{href:\\"#production-scaling\\",children:\\"production scaling\\"}),\\" section.\\"]}),`\\n`,(0,n.jsxs)(e.p,{children:[\\"Elasticsearch comes packaged with a number of useful predefined \\",(0,n.jsx)(e.a,{href:\\"https://github.com/elastic/elasticsearch/blob/8.15/libs/grok/src/main/resources/patterns/ecs-v1\\",rel:\\"nofollow\\",children:\\"patterns\\"}),\\" that can be conveniently referenced by the \\",(0,n.jsx)(e.code,{children:\\"redact\\"}),\\" processor. If one does not suit your needs, create a new pattern with a custom definition. The Redact processor replaces every occurrence of a match. If there are multiple matches, they will all be replaced with the pattern name.\\"]}),`\\n`,(0,n.jsx)(e.p,{children:\\"In the code below, we leveraged some of the predefined patterns as well as constructing several custom patterns.\\"}),`\\n`,(0,n.jsx)(e.pre,{children:(0,n.jsx)(e.code,{className:\\"language-bash\\",children:` \\"patterns\\": [\\n \\"%{EMAILADDRESS:EMAIL_REGEX}\\", << Predefined\\n \\"%{IP:IP_ADDRESS_REGEX}\\", << Predefined\\n \\"%{CREDIT_CARD:CREDIT_CARD_REGEX}\\", << Custom\\n \\"%{SSN:SSN_REGEX}\\", << Custom\\n \\"%{PHONE:PHONE_REGEX}\\" << Custom\\n ]\\n`})}),`\\n`,(0,n.jsx)(e.p,{children:\\"We also replaced the PII with easily identifiable patterns we can use for assessment.\\"}),`\\n`,(0,n.jsx)(e.p,{children:\'In addition, it is important to note that since the redact processor is a simple regex find and replace, it can be used against many \\"secrets\\" patterns, not just PII. 
There are many references for regex and secrets patterns, so you can reuse this capability to detect secrets in your logs.\'}),`\\n`,(0,n.jsxs)(e.p,{children:[(0,n.jsx)(e.a,{href:\\"https://github.com/bvader/elastic-pii/blob/main/elastic/blog-part-2/pii-redact-composable-pipelines-blog-2-redact-processor-1.json\\",rel:\\"nofollow\\",children:\\"The code can be found here\\"}),\\" for the following two sections of code.\\"]}),`\\n`,(0,n.jsxs)(\\"details\\",{open:!0,children:[(0,n.jsx)(\\"summary\\",{children:\\"redact processor pipeline code - click to open/close\\"}),(0,n.jsx)(e.pre,{children:(0,n.jsx)(e.code,{className:\\"language-bash\\",children:`# Add the PII redact processor pipeline\\nDELETE _ingest/pipeline/logs-pii-redact-processor\\nPUT _ingest/pipeline/logs-pii-redact-processor\\n{\\n \\"processors\\": [\\n {\\n \\"set\\": {\\n \\"field\\": \\"redact.proc.successful\\",\\n \\"value\\": true\\n }\\n },\\n {\\n \\"set\\": {\\n \\"field\\": \\"redact.proc.found\\",\\n \\"value\\": false\\n }\\n },\\n {\\n \\"set\\": {\\n \\"if\\": \\"ctx?.redact?.message == null\\",\\n \\"field\\": \\"redact.message\\",\\n \\"copy_from\\": \\"message\\"\\n }\\n },\\n {\\n \\"redact\\": {\\n \\"field\\": \\"redact.message\\",\\n \\"prefix\\": \\"\\",\\n \\"patterns\\": [\\n \\"%{EMAILADDRESS:EMAIL_REGEX}\\",\\n \\"%{IP:IP_ADDRESS_REGEX}\\",\\n \\"%{CREDIT_CARD:CREDIT_CARD_REGEX}\\",\\n \\"%{SSN:SSN_REGEX}\\",\\n \\"%{PHONE:PHONE_REGEX}\\"\\n ],\\n \\"pattern_definitions\\": {\\n \\"CREDIT_CARD\\": \\"\\"\\"\\\\\\\\d{4}[ -]\\\\\\\\d{4}[ -]\\\\\\\\d{4}[ -]\\\\\\\\d{4}\\"\\"\\",\\n \\"SSN\\": \\"\\"\\"\\\\\\\\d{3}-\\\\\\\\d{2}-\\\\\\\\d{4}\\"\\"\\",\\n \\"PHONE\\": \\"\\"\\"(\\\\\\\\+\\\\\\\\d{1,2}\\\\\\\\s?)?1?\\\\\\\\-?\\\\\\\\.?\\\\\\\\s?\\\\\\\\(?\\\\\\\\d{3}\\\\\\\\)?[\\\\\\\\s.-]?\\\\\\\\d{3}[\\\\\\\\s.-]?\\\\\\\\d{4}\\"\\"\\"\\n },\\n \\"on_failure\\": [\\n {\\n \\"set\\": {\\n \\"description\\": \\"Set \'error.message\'\\",\\n \\"field\\": \\"failure\\",\\n \\"value\\": \\"REDACT_PROCESSOR_FAILED\\",\\n \\"override\\": false\\n }\\n },\\n {\\n \\"set\\": {\\n \\"field\\": \\"redact.proc.successful\\",\\n \\"value\\": false\\n }\\n }\\n ]\\n }\\n },\\n {\\n \\"set\\": {\\n \\"if\\": \\"ctx?.redact?.message.contains(\'REDACTPROC\')\\",\\n \\"field\\": \\"redact.proc.found\\",\\n \\"value\\": true\\n }\\n },\\n {\\n \\"set\\": {\\n \\"if\\": \\"ctx?.redact?.pii?.found == null\\",\\n \\"field\\": \\"redact.pii.found\\",\\n \\"value\\": false\\n }\\n },\\n {\\n \\"set\\": {\\n \\"if\\": \\"ctx?.redact?.proc?.found == true\\",\\n \\"field\\": \\"redact.pii.found\\",\\n \\"value\\": true\\n }\\n }\\n ],\\n \\"on_failure\\": [\\n {\\n \\"set\\": {\\n \\"field\\": \\"failure\\",\\n \\"value\\": \\"GENERAL_FAILURE\\",\\n \\"override\\": false\\n }\\n }\\n ]\\n}\\n`})})]}),`\\n`,(0,n.jsxs)(e.p,{children:[\\"And now, we will add the \\",(0,n.jsx)(e.code,{children:\\"logs-pii-redact-processor\\"}),\\" pipeline to the overall \\",(0,n.jsx)(e.code,{children:\\"process-pii\\"}),\\" pipeline\\"]}),`\\n`,(0,n.jsxs)(\\"details\\",{open:!0,children:[(0,n.jsx)(\\"summary\\",{children:\\"redact processor pipeline code - click to open/close\\"}),(0,n.jsx)(e.pre,{children:(0,n.jsx)(e.code,{className:\\"language-bash\\",children:`# Updated Process PII pipeline that now call the NER and Redact Processor pipeline\\nDELETE _ingest/pipeline/process-pii\\nPUT _ingest/pipeline/process-pii\\n{\\n \\"processors\\": [\\n {\\n \\"set\\": {\\n \\"description\\": \\"Set true if enabling sampling, otherwise false\\",\\n \\"field\\": 
\\"sample.enabled\\",\\n \\"value\\": true\\n }\\n },\\n {\\n \\"set\\": {\\n \\"description\\": \\"Set Sampling Rate 0 None 10000 all allows for 0.01% precision\\",\\n \\"field\\": \\"sample.sample_rate\\",\\n \\"value\\": 1000\\n }\\n },\\n {\\n \\"set\\": {\\n \\"description\\": \\"Set to false if you want to drop unsampled data, handy for reindexing hostorical data\\",\\n \\"field\\": \\"sample.keep_unsampled\\",\\n \\"value\\": true\\n }\\n },\\n {\\n \\"pipeline\\": {\\n \\"if\\": \\"ctx.sample.enabled == true\\",\\n \\"name\\": \\"logs-sampler\\",\\n \\"ignore_failure\\": true\\n }\\n },\\n {\\n \\"pipeline\\": {\\n \\"if\\": \\"ctx.sample.enabled == false || (ctx.sample.enabled == true && ctx.sample.sampled == true)\\",\\n \\"name\\": \\"logs-ner-pii-processor\\"\\n }\\n },\\n {\\n \\"pipeline\\": {\\n \\"if\\": \\"ctx.sample.enabled == false || (ctx.sample.enabled == true && ctx.sample.sampled == true)\\",\\n \\"name\\": \\"logs-pii-redact-processor\\"\\n }\\n }\\n ]\\n}\\n`})})]}),`\\n`,(0,n.jsxs)(e.p,{children:[\\"Reload the data as described in the \\",(0,n.jsx)(e.a,{href:\\"#reloading-the-logs\\",children:\\"Reloading the logs\\"}),\\". If you have not generated the logs the first time, follow the instructions in the \\",(0,n.jsx)(e.a,{href:\\"#data-loading-appendix\\",children:\\"Data Loading Appendix\\"})]}),`\\n`,(0,n.jsxs)(e.p,{children:[`Go to Discover and enter the following into the KQL bar\\n`,(0,n.jsx)(e.code,{children:\\"sample.sampled : true and redact.message: REDACTPROC\\"}),\\" and add the \\",(0,n.jsx)(e.code,{children:\\"redact.message\\"}),\\" to the table and you should see something like this.\\"]}),`\\n`,(0,n.jsx)(e.p,{children:(0,n.jsx)(e.img,{src:\\"/assets/images/pii-ner-regex-assess-redact-part-2/pii-discover-1-part-2.png\\",alt:\\"PII Discover Blog 2 Part 1\\",width:\\"3252\\",height:\\"1948\\"})}),`\\n`,(0,n.jsxs)(e.p,{children:[\\"And if you did not load the dashboard from Blog Part 1 at already, load it, it can be found \\",(0,n.jsx)(e.a,{href:\\"https://github.com/bvader/elastic-pii/blob/main/elastic/blog-part-1/pii-dashboard-part-1.ndjson\\",rel:\\"nofollow\\",children:\\"here\\"}),\\" using the Kibana -> Stack Management -> Saved Objects -> Import.\\"]}),`\\n`,(0,n.jsx)(e.p,{children:\\"It should look something like this now. 
Note that the REGEX portions of the dashboard are now active.\\"}),`\\n`,(0,n.jsx)(e.p,{children:(0,n.jsx)(e.img,{src:\\"/assets/images/pii-ner-regex-assess-redact-part-2/pii-dashboard-1-part-2.png\\",alt:\\"PII Dashboards Blog 2 Part 1\\",width:\\"3076\\",height:\\"1758\\"})}),`\\n`,(0,n.jsx)(e.h2,{id:\\"checkpoint\\",children:\\"Checkpoint\\"}),`\\n`,(0,n.jsx)(e.p,{children:\\"At this point, we have the following capabilities:\\"}),`\\n`,(0,n.jsxs)(e.ul,{children:[`\\n`,(0,n.jsx)(e.li,{children:\\"Ability to sample incoming logs and apply this PII redaction\\"}),`\\n`,(0,n.jsx)(e.li,{children:\\"Detect and Assess PII with the NER/NLP and Pattern Matching\\"}),`\\n`,(0,n.jsx)(e.li,{children:\\"Assess the amount, type and quality of the PII detections\\"}),`\\n`]}),`\\n`,(0,n.jsx)(e.p,{children:\\"This is a great point to stop if you are just running all this once to see how it works, but we have a few more steps to make this useful in production systems.\\"}),`\\n`,(0,n.jsxs)(e.ul,{children:[`\\n`,(0,n.jsx)(e.li,{children:\\"Clean up the working and unredacted data\\"}),`\\n`,(0,n.jsx)(e.li,{children:\\"Update the Dashboard to work with the cleaned-up data\\"}),`\\n`,(0,n.jsx)(e.li,{children:\\"Apply Role Based Access Control to protect the raw unredacted data\\"}),`\\n`,(0,n.jsx)(e.li,{children:\\"Create Alerts\\"}),`\\n`,(0,n.jsx)(e.li,{children:\\"Production and Scaling Considerations\\"}),`\\n`,(0,n.jsx)(e.li,{children:\\"How to run these processes on incoming or historical data\\"}),`\\n`]}),`\\n`,(0,n.jsx)(e.h2,{id:\\"applying-to-production-systems\\",children:\\"Applying to Production Systems\\"}),`\\n`,(0,n.jsx)(e.h3,{id:\\"cleanup-working-data-and-update-the-dashboard\\",children:\\"Cleanup working data and update the dashboard\\"}),`\\n`,(0,n.jsxs)(e.p,{children:[\\"And now we will add the cleanup code to the overall \\",(0,n.jsx)(e.code,{children:\\"process-pii\\"}),\\" pipeline.\\"]}),`\\n`,(0,n.jsxs)(e.p,{children:[\\"In short, we set a flag \\",(0,n.jsx)(e.code,{children:\\"redact.enable: true\\"}),\\" that directs the pipeline to move the unredacted \\",(0,n.jsx)(e.code,{children:\\"message\\"}),\\" field to \\",(0,n.jsx)(e.code,{children:\\"raw.message\\"}),\\" and the move the redacted message field \\",(0,n.jsx)(e.code,{children:\\"redact.message\\"}),\\"to the \\",(0,n.jsx)(e.code,{children:\\"message\\"}),\' field. We will \\"protect\\" the \',(0,n.jsx)(e.code,{children:\\"raw.message\\"}),\\" in the following section.\\"]}),`\\n`,(0,n.jsxs)(e.p,{children:[(0,n.jsx)(e.strong,{children:\\"NOTE:\\"}),\\" Of course you can change this behavior if you want to completely delete the unredacted data. 
In this exercise we will keep it and protect it.\\"]}),`\\n`,(0,n.jsxs)(e.p,{children:[\\"In addition we set \\",(0,n.jsx)(e.code,{children:\\"redact.cleanup: true\\"}),\\" to clean up the NLP working data.\\"]}),`\\n`,(0,n.jsx)(e.p,{children:\\"These fields allow a lot of control over what data you decide to keep and analyze.\\"}),`\\n`,(0,n.jsxs)(e.p,{children:[(0,n.jsx)(e.a,{href:\\"https://github.com/bvader/elastic-pii/blob/main/elastic/blog-part-2/pii-redact-composable-pipelines-blog-2-redact-processor-2.json\\",rel:\\"nofollow\\",children:\\"The code can be found here\\"}),\\" for the following two sections of code.\\"]}),`\\n`,(0,n.jsxs)(\\"details\\",{open:!0,children:[(0,n.jsx)(\\"summary\\",{children:\\"redact processor pipeline code - click to open/close\\"}),(0,n.jsx)(e.pre,{children:(0,n.jsx)(e.code,{className:\\"language-bash\\",children:`# Updated Process PII pipeline that now call the NER and Redact Processor pipeline and cleans up \\nDELETE _ingest/pipeline/process-pii\\nPUT _ingest/pipeline/process-pii\\n{\\n \\"processors\\": [\\n {\\n \\"set\\": {\\n \\"description\\": \\"Set true if enabling sampling, otherwise false\\",\\n \\"field\\": \\"sample.enabled\\",\\n \\"value\\": true\\n }\\n },\\n {\\n \\"set\\": {\\n \\"description\\": \\"Set Sampling Rate 0 None 10000 all allows for 0.01% precision\\",\\n \\"field\\": \\"sample.sample_rate\\",\\n \\"value\\": 1000\\n }\\n },\\n {\\n \\"set\\": {\\n \\"description\\": \\"Set to false if you want to drop unsampled data, handy for reindexing hostorical data\\",\\n \\"field\\": \\"sample.keep_unsampled\\",\\n \\"value\\": true\\n }\\n },\\n {\\n \\"pipeline\\": {\\n \\"if\\": \\"ctx.sample.enabled == true\\",\\n \\"name\\": \\"logs-sampler\\",\\n \\"ignore_failure\\": true\\n }\\n },\\n {\\n \\"pipeline\\": {\\n \\"if\\": \\"ctx.sample.enabled == false || (ctx.sample.enabled == true && ctx.sample.sampled == true)\\",\\n \\"name\\": \\"logs-ner-pii-processor\\"\\n }\\n },\\n {\\n \\"pipeline\\": {\\n \\"if\\": \\"ctx.sample.enabled == false || (ctx.sample.enabled == true && ctx.sample.sampled == true)\\",\\n \\"name\\": \\"logs-pii-redact-processor\\"\\n }\\n },\\n {\\n \\"set\\": {\\n \\"description\\": \\"Set to true to actually redact, false will run processors but leave original\\",\\n \\"field\\": \\"redact.enable\\",\\n \\"value\\": true\\n }\\n },\\n {\\n \\"rename\\": {\\n \\"if\\": \\"ctx?.redact?.pii?.found == true && ctx?.redact?.enable == true\\",\\n \\"field\\": \\"message\\",\\n \\"target_field\\": \\"raw.message\\"\\n }\\n },\\n {\\n \\"rename\\": {\\n \\"if\\": \\"ctx?.redact?.pii?.found == true && ctx?.redact?.enable == true\\",\\n \\"field\\": \\"redact.message\\",\\n \\"target_field\\": \\"message\\"\\n }\\n },\\n {\\n \\"set\\": {\\n \\"description\\": \\"Set to true to actually to clean up working data\\",\\n \\"field\\": \\"redact.cleanup\\",\\n \\"value\\": true\\n }\\n },\\n {\\n \\"remove\\": {\\n \\"if\\": \\"ctx?.redact?.cleanup == true\\",\\n \\"field\\": [\\n \\"ml\\"\\n ],\\n \\"ignore_failure\\": true\\n }\\n }\\n ]\\n}\\n`})})]}),`\\n`,(0,n.jsxs)(e.p,{children:[\\"Reload the data as described here in the \\",(0,n.jsx)(e.a,{href:\\"#reloading-the-logs\\",children:\\"Reloading the logs\\"}),\\".\\"]}),`\\n`,(0,n.jsxs)(e.p,{children:[`Go to Discover and enter the following into the KQL bar\\n`,(0,n.jsx)(e.code,{children:\\"sample.sampled : true and redact.pii.found: true\\"}),\\" and add the following fields to the 
table\\"]}),`\\n`,(0,n.jsxs)(e.p,{children:[(0,n.jsx)(e.code,{children:\\"message\\"}),\\",\\",(0,n.jsx)(e.code,{children:\\"raw.message\\"}),\\",\\",(0,n.jsx)(e.code,{children:\\"redact.ner.found\\"}),\\",\\",(0,n.jsx)(e.code,{children:\\"redact.proc.found\\"}),\\",\\",(0,n.jsx)(e.code,{children:\\"redact.pii.found\\"})]}),`\\n`,(0,n.jsxs)(e.p,{children:[`You should see something like this\\n`,(0,n.jsx)(e.img,{src:\\"/assets/images/pii-ner-regex-assess-redact-part-2/pii-discover-2-part-2.png\\",alt:\\"PII Discover Part 2 Blog 2\\",width:\\"4066\\",height:\\"1812\\"})]}),`\\n`,(0,n.jsx)(e.p,{children:\\"We have everything we need to move forward with protecting the PII and Alerting on it.\\"}),`\\n`,(0,n.jsx)(e.p,{children:\\"Load up the new dashboard that works on the cleaned-up data\\"}),`\\n`,(0,n.jsxs)(e.p,{children:[\\"To load the dashboard, go to Kibana -> Stack Management -> Saved Objects and import the \\",(0,n.jsx)(e.code,{children:\\"pii-dashboard-part-2.ndjson\\"}),\\" file that can be found \\",(0,n.jsx)(e.a,{href:\\"https://github.com/bvader/elastic-pii/blob/main/elastic/blog-part-2/pii-dashboard-part-2.ndjson\\",rel:\\"nofollow\\",children:\\"here\\"}),\\".\\"]}),`\\n`,(0,n.jsx)(e.p,{children:\\"The new dashboard should look like this. Note: It uses different fields under the covers since we have cleaned up the underlying data.\\"}),`\\n`,(0,n.jsxs)(e.p,{children:[`You should see something like this\\n`,(0,n.jsx)(e.img,{src:\\"/assets/images/pii-ner-regex-assess-redact-part-2/pii-dashboard-2-part-2.png\\",alt:\\"PII Dashboard Part 2 Blog 2\\",width:\\"2408\\",height:\\"1686\\"})]}),`\\n`,(0,n.jsx)(e.h3,{id:\\"apply-role-based-access-control-to-protect-the-raw-unredacted-data\\",children:\\"Apply Role Based Access Control to protect the raw unredacted data\\"}),`\\n`,(0,n.jsx)(e.p,{children:\\"Elasticsearch supports role-based access control, including field and document level access control natively; it dramatically reduces the operational and maintenance complexity required to secure our application.\\"}),`\\n`,(0,n.jsxs)(e.p,{children:[\\"We will create a Role that does not allow access to the \\",(0,n.jsx)(e.code,{children:\\"raw.message\\"}),\\" field and then create a user and assign that user the role. 
With that role, the user will only be able to see the redacted message, which is now in the \\",(0,n.jsx)(e.code,{children:\\"message\\"}),\\" field, but will not be able to access the protected \\",(0,n.jsx)(e.code,{children:\\"raw.message\\"}),\\" field.\\"]}),`\\n`,(0,n.jsxs)(e.p,{children:[(0,n.jsx)(e.strong,{children:\\"NOTE:\\"}),\\" Since we only sampled 10% of the data in this exercise the non-sampled \\",(0,n.jsx)(e.code,{children:\\"message\\"}),\\" fields are not moved to the \\",(0,n.jsx)(e.code,{children:\\"raw.message\\"}),\\", so they are still viewable, but this shows the capability you can apply in a production system.\\"]}),`\\n`,(0,n.jsxs)(e.p,{children:[(0,n.jsx)(e.a,{href:\\"https://github.com/bvader/elastic-pii/blob/main/elastic/blog-part-2/pii-redact-composable-pipelines-blog-2-rbac.json\\",rel:\\"nofollow\\",children:\\"The code can be found here\\"}),\\" for the following section of code.\\"]}),`\\n`,(0,n.jsxs)(\\"details\\",{open:!0,children:[(0,n.jsx)(\\"summary\\",{children:\\"RBAC protect-pii role and user code - click to open/close\\"}),(0,n.jsx)(e.pre,{children:(0,n.jsx)(e.code,{className:\\"language-bash\\",children:`# Create role with no access to the raw.message field\\nGET _security/role/protect-pii\\nDELETE _security/role/protect-pii\\nPUT _security/role/protect-pii\\n{\\n \\"cluster\\": [],\\n \\"indices\\": [\\n {\\n \\"names\\": [\\n \\"logs-*\\"\\n ],\\n \\"privileges\\": [\\n \\"read\\",\\n \\"view_index_metadata\\"\\n ],\\n \\"field_security\\": {\\n \\"grant\\": [\\n \\"*\\"\\n ],\\n \\"except\\": [\\n \\"raw.message\\"\\n ]\\n },\\n \\"allow_restricted_indices\\": false\\n }\\n ],\\n \\"applications\\": [\\n {\\n \\"application\\": \\"kibana-.kibana\\",\\n \\"privileges\\": [\\n \\"all\\"\\n ],\\n \\"resources\\": [\\n \\"*\\"\\n ]\\n }\\n ],\\n \\"run_as\\": [],\\n \\"metadata\\": {},\\n \\"transient_metadata\\": {\\n \\"enabled\\": true\\n }\\n}\\n\\n# Create user stephen with protect-pii role\\nGET _security/user/stephen\\nDELETE /_security/user/stephen\\nPOST /_security/user/stephen\\n{\\n \\"password\\" : \\"mypassword\\",\\n \\"roles\\" : [ \\"protect-pii\\" ],\\n \\"full_name\\" : \\"Stephen Brown\\"\\n}\\n\\n`})})]}),`\\n`,(0,n.jsxs)(e.p,{children:[\\"Now log into a separate window with the new user \\",(0,n.jsx)(e.code,{children:\\"stephen\\"}),\\" with the \\",(0,n.jsx)(e.code,{children:\\"protect-pii role\\"}),\\". Go to Discover and put \\",(0,n.jsx)(e.code,{children:\\"redact.pii.found : true\\"}),\\" in the KQL bar and add the \\",(0,n.jsx)(e.code,{children:\\"message\\"}),\\" field to the table. Also, notice that the \\",(0,n.jsx)(e.code,{children:\\"raw.message\\"}),\\" is not available.\\"]}),`\\n`,(0,n.jsxs)(e.p,{children:[`You should see something like this\\n`,(0,n.jsx)(e.img,{src:\\"/assets/images/pii-ner-regex-assess-redact-part-2/pii-discover-3-part-2.png\\",alt:\\"PII Dashboard Part 2 Blog 2\\",width:\\"3580\\",height:\\"1806\\"})]}),`\\n`,(0,n.jsx)(e.h3,{id:\\"create-an-alert-when-pii-detected\\",children:\\"Create an Alert when PII Detected\\"}),`\\n`,(0,n.jsxs)(e.p,{children:[\\"Now, with the processing of the pipelines, creating an alert when PII is detected is easy. 
To review \\",(0,n.jsx)(e.a,{href:\\"https://www.elastic.co/guide/en/kibana/current/alerting-getting-started.html\\",rel:\\"nofollow\\",children:\\"Alerting in Kibana\\"}),\\" in detail if needed\\"]}),`\\n`,(0,n.jsxs)(e.p,{children:[\\"NOTE: \\",(0,n.jsx)(e.a,{href:\\"#reloading-the-logs\\",children:\\"Reload\\"}),\\" the data if needed to have recent data.\\"]}),`\\n`,(0,n.jsxs)(e.p,{children:[\\"First, we will create a simple \\",(0,n.jsx)(e.a,{href:\\"https://www.elastic.co/guide/en/elasticsearch/reference/current/esql.html\\",rel:\\"nofollow\\",children:\\"ES|QL query\\"}),\\" in Discover.\\"]}),`\\n`,(0,n.jsx)(e.p,{children:(0,n.jsx)(e.a,{href:\\"https://github.com/bvader/elastic-pii/blob/main/elastic/blog-part-2/pii-redact-esql-alert-blog-2.txt\\",rel:\\"nofollow\\",children:\\"The code can be found here.\\"})}),`\\n`,(0,n.jsx)(e.pre,{children:(0,n.jsx)(e.code,{children:`FROM logs-pii-default\\n| WHERE redact.pii.found == true\\n| STATS pii_count = count(*)\\n| WHERE pii_count > 0\\n`})}),`\\n`,(0,n.jsx)(e.p,{children:\\"When you run this you should see something like this.\\"}),`\\n`,(0,n.jsx)(e.p,{children:(0,n.jsx)(e.img,{src:\\"/assets/images/pii-ner-regex-assess-redact-part-2/pii-esql-1-part-2.png\\",alt:\\"PII ESQL Part 1 Blog 2\\",width:\\"2656\\",height:\\"1822\\"})}),`\\n`,(0,n.jsxs)(e.p,{children:[\\"Now click the Alerts menu and select \\",(0,n.jsx)(e.code,{children:\\"Create search threshold rule\\"}),\\", and will create an alert to alert us when PII is found.\\"]}),`\\n`,(0,n.jsx)(e.p,{children:(0,n.jsx)(e.strong,{children:`Select a time field: @timestamp\\nSet the time window: 5 minutes`})}),`\\n`,(0,n.jsxs)(e.p,{children:[\\"Assuming you loaded the data recently when you run \\",(0,n.jsx)(e.strong,{children:\\"Test\\"}),\\" it should do something like\\"]}),`\\n`,(0,n.jsxs)(e.p,{children:[\\"pii_count : \\",(0,n.jsx)(e.code,{children:\\"343\\"}),`\\nAlerts generated `,(0,n.jsx)(e.code,{children:\\"query matched\\"})]}),`\\n`,(0,n.jsx)(e.p,{children:\\"Add an action when the alert is Active.\\"}),`\\n`,(0,n.jsx)(e.p,{children:(0,n.jsxs)(e.strong,{children:[\\"For each alert: \\",(0,n.jsx)(e.code,{children:\\"On status changes\\"}),`\\nRun when: `,(0,n.jsx)(e.code,{children:\\"Query matched\\"})]})}),`\\n`,(0,n.jsx)(e.pre,{children:(0,n.jsx)(e.code,{children:`Elasticsearch query rule {{rule.name}} is active:\\n\\n- PII Found: true\\n- PII Count: {{#context.hits}} {{_source.pii_count}}{{/context.hits}}\\n- Conditions Met: {{context.conditions}} over {{rule.params.timeWindowSize}}{{rule.params.timeWindowUnit}}\\n- Timestamp: {{context.date}}\\n- Link: {{context.link}}\\n`})}),`\\n`,(0,n.jsx)(e.p,{children:\\"Add an Action for when the Alert is Recovered.\\"}),`\\n`,(0,n.jsx)(e.p,{children:(0,n.jsxs)(e.strong,{children:[\\"For each alert: \\",(0,n.jsx)(e.code,{children:\\"On status changes\\"}),`\\nRun when: `,(0,n.jsx)(e.code,{children:\\"Recovered\\"})]})}),`\\n`,(0,n.jsx)(e.pre,{children:(0,n.jsx)(e.code,{children:`Elasticsearch query rule {{rule.name}} is Recovered:\\n\\n- PII Found: false\\n- Conditions Not Met: {{context.conditions}} over {{rule.params.timeWindowSize}}{{rule.params.timeWindowUnit}}\\n- Timestamp: {{context.date}}\\n- Link: {{context.link}}\\n`})}),`\\n`,(0,n.jsxs)(e.p,{children:[\\"When all setup it should look like this and \\",(0,n.jsx)(e.code,{children:\\"Save\\"})]}),`\\n`,(0,n.jsxs)(e.p,{children:[(0,n.jsx)(e.img,{src:\\"/assets/images/pii-ner-regex-assess-redact-part-2/pii-alert-1-part2.png\\",alt:\\"Alert 
Setup\\",width:\\"406\\",height:\\"848\\"}),(0,n.jsx)(e.br,{}),`\\n`,(0,n.jsx)(e.img,{src:\\"/assets/images/pii-ner-regex-assess-redact-part-2/pii-alert-2-part2.png\\",alt:\\"Action Alert\\",width:\\"410\\",height:\\"428\\"}),(0,n.jsx)(e.br,{}),`\\n`,(0,n.jsx)(e.img,{src:\\"/assets/images/pii-ner-regex-assess-redact-part-2/pii-alert-3-part2.png\\",alt:\\"Action Alert\\",width:\\"410\\",height:\\"469\\"})]}),`\\n`,(0,n.jsx)(e.p,{children:\\"You should get an Active alert that looks like this if you have recent data. I sent mine to Slack.\\"}),`\\n`,(0,n.jsx)(e.pre,{children:(0,n.jsx)(e.code,{children:`Elasticsearch query rule pii-found-esql is active:\\n- PII Found: true\\n- PII Count: 374\\n- Conditions Met: Query matched documents over 5m\\n- Timestamp: 2024-10-15T02:44:52.795Z\\n- Link: https://mydeployment123.aws.found.io:9243/app/management/insightsAndAlerting/triggersActions/rule/7d6faecf-964e-46da-aaba-8a2f89f33989\\n`})}),`\\n`,(0,n.jsx)(e.p,{children:\\"And then if you wait you will get a Recovered alert that looks like this.\\"}),`\\n`,(0,n.jsx)(e.pre,{children:(0,n.jsx)(e.code,{children:`Elasticsearch query rule pii-found-esql is Recovered:\\n- PII Found: false\\n- Conditions Not Met: Query did NOT match documents over 5m\\n- Timestamp: 2024-10-15T02:49:04.815Z\\n- Link: https://mydeployment123.kb.us-west-1.aws.found.io:9243/app/management/insightsAndAlerting/triggersActions/rule/7d6faecf-964e-46da-aaba-8a2f89f33989\\n`})}),`\\n`,(0,n.jsx)(e.h3,{id:\\"production-scaling\\",children:\\"Production Scaling\\"}),`\\n`,(0,n.jsx)(e.h4,{id:\\"ner-scaling\\",children:\\"NER Scaling\\"}),`\\n`,(0,n.jsxs)(e.p,{children:[\\"As we mentioned \\",(0,n.jsx)(e.a,{href:\\"https://www.elastic.co/observability-labs/blog/pii-ner-regex-assess-redact-part-1#named-entity-recognition-ner-detection\\",rel:\\"nofollow\\",children:\\"Part 1 of this blog\\"}),\\" of this blog, NER / NLP Models are CPU-intensive and expensive to run at scale; thus, we employed a sampling technique to understand the risk in our logs without sending the full logs volume through the NER Model.\\"]}),`\\n`,(0,n.jsxs)(e.p,{children:[\\"Please review \\",(0,n.jsx)(e.a,{href:\\"https://www.elastic.co/observability-labs/blog/pii-ner-regex-assess-redact-part-1#loading-configuration-and-execution-of-the-ner-pipeline\\",rel:\\"nofollow\\",children:\\"the setup and configuration of the NER\\"}),\\" model from Part 1 of the blog.\\"]}),`\\n`,(0,n.jsxs)(e.p,{children:[\\"We chose the base BERT NER model \\",(0,n.jsx)(e.a,{href:\\"https://huggingface.co/dslim/bert-base-NER\\",rel:\\"nofollow\\",children:\\"bert-base-NER\\"}),\\" for our PII case.\\"]}),`\\n`,(0,n.jsxs)(e.p,{children:[\\"To scale ingest, we will focus on scaling the allocations for the deployed model. More information on this topic is available \\",(0,n.jsx)(e.a,{href:\\"https://www.elastic.co/guide/en/machine-learning/current/ml-nlp-deploy-model.html\\",rel:\\"nofollow\\",children:\\"here\\"}),\\". 
The number of allocations must be less than the available allocated processors (cores, not vCPUs) per node.\\"]}),`\\n`,(0,n.jsx)(e.p,{children:\\"The metrics below are related to the model and configuration from Part 1 of the blog.\\"}),`\\n`,(0,n.jsxs)(e.ul,{children:[`\\n`,(0,n.jsx)(e.li,{children:\\"4 Allocations to allow for more parallel ingestion\\"}),`\\n`,(0,n.jsx)(e.li,{children:\\"1 Thread per Allocation\\"}),`\\n`,(0,n.jsxs)(e.li,{children:[`0 Byes Cache, as we expect a low cache hit rate\\n`,(0,n.jsx)(e.strong,{children:\\"Note\\"}),\\" If there are many repeated logs, cache can help, but with timestamps and other variations, cache will not help and can even slow down the process\\"]}),`\\n`,(0,n.jsx)(e.li,{children:\\"8192 Queue\\"}),`\\n`]}),`\\n`,(0,n.jsx)(e.pre,{children:(0,n.jsx)(e.code,{className:\\"language-bash\\",children:`GET _ml/trained_models/dslim__bert-base-ner/_stats\\n.....\\n \\"node\\": {\\n \\"0m4tq7tMRC2H5p5eeZoQig\\": {\\n.....\\n \\"attributes\\": {\\n \\"xpack.installed\\": \\"true\\",\\n \\"region\\": \\"us-west-1\\",\\n \\"ml.allocated_processors\\": \\"5\\", << HERE \\n.....\\n },\\n \\"inference_count\\": 5040,\\n \\"average_inference_time_ms\\": 138.44285714285715, << HERE \\n \\"average_inference_time_ms_excluding_cache_hits\\": 138.44285714285715,\\n \\"inference_cache_hit_count\\": 0,\\n.....\\n \\"threads_per_allocation\\": 1,\\n \\"number_of_allocations\\": 4, <<< HERE\\n \\"peak_throughput_per_minute\\": 1550,\\n \\"throughput_last_minute\\": 1373,\\n \\"average_inference_time_ms_last_minute\\": 137.55280407865988,\\n \\"inference_cache_hit_count_last_minute\\": 0\\n }\\n ]\\n }\\n }\\n`})}),`\\n`,(0,n.jsx)(e.p,{children:\\"There are 3 key pieces of information above:\\"}),`\\n`,(0,n.jsxs)(e.ul,{children:[`\\n`,(0,n.jsxs)(e.li,{children:[`\\n`,(0,n.jsxs)(e.p,{children:[(0,n.jsx)(e.code,{children:\'\\"ml.allocated_processors\\": \\"5\\"\'}),`\\nThe number of physical cores / processors available`]}),`\\n`]}),`\\n`,(0,n.jsxs)(e.li,{children:[`\\n`,(0,n.jsxs)(e.p,{children:[(0,n.jsx)(e.code,{children:\'\\"number_of_allocations\\": 4\'}),`\\nThe number of allocations which is maximum 1 per physical core. 
`,(0,n.jsx)(e.strong,{children:\\"Note\\"}),\\": we could have used 5 allocations, but we only allocated 4 for this exercise\\"]}),`\\n`]}),`\\n`,(0,n.jsxs)(e.li,{children:[`\\n`,(0,n.jsxs)(e.p,{children:[(0,n.jsx)(e.code,{children:\'\\"average_inference_time_ms\\": 138.44285714285715\'}),`\\nThe averages inference time per document.`]}),`\\n`]}),`\\n`]}),`\\n`,(0,n.jsx)(e.p,{children:\\"The math is pretty straightforward for throughput for Inferences per Min (IPM) per allocation (1 allocation per physical core), since an inference uses a single core and a single thread.\\"}),`\\n`,(0,n.jsx)(e.p,{children:\\"Then the Inferences per Min per Allocation is simply:\\"}),`\\n`,(0,n.jsx)(e.p,{children:(0,n.jsx)(e.code,{children:\\"IPM per allocation = 60,000 ms (in a minute) / 138ms per inference = 435\\"})}),`\\n`,(0,n.jsx)(e.p,{children:\\"When then lines up with the Total Inferences per Minute\\"}),`\\n`,(0,n.jsx)(e.p,{children:(0,n.jsx)(e.code,{children:\\"Total IPM = 435 IPM / allocation * 4 Allocations = ~1740\\"})}),`\\n`,(0,n.jsx)(e.p,{children:\\"Suppose we want to do 10,000 IPMs, how many allocations (cores) would I need?\\"}),`\\n`,(0,n.jsx)(e.p,{children:(0,n.jsx)(e.code,{children:\\"Allocations = 10,000 IPM / 435 IPM per allocation = 23 Allocation (cores rounded up)\\"})}),`\\n`,(0,n.jsx)(e.p,{children:\\"Or perhaps logs are coming in at 5000 EPS and you want to do 1% Sampling.\\"}),`\\n`,(0,n.jsx)(e.p,{children:(0,n.jsx)(e.code,{children:\\"IPM = 5000 EPS * 60sec * 0.01 sampling = 3000 IPM sampled\\"})}),`\\n`,(0,n.jsx)(e.p,{children:\\"Then\\"}),`\\n`,(0,n.jsx)(e.p,{children:(0,n.jsx)(e.code,{children:\\"Number of Allocators = 3000 IPM / 435 IPM per allocation = 7 allocations (cores rounded up)\\"})}),`\\n`,(0,n.jsxs)(e.p,{children:[(0,n.jsx)(e.strong,{children:\\"Want Faster!\\"}),\\" Turns out there is a more lightweight NER Model \\",(0,n.jsx)(e.a,{href:\\"https://huggingface.co/dslim/distilbert-NER\\",rel:\\"nofollow\\",children:`\\ndistilbert-NER`}),\\" model that is faster, but the tradeoff is a little less accuracy.\\"]}),`\\n`,(0,n.jsx)(e.p,{children:\\"Running the logs through this model results in an inference time nearly twice as fast!\\"}),`\\n`,(0,n.jsx)(e.p,{children:(0,n.jsx)(e.code,{children:\'\\"average_inference_time_ms\\": 66.0263959390863\'})}),`\\n`,(0,n.jsxs)(e.p,{children:[`Here is some quick math:\\n`,(0,n.jsx)(e.code,{children:\\"$IPM per allocation = 60,000 ms (in a minute) / 61ms per inference = 983\\"})]}),`\\n`,(0,n.jsx)(e.p,{children:\\"Suppose we want to do 25,000 IPMs, how many allocations (cores) would I need?\\"}),`\\n`,(0,n.jsx)(e.p,{children:(0,n.jsx)(e.code,{children:\\"Allocations = 25,000 IPM / 983 IPM per allocation = 26 Allocation (cores rounded up)\\"})}),`\\n`,(0,n.jsx)(e.p,{children:(0,n.jsx)(e.strong,{children:\\"Now you can apply this math to determine the correct sampling and NER scaling to support your logging use case.\\"})}),`\\n`,(0,n.jsx)(e.h4,{id:\\"redact-processor-scaling\\",children:\\"Redact Processor Scaling\\"}),`\\n`,(0,n.jsxs)(e.p,{children:[\\"In short, the \\",(0,n.jsx)(e.code,{children:\\"redact\\"}),\\" processor should scale to production loads as long as you are using appropriately sized and configured nodes and have well-constructed regex patterns.\\"]}),`\\n`,(0,n.jsx)(e.h3,{id:\\"assessing-incoming-logs\\",children:\\"Assessing incoming logs\\"}),`\\n`,(0,n.jsxs)(e.p,{children:[\\"If you want to test on incoming logs data in a data stream. 
(0,n.jsx)(e.h3,{id:\\"assessing-incoming-logs\\",children:\\"Assessing incoming logs\\"}),`\\n`,(0,n.jsxs)(e.p,{children:[\\"If you want to test incoming log data in a data stream, all you need to do is change the conditional in the \\",(0,n.jsx)(e.code,{children:\\"logs@custom\\"}),\\" pipeline to apply the \\",(0,n.jsx)(e.code,{children:\\"process-pii\\"}),\\" pipeline to the dataset you want. You can use any conditional that fits your needs.\\"]}),`\\n`,(0,n.jsxs)(e.p,{children:[\\"Note: Just make sure that you have accounted for the proper scaling of the NER and Redact processors, as described above in \\",(0,n.jsx)(e.a,{href:\\"#production-scaling\\",children:\\"Production Scaling\\"}),\\".\\"]}),`\\n`,(0,n.jsx)(e.pre,{children:(0,n.jsx)(e.code,{className:\\"language-bash\\",children:` {\\n \\"pipeline\\": {\\n \\"description\\" : \\"Call the process_pii pipeline on the correct dataset\\",\\n \\"if\\": \\"ctx?.data_stream?.dataset == \'pii\'\\", <<< HERE\\n \\"name\\": \\"process-pii\\"\\n }\\n }\\n`})}),`\\n`,(0,n.jsxs)(e.p,{children:[\\"So if, for example, your logs are coming into \\",(0,n.jsx)(e.code,{children:\\"logs-mycustomapp-default\\"}),\\", you would just change the conditional to\\"]}),`\\n`,(0,n.jsx)(e.pre,{children:(0,n.jsx)(e.code,{children:` \\"if\\": \\"ctx?.data_stream?.dataset == \'mycustomapp\'\\",\\n`})}),`\\n`,(0,n.jsx)(e.h3,{id:\\"assessing-historical-data\\",children:\\"Assessing historical data\\"}),`\\n`,(0,n.jsxs)(e.p,{children:[\\"If you have a historical (already ingested) data stream or index, you can run the assessment over it using the \\",(0,n.jsx)(e.code,{children:\\"_reindex\\"}),\\" API.\\"]}),`\\n`,(0,n.jsxs)(e.p,{children:[\\"Note: Just make sure that you have accounted for the proper scaling of the NER and Redact processors, as described above in \\",(0,n.jsx)(e.a,{href:\\"#production-scaling\\",children:\\"Production Scaling\\"}),\\".\\"]}),`\\n`,(0,n.jsxs)(e.p,{children:[`There are a couple of extra steps:\\n`,(0,n.jsx)(e.a,{href:\\"https://github.com/bvader/elastic-pii/blob/main/elastic/blog-part-2/pii-redact-historical-data-blog-2.json\\",rel:\\"nofollow\\",children:\\"The code can be found here.\\"})]}),`\\n`,(0,n.jsxs)(e.ol,{children:[`\\n`,(0,n.jsxs)(e.li,{children:[\\"First, we can set the parameters to ONLY keep the sampled data, as there is no reason to make a copy of all the unsampled data. In the \\",(0,n.jsx)(e.code,{children:\\"process-pii\\"}),\\" pipeline, there is a setting \\",(0,n.jsx)(e.code,{children:\\"sample.keep_unsampled\\"}),\\", which we can set to \\",(0,n.jsx)(e.code,{children:\\"false\\"}),\\", which will then only keep the sampled data.\\"]}),`\\n`]}),`\\n`,(0,n.jsx)(e.pre,{children:(0,n.jsx)(e.code,{className:\\"language-bash\\",children:` {\\n \\"set\\": {\\n \\"description\\": \\"Set to false if you want to drop unsampled data, handy for reindexing historical data\\",\\n \\"field\\": \\"sample.keep_unsampled\\",\\n \\"value\\": false <<< SET TO false\\n }\\n },\\n`})}),`\\n`,(0,n.jsxs)(e.ol,{start:\\"2\\",children:[`\\n`,(0,n.jsxs)(e.li,{children:[\\"Second, we will create a pipeline that will reroute the data to the correct data stream to run through all the PII assessment/detection pipelines. 
It also sets the correct \\",(0,n.jsx)(e.code,{children:\\"dataset\\"}),\\" and \\",(0,n.jsx)(e.code,{children:\\"namespace\\"}),\\".\\"]}),`\\n`]}),`\\n`,(0,n.jsx)(e.pre,{children:(0,n.jsx)(e.code,{className:\\"language-bash\\",children:`DELETE _ingest/pipeline/sendtopii\\nPUT _ingest/pipeline/sendtopii\\n{\\n \\"processors\\": [\\n {\\n \\"set\\": {\\n \\"field\\": \\"data_stream.dataset\\",\\n \\"value\\": \\"pii\\"\\n }\\n },\\n {\\n \\"set\\": {\\n \\"field\\": \\"data_stream.namespace\\",\\n \\"value\\": \\"default\\"\\n }\\n },\\n {\\n \\"reroute\\" : \\n {\\n \\"dataset\\" : \\"{{data_stream.dataset}}\\",\\n \\"namespace\\": \\"{{data_stream.namespace}}\\"\\n }\\n }\\n ]\\n}\\n`})}),`\\n`,(0,n.jsxs)(e.ol,{start:\\"3\\",children:[`\\n`,(0,n.jsxs)(e.li,{children:[\\"Finally, we can run a \\",(0,n.jsx)(e.code,{children:\\"_reindex\\"}),\\" to select the data we want to test/assess. It is recommended to review the \\",(0,n.jsx)(e.a,{href:\\"https://www.elastic.co/guide/en/elasticsearch/reference/current/docs-reindex.html\\",rel:\\"nofollow\\",children:\\"_reindex\\"}),\\" documentation before trying this. First, select the source data stream you want to assess; in this example, it is the \\",(0,n.jsx)(e.code,{children:\\"logs-generic-default\\"}),\\" logs data stream. Note: I also added a \\",(0,n.jsx)(e.code,{children:\\"range\\"}),\' filter to select a specific time range. There is a bit of a \\"trick\\" that we need to use since we are re-routing the data to the data stream \',(0,n.jsx)(e.code,{children:\\"logs-pii-default\\"}),\\". To do this, we just set \\",(0,n.jsx)(e.code,{children:\'\\"index\\": \\"logs-tmp-default\\"\'}),\\" in the \\",(0,n.jsx)(e.code,{children:\\"_reindex\\"}),\\" as the correct data stream will be set in the pipeline. We must do that because \\",(0,n.jsx)(e.code,{children:\\"reroute\\"}),\\" is a \\",(0,n.jsx)(e.code,{children:\\"noop\\"}),\\" if it is called from/to the same data stream.\\"]}),`\\n`]}),`\\n`,(0,n.jsx)(e.pre,{children:(0,n.jsx)(e.code,{className:\\"language-bash\\",children:`POST _reindex?wait_for_completion=false\\n{\\n \\"source\\": {\\n \\"index\\": \\"logs-generic-default\\",\\n \\"query\\": {\\n \\"bool\\": {\\n \\"filter\\": [\\n {\\n \\"range\\": {\\n \\"@timestamp\\": {\\n \\"gte\\": \\"now-1h/h\\",\\n \\"lt\\": \\"now\\"\\n }\\n }\\n }\\n ]\\n }\\n }\\n },\\n \\"dest\\": {\\n \\"op_type\\": \\"create\\",\\n \\"index\\": \\"logs-tmp-default\\",\\n \\"pipeline\\": \\"sendtopii\\"\\n }\\n}\\n`})}),`\\n`,(0,n.jsx)(e.h2,{id:\\"summary\\",children:\\"Summary\\"}),`\\n`,(0,n.jsx)(e.p,{children:\\"At this point, you have the tools and processes needed to assess, detect, analyze, alert on, and protect PII in your logs.\\"}),`\\n`,(0,n.jsxs)(e.p,{children:[(0,n.jsx)(e.a,{href:\\"https://github.com/bvader/elastic-pii/tree/main/elastic/blog-complete-end-solution\\",rel:\\"nofollow\\",children:\\"The end state solution can be found here\\"}),\\".\\"]}),`\\n`,(0,n.jsxs)(e.p,{children:[\\"In \\",(0,n.jsx)(e.a,{href:\\"https://www.elastic.co/observability-labs/blog/pii-ner-regex-assess-redact-part-1\\",rel:\\"nofollow\\",children:\\"Part 1 of this blog\\"}),\\", we accomplished the following:\\"]}),`\\n`,(0,n.jsxs)(e.ul,{children:[`\\n`,(0,n.jsx)(e.li,{children:\\"Reviewed the techniques and tools we have available for PII detection and assessment\\"}),`\\n`,(0,n.jsx)(e.li,{children:\\"Reviewed the NLP / NER role in PII detection and assessment\\"}),`\\n`,(0,n.jsx)(e.li,{children:\\"Built the necessary composable ingest pipelines to sample logs and run them through 
the NER Model\\"}),`\\n`,(0,n.jsx)(e.li,{children:\\"Reviewed the NER results and are ready to move to the second blog\\"}),`\\n`]}),`\\n`,(0,n.jsxs)(e.p,{children:[\\"In \\",(0,n.jsx)(e.strong,{children:\\"Part 2\\"}),\\" of this blog, we covered the following:\\"]}),`\\n`,(0,n.jsxs)(e.ul,{children:[`\\n`,(0,n.jsx)(e.li,{children:\\"Redact PII using NER and the redact processor\\"}),`\\n`,(0,n.jsx)(e.li,{children:\\"Apply field-level security to control access to the un-redacted data\\"}),`\\n`,(0,n.jsx)(e.li,{children:\\"Enhance the dashboards and alerts\\"}),`\\n`,(0,n.jsx)(e.li,{children:\\"Production considerations and scaling\\"}),`\\n`,(0,n.jsx)(e.li,{children:\\"How to run these processes on incoming or historical data\\"}),`\\n`]}),`\\n`,(0,n.jsx)(e.p,{children:(0,n.jsx)(e.em,{children:(0,n.jsx)(e.strong,{children:\\"So get to work and reduce risk in your logs!\\"})})}),`\\n`,(0,n.jsx)(e.h2,{id:\\"data-loading-appendix\\",children:\\"Data Loading Appendix\\"}),`\\n`,(0,n.jsx)(e.h4,{id:\\"code\\",children:\\"Code\\"}),`\\n`,(0,n.jsx)(e.p,{children:\\"The data loading code can be found here:\\"}),`\\n`,(0,n.jsx)(e.p,{children:(0,n.jsx)(e.a,{href:\\"https://github.com/bvader/elastic-pii\\",rel:\\"nofollow\\",children:\\"https://github.com/bvader/elastic-pii\\"})}),`\\n`,(0,n.jsx)(e.pre,{children:(0,n.jsx)(e.code,{children:`$ git clone https://github.com/bvader/elastic-pii.git\\n`})}),`\\n`,(0,n.jsx)(e.h4,{id:\\"creating-and-loading-the-sample-data-set\\",children:\\"Creating and Loading the Sample Data Set\\"}),`\\n`,(0,n.jsx)(e.pre,{children:(0,n.jsx)(e.code,{children:`$ cd elastic-pii\\n$ cd python\\n$ python -m venv .env\\n$ source .env/bin/activate\\n$ pip install elasticsearch\\n$ pip install Faker\\n`})}),`\\n`,(0,n.jsx)(e.p,{children:\\"Run the log generator:\\"}),`\\n`,(0,n.jsx)(e.pre,{children:(0,n.jsx)(e.code,{children:`$ python generate_random_logs.py\\n`})}),`\\n`,(0,n.jsx)(e.p,{children:\\"If you do not change any parameters, this will create 10,000 random logs in a file named pii.log with a mix of logs that contain and do not contain PII.\\"}),`\\n`,(0,n.jsxs)(e.p,{children:[\\"Edit \\",(0,n.jsx)(e.code,{children:\\"load_logs.py\\"}),\\" and set the following:\\"]}),`\\n`,(0,n.jsx)(e.pre,{children:(0,n.jsx)(e.code,{children:`# The Elastic User \\nELASTIC_USER = \\"elastic\\"\\n\\n# Password for the \'elastic\' user generated by Elasticsearch\\nELASTIC_PASSWORD = \\"askdjfhasldfkjhasdf\\"\\n\\n# Found in the \'Manage Deployment\' page\\nELASTIC_CLOUD_ID = \\"deployment:sadfjhasfdlkjsdhf3VuZC5pbzo0NDMkYjA0NmQ0YjFiYzg5NDM3ZDgxM2YxM2RhZjQ3OGE3MzIkZGJmNTE0OGEwODEzNGEwN2E3M2YwYjcyZjljYTliZWQ=\\"\\n`})}),`\\n`,(0,n.jsx)(e.p,{children:\\"Then run the following command.\\"}),`\\n`,(0,n.jsx)(e.pre,{children:(0,n.jsx)(e.code,{children:`$ python load_logs.py\\n`})}),`\\n`,(0,n.jsx)(e.h4,{id:\\"reloading-the-logs\\",children:\\"Reloading the logs\\"}),`\\n`,(0,n.jsxs)(e.p,{children:[(0,n.jsx)(e.strong,{children:\\"Note\\"}),\\": To reload the logs, you can simply re-run the above command. You can run the command multiple times during this exercise and the logs will be reloaded (actually loaded again). 
The new logs will not collide with previous runs as there will be a unique \\",(0,n.jsx)(e.code,{children:\\"run.id\\"}),\\" for each run which is displayed at the end of the loading process.\\"]}),`\\n`,(0,n.jsx)(e.pre,{children:(0,n.jsx)(e.code,{children:`$ python load_logs.py\\n`})})]})}function h(t={}){let{wrapper:e}=t.components||{};return e?(0,n.jsx)(e,{...t,children:(0,n.jsx)(c,{...t})}):c(t)}return v(E);})();\\n;return Component;"},"_id":"articles/pii-ner-regex-assess-redact-part-2.mdx","_raw":{"sourceFilePath":"articles/pii-ner-regex-assess-redact-part-2.mdx","sourceFileName":"pii-ner-regex-assess-redact-part-2.mdx","sourceFileDir":"articles","contentType":"mdx","flattenedPath":"articles/pii-ner-regex-assess-redact-part-2"},"type":"Article","imageUrl":"/assets/images/pii-ner-regex-assess-redact-part-2/pii-ner-regex-assess-redact-part-2.png","readingTime":"32 min read","url":"/pii-ner-regex-assess-redact-part-2","headings":[{"level":2,"title":"Introduction:","href":"#introduction"},{"level":3,"title":"Part 1 Prerequisites","href":"#part-1-prerequisites"},{"level":3,"title":"Applying the Redact Processor","href":"#applying-the-redact-processor"},{"level":2,"title":"Checkpoint ","href":"#checkpoint-"},{"level":2,"title":"Applying to Production Systems","href":"#applying-to-production-systems"},{"level":3,"title":"Cleanup working data and update the dashboard","href":"#cleanup-working-data-and-update-the-dashboard"},{"level":3,"title":"Apply Role Based Access Control to protect the raw unredacted data","href":"#apply-role-based-access-control-to-protect-the-raw-unredacted-data"},{"level":3,"title":"Create an Alert when PII Detected","href":"#create-an-alert-when-pii-detected"},{"level":3,"title":"Production Scaling","href":"#production-scaling"},{"level":4,"title":"NER Scaling","href":"#ner-scaling"},{"level":4,"title":"Redact Processor Scaling","href":"#redact-processor-scaling"},{"level":3,"title":"Assessing incoming logs","href":"#assessing-incoming-logs"},{"level":3,"title":"Assessing historical data","href":"#assessing-historical-data"},{"level":2,"title":"Summary","href":"#summary"},{"level":2,"title":"Data Loading Appendix","href":"#data-loading-appendix"},{"level":4,"title":"Code","href":"#code"},{"level":4,"title":"Creating and Loading the Sample Data Set ","href":"#creating-and-loading-the-sample-data-set-"},{"level":4,"title":"Reloading the logs","href":"#reloading-the-logs"}]},{"title":"Pruning incoming log volumes with Elastic","slug":"pruning-incoming-log-volumes","date":"2023-06-23","description":"To drop or not to drop (events) is the question, not only in deciding what events and fields to remove from your logs but also in the various tools used. 
Learn about using Beats, Logstash, Elastic Agent, Ingest Pipelines, and OTel Collectors.","image":"blog-thumb-elastic-on-elastic.png","author":[{"slug":"carly-richmond","type":"Author","_raw":{}}],"tags":[{"slug":"log-analytics","type":"Tag","_raw":{}}],"body":{"raw":"\\n```yaml\\nfilebeat.inputs:\\n - type: filestream\\n id: my-logging-app\\n paths:\\n - /var/log/*.log\\n```\\n\\n```yaml\\nfilebeat.inputs:\\n - type: filestream\\n id: my-logging-app\\n paths:\\n - /var/tmp/other.log\\n - /var/log/*.log\\nprocessors:\\n - drop_event:\\n when:\\n and:\\n - equals:\\n url.scheme: http\\n - equals:\\n url.path: /profile\\n```\\n\\n```yaml\\nfilebeat.inputs:\\n - type: filestream\\n id: my-logging-app\\n paths:\\n - /var/tmp/other.log\\n - /var/log/*.log\\nprocessors:\\n - drop_fields:\\n when:\\n and:\\n - equals:\\n url.scheme: http\\n - equals:\\n http.response.status_code: 200\\n fields: [\\"event.message\\"]\\n ignore_missing: false\\n```\\n\\n```ruby\\ninput {\\n file {\\n id => \\"my-logging-app\\"\\n path => [ \\"/var/tmp/other.log\\", \\"/var/log/*.log\\" ]\\n }\\n}\\nfilter {\\n if [url.scheme] == \\"http\\" && [url.path] == \\"/profile\\" {\\n drop {\\n percentage => 80\\n }\\n }\\n}\\noutput {\\n elasticsearch {\\n hosts => \\"https://my-elasticsearch:9200\\"\\n data_stream => \\"true\\"\\n }\\n}\\n```\\n\\n```ruby\\n# Input configuration omitted\\nfilter {\\n if [url.scheme] == \\"http\\" && [http.response.status_code] == 200 {\\n drop {\\n percentage => 80\\n }\\n mutate {\\n remove_field => [ \\"event.message\\" ]\\n }\\n }\\n}\\n# Output configuration omitted\\n```\\n\\n```bash\\nPUT _ingest/pipeline/my-logging-app-pipeline\\n{\\n \\"description\\": \\"Event and field dropping for my-logging-app\\",\\n \\"processors\\": [\\n {\\n \\"drop\\": {\\n \\"description\\" : \\"Drop event\\",\\n \\"if\\": \\"ctx?.url?.scheme == \'http\' && ctx?.url?.path == \'/profile\'\\",\\n \\"ignore_failure\\": true\\n }\\n },\\n {\\n \\"remove\\": {\\n \\"description\\" : \\"Drop field\\",\\n \\"field\\" : \\"event.message\\",\\n \\"if\\": \\"ctx?.url?.scheme == \'http\' && ctx?.http?.response?.status_code == 200\\",\\n \\"ignore_failure\\": false\\n }\\n }\\n ]\\n}\\n```\\n\\n```bash\\nPUT _ingest/pipeline/my-logging-app-pipeline\\n{\\n \\"description\\": \\"Event and field dropping for my-logging-app with failures\\",\\n \\"processors\\": [\\n {\\n \\"drop\\": {\\n \\"description\\" : \\"Drop event\\",\\n \\"if\\": \\"ctx?.url?.scheme == \'http\' && ctx?.url?.path == \'/profile\'\\",\\n \\"ignore_failure\\": true\\n }\\n },\\n {\\n \\"remove\\": {\\n \\"description\\" : \\"Drop field\\",\\n \\"field\\" : \\"event.message\\",\\n \\"if\\": \\"ctx?.url?.scheme == \'http\' && ctx?.http?.response?.status_code == 200\\",\\n \\"ignore_failure\\": false\\n }\\n }\\n ],\\n \\"on_failure\\": [\\n {\\n \\"set\\": {\\n \\"description\\": \\"Set \'ingest.failure.message\'\\",\\n \\"field\\": \\"ingest.failure.message\\",\\n \\"value\\": \\"Ingestion issue\\"\\n }\\n }\\n ]\\n}\\n```\\n\\n```yaml\\nreceivers:\\n filelog:\\n include: [/var/tmp/other.log, /var/log/*.log]\\nprocessors:\\n filter/denylist:\\n error_mode: ignore\\n logs:\\n log_record:\\n - \'url.scheme == \\"info\\"\'\\n - \'url.path == \\"/profile\\"\'\\n - \\"http.response.status_code == 200\\"\\n attributes/errors:\\n actions:\\n - key: error.message\\n action: delete\\n memory_limiter:\\n check_interval: 1s\\n limit_mib: 2000\\n batch:\\nexporters:\\n # Exporters configuration omitted\\nservice:\\n pipelines:\\n # Pipelines configuration 
omitted\\n```\\n","code":"var Component=(()=>{var d=Object.create;var s=Object.defineProperty;var u=Object.getOwnPropertyDescriptor;var m=Object.getOwnPropertyNames;var h=Object.getPrototypeOf,f=Object.prototype.hasOwnProperty;var v=(n,e)=>()=>(e||n((e={exports:{}}).exports,e),e.exports),_=(n,e)=>{for(var r in e)s(n,r,{get:e[r],enumerable:!0})},l=(n,e,r,o)=>{if(e&&typeof e==\\"object\\"||typeof e==\\"function\\")for(let i of m(e))!f.call(n,i)&&i!==r&&s(n,i,{get:()=>e[i],enumerable:!(o=u(e,i))||o.enumerable});return n};var y=(n,e,r)=>(r=n!=null?d(h(n)):{},l(e||!n||!n.__esModule?s(r,\\"default\\",{value:n,enumerable:!0}):r,n)),x=n=>l(s({},\\"__esModule\\",{value:!0}),n);var p=v((D,a)=>{a.exports=_jsx_runtime});var N={};_(N,{default:()=>g,frontmatter:()=>b});var t=y(p()),b={title:\\"Pruning incoming log volumes with Elastic\\",slug:\\"pruning-incoming-log-volumes\\",date:\\"2023-06-23\\",description:\\"To drop or not to drop (events) is the question, not only in deciding what events and fields to remove from your logs but also in the various tools used. Learn about using Beats, Logstash, Elastic Agent, Ingest Pipelines, and OTel Collectors.\\",author:[{slug:\\"carly-richmond\\"}],image:\\"blog-thumb-elastic-on-elastic.png\\",tags:[{slug:\\"log-analytics\\"}]};function c(n){let e={code:\\"code\\",pre:\\"pre\\",...n.components};return(0,t.jsxs)(t.Fragment,{children:[(0,t.jsx)(e.pre,{children:(0,t.jsx)(e.code,{className:\\"language-yaml\\",children:`filebeat.inputs:\\n - type: filestream\\n id: my-logging-app\\n paths:\\n - /var/log/*.log\\n`})}),`\\n`,(0,t.jsx)(e.pre,{children:(0,t.jsx)(e.code,{className:\\"language-yaml\\",children:`filebeat.inputs:\\n - type: filestream\\n id: my-logging-app\\n paths:\\n - /var/tmp/other.log\\n - /var/log/*.log\\nprocessors:\\n - drop_event:\\n when:\\n and:\\n - equals:\\n url.scheme: http\\n - equals:\\n url.path: /profile\\n`})}),`\\n`,(0,t.jsx)(e.pre,{children:(0,t.jsx)(e.code,{className:\\"language-yaml\\",children:`filebeat.inputs:\\n - type: filestream\\n id: my-logging-app\\n paths:\\n - /var/tmp/other.log\\n - /var/log/*.log\\nprocessors:\\n - drop_fields:\\n when:\\n and:\\n - equals:\\n url.scheme: http\\n - equals:\\n http.response.status_code: 200\\n fields: [\\"event.message\\"]\\n ignore_missing: false\\n`})}),`\\n`,(0,t.jsx)(e.pre,{children:(0,t.jsx)(e.code,{className:\\"language-ruby\\",children:`input {\\n file {\\n id => \\"my-logging-app\\"\\n path => [ \\"/var/tmp/other.log\\", \\"/var/log/*.log\\" ]\\n }\\n}\\nfilter {\\n if [url.scheme] == \\"http\\" && [url.path] == \\"/profile\\" {\\n drop {\\n percentage => 80\\n }\\n }\\n}\\noutput {\\n elasticsearch {\\n hosts => \\"https://my-elasticsearch:9200\\"\\n data_stream => \\"true\\"\\n }\\n}\\n`})}),`\\n`,(0,t.jsx)(e.pre,{children:(0,t.jsx)(e.code,{className:\\"language-ruby\\",children:`# Input configuration omitted\\nfilter {\\n if [url.scheme] == \\"http\\" && [http.response.status_code] == 200 {\\n drop {\\n percentage => 80\\n }\\n mutate {\\n remove_field => [ \\"event.message\\" ]\\n }\\n }\\n}\\n# Output configuration omitted\\n`})}),`\\n`,(0,t.jsx)(e.pre,{children:(0,t.jsx)(e.code,{className:\\"language-bash\\",children:`PUT _ingest/pipeline/my-logging-app-pipeline\\n{\\n \\"description\\": \\"Event and field dropping for my-logging-app\\",\\n \\"processors\\": [\\n {\\n \\"drop\\": {\\n \\"description\\" : \\"Drop event\\",\\n \\"if\\": \\"ctx?.url?.scheme == \'http\' && ctx?.url?.path == \'/profile\'\\",\\n \\"ignore_failure\\": true\\n }\\n },\\n {\\n \\"remove\\": {\\n 
\\"description\\" : \\"Drop field\\",\\n \\"field\\" : \\"event.message\\",\\n \\"if\\": \\"ctx?.url?.scheme == \'http\' && ctx?.http?.response?.status_code == 200\\",\\n \\"ignore_failure\\": false\\n }\\n }\\n ]\\n}\\n`})}),`\\n`,(0,t.jsx)(e.pre,{children:(0,t.jsx)(e.code,{className:\\"language-bash\\",children:`PUT _ingest/pipeline/my-logging-app-pipeline\\n{\\n \\"description\\": \\"Event and field dropping for my-logging-app with failures\\",\\n \\"processors\\": [\\n {\\n \\"drop\\": {\\n \\"description\\" : \\"Drop event\\",\\n \\"if\\": \\"ctx?.url?.scheme == \'http\' && ctx?.url?.path == \'/profile\'\\",\\n \\"ignore_failure\\": true\\n }\\n },\\n {\\n \\"remove\\": {\\n \\"description\\" : \\"Drop field\\",\\n \\"field\\" : \\"event.message\\",\\n \\"if\\": \\"ctx?.url?.scheme == \'http\' && ctx?.http?.response?.status_code == 200\\",\\n \\"ignore_failure\\": false\\n }\\n }\\n ],\\n \\"on_failure\\": [\\n {\\n \\"set\\": {\\n \\"description\\": \\"Set \'ingest.failure.message\'\\",\\n \\"field\\": \\"ingest.failure.message\\",\\n \\"value\\": \\"Ingestion issue\\"\\n }\\n }\\n ]\\n}\\n`})}),`\\n`,(0,t.jsx)(e.pre,{children:(0,t.jsx)(e.code,{className:\\"language-yaml\\",children:`receivers:\\n filelog:\\n include: [/var/tmp/other.log, /var/log/*.log]\\nprocessors:\\n filter/denylist:\\n error_mode: ignore\\n logs:\\n log_record:\\n - \'url.scheme == \\"info\\"\'\\n - \'url.path == \\"/profile\\"\'\\n - \\"http.response.status_code == 200\\"\\n attributes/errors:\\n actions:\\n - key: error.message\\n action: delete\\n memory_limiter:\\n check_interval: 1s\\n limit_mib: 2000\\n batch:\\nexporters:\\n # Exporters configuration omitted\\nservice:\\n pipelines:\\n # Pipelines configuration omitted\\n`})})]})}function g(n={}){let{wrapper:e}=n.components||{};return e?(0,t.jsx)(e,{...n,children:(0,t.jsx)(c,{...n})}):c(n)}return x(N);})();\\n;return Component;"},"_id":"articles/pruning-incoming-log-volumes-with-elastic.mdx","_raw":{"sourceFilePath":"articles/pruning-incoming-log-volumes-with-elastic.mdx","sourceFileName":"pruning-incoming-log-volumes-with-elastic.mdx","sourceFileDir":"articles","contentType":"mdx","flattenedPath":"articles/pruning-incoming-log-volumes-with-elastic"},"type":"Article","imageUrl":"/assets/images/pruning-incoming-log-volumes/blog-thumb-elastic-on-elastic.png","readingTime":"5 min read","url":"/pruning-incoming-log-volumes","headings":[]},{"title":"Root cause analysis with logs: Elastic Observability\'s anomaly detection and log categorization","slug":"reduce-mttd-ml-machine-learning-observability","date":"2023-02-07","description":"Elastic Observability provides more than just log aggregation, metrics analysis, APM, and distributed tracing. Elastic’s machine learning capabilities help analyze the root cause of issues, allowing you to focus your time on the most important tasks.","image":"illustration-machine-learning-anomaly-1680x980.png","author":[{"slug":"bahubali-shetti","type":"Author","_raw":{}}],"tags":[{"slug":"aiops","type":"Tag","_raw":{}},{"slug":"log-analytics","type":"Tag","_raw":{}}],"body":{"raw":"\\nWith more and more applications moving to the cloud, an increasing amount of telemetry data (logs, metrics, traces) is being collected, which can help improve application performance, operational efficiencies, and business KPIs. However, analyzing this data is extremely tedious and time consuming given the tremendous amounts of data being generated. 
Traditional methods of alerting and simple pattern matching (visual or simple searching etc) are not sufficient for IT Operations teams and SREs. It’s like trying to find a needle in a haystack.\\n\\nIn this blog post, we’ll cover some of Elastic’s artificial intelligence for IT operations (AIOps) and machine learning (ML) capabilities for root cause analysis.\\n\\nElastic’s machine learning will help you investigate performance issues by providing anomaly detection and pinpointing potential root causes through time series analysis and log outlier detection. These capabilities will help you reduce time in finding that “needle” in the haystack.\\n\\nElastic’s platform enables you to get started on machine learning quickly. You don’t need to have a data science team or design a system architecture. Additionally, there’s no need to move data to a third-party framework for model training.\\n\\nPreconfigured machine learning models for observability and security are available. If those don\'t work well enough on your data, in-tool wizards guide you through the few steps needed to configure custom anomaly detection and train your model with supervised learning. To help get you started, there are several key features built into Elastic Observability to aid in analysis, helping bypass the need to run specific ML models. These features help minimize the time and analysis for logs.\\n\\nLet’s review some of these built-in ML features:\\n\\n**Anomaly detection:** Elastic Observability, when turned on ([see documentation](https://www.elastic.co/guide/en/kibana/current/xpack-ml-anomalies.html)), automatically detects anomalies by continuously modeling the normal behavior of your time series data — learning trends, periodicity, and more — in real time to identify anomalies, streamline root cause analysis, and reduce false positives. Anomaly detection runs in and scales with Elasticsearch and includes an intuitive UI.\\n\\n**Log categorization:** Using anomaly detection, Elastic also identifies patterns in your log events quickly. Instead of manually identifying similar logs, the logs categorization view lists log events that have been grouped, based on their messages and formats, so that you can take action quicker.\\n\\n**High-latency or erroneous transactions:** Elastic Observability’s APM capability helps you discover which attributes are contributing to increased transaction latency and identifies which attributes are most influential in distinguishing between transaction failures and successes. An overview of this capability is published here: [APM correlations in Elastic Observability: Automatically identifying probable causes of slow or failed transactions](https://www.elastic.co/blog/apm-correlations-elastic-observability-root-cause-transactions).\\n\\n**AIOps Labs:** AIOps Labs provides two main capabilities using advanced statistical methods:\\n\\n- **Log spike detector** helps identify reasons for increases in log rates. It makes it easy to find and investigate causes of unusual spikes by using the analysis workflow view. Examine the histogram chart of the log rates for a given data view, and find the reason behind a particular change possibly in millions of log events across multiple fields and values.\\n- **Log pattern analysis** helps you find patterns in unstructured log messages and makes it easier to examine your data. 
It performs categorization analysis on a selected field of a data view, creates categories based on the data, and displays them together with a chart that shows the distribution of each category and an example document that matches the category.\\n\\n_ **In this blog, we will cover anomaly detection and log categorization against the popular “Hipster Shop app” developed by Google, and modified recently by OpenTelemetry.** _\\n\\nOverviews of high-latency capabilities can be found [here](https://www.elastic.co/blog/apm-correlations-elastic-observability-root-cause-transactions), and an overview of AIOps labs can be found [here](https://www.youtube.com/watch?v=jgHxzUNzfhM&list=PLhLSfisesZItlRZKgd-DtYukNfpThDAv_&index=5).\\n\\nIn this blog, we will examine a scenario where we use anomaly detection and log categorization to help identify a root cause of an issue in Hipster Shop.\\n\\n## Prerequisites and config\\n\\nIf you plan on following this blog, here are some of the components and details we used to set up this demonstration:\\n\\n- Ensure you have an account on [Elastic Cloud](https://cloud.elastic.co) and a deployed stack ([see instructions here](https://www.elastic.co/guide/en/elastic-stack/current/installing-elastic-stack.html)) on AWS. Deploying this on AWS is required for Elastic Serverless Forwarder.\\n- Utilize a version of the ever so popular [Hipster Shop](https://github.com/GoogleCloudPlatform/microservices-demo) demo application. It was originally written by Google to showcase Kubernetes across a multitude of variants available, such as the [OpenTelemetry Demo App](https://github.com/open-telemetry/opentelemetry-demo). The Elastic version is found [here](https://github.com/elastic/opentelemetry-demo).\\n- Ensure you have configured the app for either Elastic APM agents or OpenTelemetry agents. For more details, please refer to these two blogs: [Independence with OTel in Elastic](https://www.elastic.co/blog/opentelemetry-observability) and [Observability and security with OTel in Elastic](https://www.elastic.co/blog/implementing-kubernetes-observability-security-opentelemetry). Additionally, review the [OTel documentation in Elastic](https://www.elastic.co/guide/en/apm/guide/current/open-telemetry.html).\\n- Look through an overview of [Elastic Observability APM capabilities](https://www.elastic.co/guide/en/observability/current/apm.html).\\n- Look through our [Anomaly detection documentation](https://www.elastic.co/guide/en/observability/8.5/inspect-log-anomalies.html) for logs and [log categorization documentation](https://www.elastic.co/guide/en/observability/8.5/categorize-logs.html).\\n\\nOnce you’ve instrumented your application with APM (Elastic or OTel) agents and are ingesting metrics and logs into Elastic Observability, you should see a service map for the application as follows:\\n\\n![](/assets/images/reduce-mttd-ml-machine-learning-observability/blog-elastic-service-map.png)\\n\\nIn our example, we’ve introduced issues to help walk you through the root cause analysis features: anomaly detection and log categorization. You might have a different set of anomalies and log categorization depending on how you load the application and/or introduce specific issues.\\n\\nAs part of the walk-through, we’ll assume we are a DevOps or SRE managing this application in production.\\n\\n## Root cause analysis\\n\\nWhile the application has been running normally for some time, you get a notification that some of the services are unhealthy. 
This can occur from the notification setting you’ve set up in Elastic or other external notification platforms (including customer related issues). In this instance, we’re assuming that customer support has called in multiple customer complaints about the website.\\n\\nHow do you as a DevOps or SRE investigate this? We will walk through two avenues in Elastic to investigate the issue:\\n\\n- Anomaly detection\\n- Log categorization\\n\\nWhile we show these two paths separately, they can be used in conjunction and are complementary, as they are both tools Elastic Observability provides to help you troubleshoot and identify a root cause.\\n\\n### Machine learning for anomaly detection\\n\\nElastic will detect anomalies based on historical patterns and identify a probability of these issues.\\n\\nStarting with the service map, you can see anomalies identified with red circles and as we select them, Elastic will provide a score for the anomaly.\\n\\n![](/assets/images/reduce-mttd-ml-machine-learning-observability/blog-elastic-service-map-anomaly-detection.png)\\n\\nIn this example, we can see that there is a score of 96 for a specific anomaly for the productCatalogService in the Hipster Shop application. An anomaly score indicates the significance of the anomaly compared to previously seen anomalies. More information on anomaly detection results can be found here. We can also dive deeper into the anomaly and analyze the details.\\n\\n![](/assets/images/reduce-mttd-ml-machine-learning-observability/blog-elastic-single-metric-viewer.png)\\n\\nWhat you will see for the productCatalogService is that there is a severe spike in average transaction latency time, which is the anomaly that was detected in the service map. Elastic’s machine learning has identified a specific metric anomaly (shown in the single metric view). It’s likely that customers are potentially responding to the slowness of the site and that the company is losing potential transactions.\\n\\nOne step to take next is to review all the other potential anomalies that we saw in the service map in a larger picture. Use an anomaly explorer to view all the anomalies that have been identified.\\n\\n![](/assets/images/reduce-mttd-ml-machine-learning-observability/blog-elastic-anomaly-explorer.png)\\n\\nElastic is identifying numerous services with anomalies. productCatalogService has the highest score, and a good number of others (frontend, checkoutService, advertService, and more) also have high scores. However, this analysis is looking at just one metric.\\n\\nElastic can help detect anomalies across all types of data, such as Kubernetes data, metrics, and traces. If we analyze across all these types (via individual jobs we’ve created in Elastic machine learning), we will see a more comprehensive view as to what is potentially causing this latency issue.\\n\\n
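For readers who prefer working through the APIs rather than the in-tool wizards, a latency job of the kind used in this walk-through can also be created directly with the machine learning APIs. The following is a minimal, hypothetical sketch (the job name, bucket span, and field names are illustrative assumptions, not the exact jobs from this demo) that models mean transaction latency partitioned by service:\\n\\n```bash\\nPUT _ml/anomaly_detectors/apm-latency-by-service\\n{\\n  \\"analysis_config\\": {\\n    \\"bucket_span\\": \\"15m\\",\\n    \\"detectors\\": [\\n      {\\n        \\"function\\": \\"mean\\",\\n        \\"field_name\\": \\"transaction.duration.us\\",\\n        \\"partition_field_name\\": \\"service.name\\"\\n      }\\n    ],\\n    \\"influencers\\": [\\"service.name\\"]\\n  },\\n  \\"data_description\\": { \\"time_field\\": \\"@timestamp\\" }\\n}\\n```\\n\\n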
![](/assets/images/reduce-mttd-ml-machine-learning-observability/blog-elastic-anomaly-explorer-job-selection.png)\\n\\nOnce all the potential jobs are selected and we’ve sorted by service.name, we can see that productCatalogService is still showing a high anomaly influencer score.\\n\\n![](/assets/images/reduce-mttd-ml-machine-learning-observability/blog-elastic-anomaly-explorer-timeline.png)\\n\\nIn addition to the chart giving us a visual of the anomalies, we can review all the potential anomalies. As you will notice, Elastic has also categorized these anomalies (see category examples column). As we scroll through the results, we notice a potential postgreSQL issue from the categorization, which also has a high score of 94. Machine learning has identified a “rare mlcategory,” meaning that it has rarely occurred, hence pointing to a potential cause of the issue customers are seeing.\\n\\n![](/assets/images/reduce-mttd-ml-machine-learning-observability/blog-elastic-machine-learning-service-name.png)\\n\\nWe also notice that this issue is potentially caused by pgbench, a popular postgreSQL tool to help benchmark the database. pgbench runs the same sequence of SQL commands over and over, possibly in multiple, concurrent database sessions. While pgbench is definitely a useful tool, it should not be used in a production environment, as it causes heavy load on the database host, likely causing the higher latency issues on the site.\\n\\nWhile this may or may not be the ultimate root cause, we have rather quickly identified a potential issue that has a high probability of being the root cause. An engineer likely intended to run pgbench against a staging database to evaluate its performance, and not the production environment.\\n\\n### Machine learning for log categorization\\n\\nElastic Observability’s service map has detected an anomaly, and in this part of the walk-through, we take a different approach by investigating the service details from the service map versus initially exploring the anomaly. When we explore the service details for productCatalogService, we see the following:\\n\\n![](/assets/images/reduce-mttd-ml-machine-learning-observability/blog-elastic-product-catalog-service.png)\\n\\nThe service details are identifying several things:\\n\\n1. There is an abnormally high latency compared to expected bounds of the service. We see that recently latency was higher than normal (upward of 1s) compared to the average of 275ms.\\n2. There is also a high failure rate for the same time frame as the high latency (lower left chart “ **Failed transaction rate** ”).\\n3. Additionally, we can see the transactions, and one in particular, /ListProduct, has an abnormally high latency, in addition to a high failure rate.\\n4. We see productCatalogService has a dependency on postgreSQL.\\n5. We also see errors all related to postgreSQL.\\n\\nWe can either dig through the logs and analyze them in Elastic, or we can use a capability that identifies the logs more easily.\\n\\nIf we go to Categories under Logs in Elastic Observability and search for postgresql.log to help identify postgresql logs that could be causing this error, we see that Elastic’s machine learning has automatically categorized the postgresql logs.\\n\\n![](/assets/images/reduce-mttd-ml-machine-learning-observability/blog-elastic-categories.png)\\n\\nWe notice two additional items:\\n\\n- There is a high count category (message count of 23,797 with a high anomaly score of 70) related to pgbench (which is odd to see in production). Hence we search further for all pgbench related logs in Categories.\\n- We see an odd issue regarding terminating the connection (with a low count).\\n\\nWhile investigating the second error, which is severe, we can see logs from Categories before and after the error.\\n\\n![](/assets/images/reduce-mttd-ml-machine-learning-observability/blog-elastic-timestamp.png)\\n\\nThis troubleshooting shows postgreSQL having a FATAL error, the database shutting down prior to the error, and all connections terminating. 
Given the two immediate issues we identified, we have an idea that someone was running pgbench and this potentially overloaded the database, causing the latency issue that customers are seeing.\\n\\nThe next steps here could be to investigate anomaly detection and/or work with the developers to review the code and identify pgbench as part of the deployed configuration.\\n\\n## Conclusion\\n\\nI hope you’ve gotten an appreciation for how Elastic Observability can help you further identify and get closer to pinpointing root cause of issues without having to look for a “needle in a haystack.” Here’s a quick recap of lessons and what you learned:\\n\\n- Elastic Observability has numerous capabilities to help you reduce your time to find root cause and improve your MTTR (even MTTD). In particular, we reviewed the following two main capabilities in this blog:\\n\\n 1. **Anomaly detection:** Elastic Observability, when turned on ([see documentation](https://www.elastic.co/guide/en/kibana/current/xpack-ml-anomalies.html)), automatically detects anomalies by continuously modeling the normal behavior of your time series data — learning trends, periodicity, and more — in real time to identify anomalies, streamline root cause analysis, and reduce false positives. Anomaly detection runs in and scales with Elasticsearch and includes an intuitive UI.\\n 2. **Log categorization:** Using anomaly detection, Elastic also identifies patterns in your log events quickly. Instead of manually identifying similar logs, the logs categorization view lists log events that have been grouped based on their messages and formats so that you can take action quicker.\\n\\n- You learned how easy and simple it is to use Elastic Observability’s log categorization and anomaly detection capabilities without having to understand machine learning (which help drive these features), nor having to do any lengthy setups.\\n Ready to get started? 
[Register for Elastic Cloud](https://cloud.elastic.co/registration) and try out the features and capabilities I’ve outlined above.\\n\\n### Additional logging resources:\\n\\n- [Getting started with logging on Elastic (quickstart)](https://www.elastic.co/getting-started/observability/collect-and-analyze-logs)\\n- [Ingesting common known logs via integrations (compute node example)](https://www.elastic.co/guide/en/observability/current/logs-metrics-get-started.html)\\n- [List of integrations](https://docs.elastic.co/integrations)\\n- [Ingesting custom application logs into Elastic](https://www.elastic.co/blog/log-monitoring-management-enterprise)\\n- [Enriching logs in Elastic](https://www.elastic.co/blog/observability-logs-parsing-schema-read-write)\\n- Analyzing Logs with [Anomaly Detection (ML)](https://www.elastic.co/blog/reduce-mttd-ml-machine-learning-observability) and [AIOps](https://www.elastic.co/blog/observability-logs-machine-learning-aiops)\\n\\n### Common use case examples with logs:\\n\\n- [Nginx log management](https://youtu.be/ax04ZFWqVCg)\\n- [AWS VPC Flow log management](https://www.elastic.co/blog/vpc-flow-logs-monitoring-analytics-observability)\\n- [Using OpenAI to analyze Kubernetes errors](https://www.elastic.co/blog/kubernetes-errors-elastic-observability-logs-openai)\\n- [PostgreSQL issue analysis with AIOps](https://youtu.be/Li5TJAWbz8Q)\\n","code":"var Component=(()=>{var g=Object.create;var o=Object.defineProperty;var u=Object.getOwnPropertyDescriptor;var m=Object.getOwnPropertyNames;var p=Object.getPrototypeOf,y=Object.prototype.hasOwnProperty;var w=(t,e)=>()=>(e||t((e={exports:{}}).exports,e),e.exports),f=(t,e)=>{for(var a in e)o(t,a,{get:e[a],enumerable:!0})},l=(t,e,a,s)=>{if(e&&typeof e==\\"object\\"||typeof e==\\"function\\")for(let n of m(e))!y.call(t,n)&&n!==a&&o(t,n,{get:()=>e[n],enumerable:!(s=u(e,n))||s.enumerable});return t};var b=(t,e,a)=>(a=t!=null?g(p(t)):{},l(e||!t||!t.__esModule?o(a,\\"default\\",{value:t,enumerable:!0}):a,t)),v=t=>l(o({},\\"__esModule\\",{value:!0}),t);var c=w((x,r)=>{r.exports=_jsx_runtime});var k={};f(k,{default:()=>d,frontmatter:()=>E});var i=b(c()),E={title:\\"Root cause analysis with logs: Elastic Observability\'s anomaly detection and log categorization\\",slug:\\"reduce-mttd-ml-machine-learning-observability\\",date:\\"2023-02-07\\",description:\\"Elastic Observability provides more than just log aggregation, metrics analysis, APM, and distributed tracing. Elastic\\\\u2019s machine learning capabilities help analyze the root cause of issues, allowing you to focus your time on the most important tasks.\\",author:[{slug:\\"bahubali-shetti\\"}],image:\\"illustration-machine-learning-anomaly-1680x980.png\\",tags:[{slug:\\"aiops\\"},{slug:\\"log-analytics\\"}]};function h(t){let e={a:\\"a\\",h2:\\"h2\\",h3:\\"h3\\",img:\\"img\\",li:\\"li\\",ol:\\"ol\\",p:\\"p\\",strong:\\"strong\\",ul:\\"ul\\",...t.components};return(0,i.jsxs)(i.Fragment,{children:[(0,i.jsx)(e.p,{children:\\"With more and more applications moving to the cloud, an increasing amount of telemetry data (logs, metrics, traces) is being collected, which can help improve application performance, operational efficiencies, and business KPIs. However, analyzing this data is extremely tedious and time consuming given the tremendous amounts of data being generated. Traditional methods of alerting and simple pattern matching (visual or simple searching etc) are not sufficient for IT Operations teams and SREs. 
It\\\\u2019s like trying to find a needle in a haystack.\\"}),`\\n`,(0,i.jsx)(e.p,{children:\\"In this blog post, we\\\\u2019ll cover some of Elastic\\\\u2019s artificial intelligence for IT operations (AIOps) and machine learning (ML) capabilities for root cause analysis.\\"}),`\\n`,(0,i.jsx)(e.p,{children:\\"Elastic\\\\u2019s machine learning will help you investigate performance issues by providing anomaly detection and pinpointing potential root causes through time series analysis and log outlier detection. These capabilities will help you reduce time in finding that \\\\u201Cneedle\\\\u201D in the haystack.\\"}),`\\n`,(0,i.jsx)(e.p,{children:\\"Elastic\\\\u2019s platform enables you to get started on machine learning quickly. You don\\\\u2019t need to have a data science team or design a system architecture. Additionally, there\\\\u2019s no need to move data to a third-party framework for model training.\\"}),`\\n`,(0,i.jsx)(e.p,{children:\\"Preconfigured machine learning models for observability and security are available. If those don\'t work well enough on your data, in-tool wizards guide you through the few steps needed to configure custom anomaly detection and train your model with supervised learning. To help get you started, there are several key features built into Elastic Observability to aid in analysis, helping bypass the need to run specific ML models. These features help minimize the time and analysis for logs.\\"}),`\\n`,(0,i.jsx)(e.p,{children:\\"Let\\\\u2019s review some of these built-in ML features:\\"}),`\\n`,(0,i.jsxs)(e.p,{children:[(0,i.jsx)(e.strong,{children:\\"Anomaly detection:\\"}),\\" Elastic Observability, when turned on (\\",(0,i.jsx)(e.a,{href:\\"https://www.elastic.co/guide/en/kibana/current/xpack-ml-anomalies.html\\",rel:\\"nofollow\\",children:\\"see documentation\\"}),\\"), automatically detects anomalies by continuously modeling the normal behavior of your time series data \\\\u2014 learning trends, periodicity, and more \\\\u2014 in real time to identify anomalies, streamline root cause analysis, and reduce false positives. Anomaly detection runs in and scales with Elasticsearch and includes an intuitive UI.\\"]}),`\\n`,(0,i.jsxs)(e.p,{children:[(0,i.jsx)(e.strong,{children:\\"Log categorization:\\"}),\\" Using anomaly detection, Elastic also identifies patterns in your log events quickly. Instead of manually identifying similar logs, the logs categorization view lists log events that have been grouped, based on their messages and formats, so that you can take action quicker.\\"]}),`\\n`,(0,i.jsxs)(e.p,{children:[(0,i.jsx)(e.strong,{children:\\"High-latency or erroneous transactions:\\"}),\\" Elastic Observability\\\\u2019s APM capability helps you discover which attributes are contributing to increased transaction latency and identifies which attributes are most influential in distinguishing between transaction failures and successes. 
An overview of this capability is published here: \\",(0,i.jsx)(e.a,{href:\\"https://www.elastic.co/blog/apm-correlations-elastic-observability-root-cause-transactions\\",rel:\\"nofollow\\",children:\\"APM correlations in Elastic Observability: Automatically identifying probable causes of slow or failed transactions\\"}),\\".\\"]}),`\\n`,(0,i.jsxs)(e.p,{children:[(0,i.jsx)(e.strong,{children:\\"AIOps Labs:\\"}),\\" AIOps Labs provides two main capabilities using advanced statistical methods:\\"]}),`\\n`,(0,i.jsxs)(e.ul,{children:[`\\n`,(0,i.jsxs)(e.li,{children:[(0,i.jsx)(e.strong,{children:\\"Log spike detector\\"}),\\" helps identify reasons for increases in log rates. It makes it easy to find and investigate causes of unusual spikes by using the analysis workflow view. Examine the histogram chart of the log rates for a given data view, and find the reason behind a particular change possibly in millions of log events across multiple fields and values.\\"]}),`\\n`,(0,i.jsxs)(e.li,{children:[(0,i.jsx)(e.strong,{children:\\"Log pattern analysis\\"}),\\" helps you find patterns in unstructured log messages and makes it easier to examine your data. It performs categorization analysis on a selected field of a data view, creates categories based on the data, and displays them together with a chart that shows the distribution of each category and an example document that matches the category.\\"]}),`\\n`]}),`\\n`,(0,i.jsxs)(e.p,{children:[\\"_ \\",(0,i.jsx)(e.strong,{children:\\"In this blog, we will cover anomaly detection and log categorization against the popular \\\\u201CHipster Shop app\\\\u201D developed by Google, and modified recently by OpenTelemetry.\\"}),\\" _\\"]}),`\\n`,(0,i.jsxs)(e.p,{children:[\\"Overviews of high-latency capabilities can be found \\",(0,i.jsx)(e.a,{href:\\"https://www.elastic.co/blog/apm-correlations-elastic-observability-root-cause-transactions\\",rel:\\"nofollow\\",children:\\"here\\"}),\\", and an overview of AIOps labs can be found \\",(0,i.jsx)(e.a,{href:\\"https://www.youtube.com/watch?v=jgHxzUNzfhM&list=PLhLSfisesZItlRZKgd-DtYukNfpThDAv_&index=5\\",rel:\\"nofollow\\",children:\\"here\\"}),\\".\\"]}),`\\n`,(0,i.jsx)(e.p,{children:\\"In this blog, we will examine a scenario where we use anomaly detection and log categorization to help identify a root cause of an issue in Hipster Shop.\\"}),`\\n`,(0,i.jsx)(e.h2,{id:\\"prerequisites-and-config\\",children:\\"Prerequisites and config\\"}),`\\n`,(0,i.jsx)(e.p,{children:\\"If you plan on following this blog, here are some of the components and details we used to set up this demonstration:\\"}),`\\n`,(0,i.jsxs)(e.ul,{children:[`\\n`,(0,i.jsxs)(e.li,{children:[\\"Ensure you have an account on \\",(0,i.jsx)(e.a,{href:\\"https://cloud.elastic.co\\",rel:\\"nofollow\\",children:\\"Elastic Cloud\\"}),\\" and a deployed stack (\\",(0,i.jsx)(e.a,{href:\\"https://www.elastic.co/guide/en/elastic-stack/current/installing-elastic-stack.html\\",rel:\\"nofollow\\",children:\\"see instructions here\\"}),\\") on AWS. Deploying this on AWS is required for Elastic Serverless Forwarder.\\"]}),`\\n`,(0,i.jsxs)(e.li,{children:[\\"Utilize a version of the ever so popular \\",(0,i.jsx)(e.a,{href:\\"https://github.com/GoogleCloudPlatform/microservices-demo\\",rel:\\"nofollow\\",children:\\"Hipster Shop\\"}),\\" demo application. 
It was originally written by Google to showcase Kubernetes across a multitude of variants available, such as the \\",(0,i.jsx)(e.a,{href:\\"https://github.com/open-telemetry/opentelemetry-demo\\",rel:\\"nofollow\\",children:\\"OpenTelemetry Demo App\\"}),\\". The Elastic version is found \\",(0,i.jsx)(e.a,{href:\\"https://github.com/elastic/opentelemetry-demo\\",rel:\\"nofollow\\",children:\\"here\\"}),\\".\\"]}),`\\n`,(0,i.jsxs)(e.li,{children:[\\"Ensure you have configured the app for either Elastic APM agents or OpenTelemetry agents. For more details, please refer to these two blogs: \\",(0,i.jsx)(e.a,{href:\\"https://www.elastic.co/blog/opentelemetry-observability\\",rel:\\"nofollow\\",children:\\"Independence with OTel in Elastic\\"}),\\" and \\",(0,i.jsx)(e.a,{href:\\"https://www.elastic.co/blog/implementing-kubernetes-observability-security-opentelemetry\\",rel:\\"nofollow\\",children:\\"Observability and security with OTel in Elastic\\"}),\\". Additionally, review the \\",(0,i.jsx)(e.a,{href:\\"https://www.elastic.co/guide/en/apm/guide/current/open-telemetry.html\\",rel:\\"nofollow\\",children:\\"OTel documentation in Elastic\\"}),\\".\\"]}),`\\n`,(0,i.jsxs)(e.li,{children:[\\"Look through an overview of \\",(0,i.jsx)(e.a,{href:\\"https://www.elastic.co/guide/en/observability/current/apm.html\\",rel:\\"nofollow\\",children:\\"Elastic Observability APM capabilities\\"}),\\".\\"]}),`\\n`,(0,i.jsxs)(e.li,{children:[\\"Look through our \\",(0,i.jsx)(e.a,{href:\\"https://www.elastic.co/guide/en/observability/8.5/inspect-log-anomalies.html\\",rel:\\"nofollow\\",children:\\"Anomaly detection documentation\\"}),\\" for logs and \\",(0,i.jsx)(e.a,{href:\\"https://www.elastic.co/guide/en/observability/8.5/categorize-logs.html\\",rel:\\"nofollow\\",children:\\"log categorization documentation\\"}),\\".\\"]}),`\\n`]}),`\\n`,(0,i.jsx)(e.p,{children:\\"Once you\\\\u2019ve instrumented your application with APM (Elastic or OTel) agents and are ingesting metrics and logs into Elastic Observability, you should see a service map for the application as follows:\\"}),`\\n`,(0,i.jsx)(e.p,{children:(0,i.jsx)(e.img,{src:\\"/assets/images/reduce-mttd-ml-machine-learning-observability/blog-elastic-service-map.png\\",alt:\\"\\",width:\\"1713\\",height:\\"1239\\"})}),`\\n`,(0,i.jsx)(e.p,{children:\\"In our example, we\\\\u2019ve introduced issues to help walk you through the root cause analysis features: anomaly detection and log categorization. You might have a different set of anomalies and log categorization depending on how you load the application and/or introduce specific issues.\\"}),`\\n`,(0,i.jsx)(e.p,{children:\\"As part of the walk-through, we\\\\u2019ll assume we are a DevOps or SRE managing this application in production.\\"}),`\\n`,(0,i.jsx)(e.h2,{id:\\"root-cause-analysis\\",children:\\"Root cause analysis\\"}),`\\n`,(0,i.jsx)(e.p,{children:\\"While the application has been running normally for some time, you get a notification that some of the services are unhealthy. This can occur from the notification setting you\\\\u2019ve set up in Elastic or other external notification platforms (including customer related issues). In this instance, we\\\\u2019re assuming that customer support has called in multiple customer complaints about the website.\\"}),`\\n`,(0,i.jsx)(e.p,{children:\\"How do you as a DevOps or SRE investigate this? 
We will walk through two avenues in Elastic to investigate the issue:\\"}),`\\n`,(0,i.jsxs)(e.ul,{children:[`\\n`,(0,i.jsx)(e.li,{children:\\"Anomaly detection\\"}),`\\n`,(0,i.jsx)(e.li,{children:\\"Log categorization\\"}),`\\n`]}),`\\n`,(0,i.jsx)(e.p,{children:\\"While we show these two paths separately, they can be used in conjunction and are complementary, as they are both tools Elastic Observability provides to help you troubleshoot and identify a root cause.\\"}),`\\n`,(0,i.jsx)(e.h3,{id:\\"machine-learning-for-anomaly-detection\\",children:\\"Machine learning for anomaly detection\\"}),`\\n`,(0,i.jsx)(e.p,{children:\\"Elastic will detect anomalies based on historical patterns and identify a probability of these issues.\\"}),`\\n`,(0,i.jsx)(e.p,{children:\\"Starting with the service map, you can see anomalies identified with red circles and as we select them, Elastic will provide a score for the anomaly.\\"}),`\\n`,(0,i.jsx)(e.p,{children:(0,i.jsx)(e.img,{src:\\"/assets/images/reduce-mttd-ml-machine-learning-observability/blog-elastic-service-map-anomaly-detection.png\\",alt:\\"\\",width:\\"1629\\",height:\\"1211\\"})}),`\\n`,(0,i.jsx)(e.p,{children:\\"In this example, we can see that there is a score of 96 for a specific anomaly for the productCatalogService in the Hipster Shop application. An anomaly score indicates the significance of the anomaly compared to previously seen anomalies. More information on anomaly detection results can be found here. We can also dive deeper into the anomaly and analyze the details.\\"}),`\\n`,(0,i.jsx)(e.p,{children:(0,i.jsx)(e.img,{src:\\"/assets/images/reduce-mttd-ml-machine-learning-observability/blog-elastic-single-metric-viewer.png\\",alt:\\"\\",width:\\"1643\\",height:\\"1120\\"})}),`\\n`,(0,i.jsx)(e.p,{children:\\"What you will see for the productCatalogService is that there is a severe spike in average transaction latency time, which is the anomaly that was detected in the service map. Elastic\\\\u2019s machine learning has identified a specific metric anomaly (shown in the single metric view). It\\\\u2019s likely that customers are potentially responding to the slowness of the site and that the company is losing potential transactions.\\"}),`\\n`,(0,i.jsx)(e.p,{children:\\"One step to take next is to review all the other potential anomalies that we saw in the service map in a larger picture. Use an anomaly explorer to view all the anomalies that have been identified.\\"}),`\\n`,(0,i.jsx)(e.p,{children:(0,i.jsx)(e.img,{src:\\"/assets/images/reduce-mttd-ml-machine-learning-observability/blog-elastic-anomaly-explorer.png\\",alt:\\"\\",width:\\"1627\\",height:\\"1216\\"})}),`\\n`,(0,i.jsx)(e.p,{children:\\"Elastic is identifying numerous services with anomalies. productCatalogService has the highest score, and a good number of others (frontend, checkoutService, advertService, and more) also have high scores. However, this analysis is looking at just one metric.\\"}),`\\n`,(0,i.jsx)(e.p,{children:\\"Elastic can help detect anomalies across all types of data, such as Kubernetes data, metrics, and traces. 
If we analyze across all these types (via individual jobs we\\\\u2019ve created in Elastic machine learning), we will see a more comprehensive view as to what is potentially causing this latency issue.\\"}),`\\n`,(0,i.jsx)(e.p,{children:(0,i.jsx)(e.img,{src:\\"/assets/images/reduce-mttd-ml-machine-learning-observability/blog-elastic-anomaly-explorer-job-selection.png\\",alt:\\"\\",width:\\"1653\\",height:\\"1214\\"})}),`\\n`,(0,i.jsx)(e.p,{children:\\"Once all the potential jobs are selected and we\\\\u2019ve sorted by service.name, we can see that productCatalogService is still showing a high anomaly influencer score.\\"}),`\\n`,(0,i.jsx)(e.p,{children:(0,i.jsx)(e.img,{src:\\"/assets/images/reduce-mttd-ml-machine-learning-observability/blog-elastic-anomaly-explorer-timeline.png\\",alt:\\"\\",width:\\"1625\\",height:\\"1212\\"})}),`\\n`,(0,i.jsx)(e.p,{children:\\"In addition to the chart giving us a visual of the anomalies, we can review all the potential anomalies. As you will notice, Elastic has also categorized these anomalies (see category examples column). As we scroll through the results, we notice a potential postgreSQL issue from the categorization, which also has a high score of 94. Machine learning has identified a \\\\u201Crare mlcategory,\\\\u201D meaning that it has rarely occurred, hence pointing to a potential cause of the issue customers are seeing.\\"}),`\\n`,(0,i.jsx)(e.p,{children:(0,i.jsx)(e.img,{src:\\"/assets/images/reduce-mttd-ml-machine-learning-observability/blog-elastic-machine-learning-service-name.png\\",alt:\\"\\",width:\\"1568\\",height:\\"904\\"})}),`\\n`,(0,i.jsx)(e.p,{children:\\"We also notice that this issue is potentially caused by pgbench, a popular postgreSQL tool to help benchmark the database. pgbench runs the same sequence of SQL commands over and over, possibly in multiple, concurrent database sessions. While pgbench is definitely a useful tool, it should not be used in a production environment, as it causes heavy load on the database host, likely causing the higher latency issues on the site.\\"}),`\\n`,(0,i.jsx)(e.p,{children:\\"While this may or may not be the ultimate root cause, we have rather quickly identified a potential issue that has a high probability of being the root cause. An engineer likely intended to run pgbench against a staging database to evaluate its performance, and not the production environment.\\"}),`\\n`,(0,i.jsx)(e.h3,{id:\\"machine-learning-for-log-categorization\\",children:\\"Machine learning for log categorization\\"}),`\\n`,(0,i.jsx)(e.p,{children:\\"Elastic Observability\\\\u2019s service map has detected an anomaly, and in this part of the walk-through, we take a different approach by investigating the service details from the service map versus initially exploring the anomaly. When we explore the service details for productCatalogService, we see the following:\\"}),`\\n`,(0,i.jsx)(e.p,{children:(0,i.jsx)(e.img,{src:\\"/assets/images/reduce-mttd-ml-machine-learning-observability/blog-elastic-product-catalog-service.png\\",alt:\\"\\",width:\\"1778\\",height:\\"1240\\"})}),`\\n`,(0,i.jsx)(e.p,{children:\\"The service details are identifying several things:\\"}),`\\n`,(0,i.jsxs)(e.ol,{children:[`\\n`,(0,i.jsx)(e.li,{children:\\"There is an abnormally high latency compared to expected bounds of the service. 
We see that latency recently spiked to upward of 1s, compared to an average of roughly 275ms.\\"}),`\\n`,(0,i.jsxs)(e.li,{children:[\\"There is also a high failure rate for the same time frame as the high latency (lower left chart, \\\\u201C\\",(0,i.jsx)(e.strong,{children:\\"Failed transaction rate\\"}),\\"\\\\u201D).\\"]}),`\\n`,(0,i.jsx)(e.li,{children:\\"Additionally, we can see the transactions, and one in particular, /ListProduct, has an abnormally high latency in addition to a high failure rate.\\"}),`\\n`,(0,i.jsx)(e.li,{children:\\"We see productCatalogService has a dependency on postgreSQL.\\"}),`\\n`,(0,i.jsx)(e.li,{children:\\"We also see errors, all related to postgreSQL.\\"}),`\\n`]}),`\\n`,(0,i.jsx)(e.p,{children:\\"We could dig through the logs and analyze them in Elastic, or we can use a capability that identifies the relevant logs more easily.\\"}),`\\n`,(0,i.jsx)(e.p,{children:\\"If we go to Categories under Logs in Elastic Observability and search for postgresql.log to help identify postgresql logs that could be causing this error, we see that Elastic\\\\u2019s machine learning has automatically categorized the postgresql logs.\\"}),`\\n`,(0,i.jsx)(e.p,{children:(0,i.jsx)(e.img,{src:\\"/assets/images/reduce-mttd-ml-machine-learning-observability/blog-elastic-categories.png\\",alt:\\"\\",width:\\"1999\\",height:\\"890\\"})}),`\\n`,(0,i.jsx)(e.p,{children:\\"We notice two additional items:\\"}),`\\n`,(0,i.jsxs)(e.ul,{children:[`\\n`,(0,i.jsx)(e.li,{children:\\"There is a high count category (message count of 23,797 with a high anomaly score of 70) related to pgbench (which is odd to see in production). Hence we search further for all pgbench-related logs in Categories.\\"}),`\\n`,(0,i.jsx)(e.li,{children:\\"We see an odd issue regarding terminating the connection (with a low count).\\"}),`\\n`]}),`\\n`,(0,i.jsx)(e.p,{children:\\"While investigating the second error, which is severe, we can see logs from Categories before and after the error.\\"}),`\\n`,(0,i.jsx)(e.p,{children:(0,i.jsx)(e.img,{src:\\"/assets/images/reduce-mttd-ml-machine-learning-observability/blog-elastic-timestamp.png\\",alt:\\"\\",width:\\"1999\\",height:\\"856\\"})}),`\\n`,(0,i.jsx)(e.p,{children:\\"This troubleshooting shows postgreSQL having a FATAL error, the database shutting down prior to the error, and all connections terminating. Given the two immediate issues we identified, we have a good idea that someone was running pgbench and that this potentially overloaded the database, causing the latency issue that customers are seeing.\\"}),`\\n`,(0,i.jsx)(e.p,{children:\\"The next steps here could be to investigate anomaly detection and/or work with the developers to review the code and identify pgbench as part of the deployed configuration.\\"}),`\\n`,(0,i.jsx)(e.h2,{id:\\"conclusion\\",children:\\"Conclusion\\"}),`\\n`,(0,i.jsx)(e.p,{children:\\"I hope you\\\\u2019ve gotten an appreciation for how Elastic Observability can help you further identify and get closer to pinpointing the root cause of issues without having to look for a \\\\u201Cneedle in a haystack.\\\\u201D Here\\\\u2019s a quick recap of what you learned:\\"}),`\\n`,(0,i.jsxs)(e.ul,{children:[`\\n`,(0,i.jsxs)(e.li,{children:[`\\n`,(0,i.jsx)(e.p,{children:\\"Elastic Observability has numerous capabilities to help you reduce your time to find root cause and improve your MTTR (even MTTD). 
In particular, we reviewed the following two main capabilities in this blog:\\"}),`\\n`,(0,i.jsxs)(e.ol,{children:[`\\n`,(0,i.jsxs)(e.li,{children:[(0,i.jsx)(e.strong,{children:\\"Anomaly detection:\\"}),\\" Elastic Observability, when turned on (\\",(0,i.jsx)(e.a,{href:\\"https://www.elastic.co/guide/en/kibana/current/xpack-ml-anomalies.html\\",rel:\\"nofollow\\",children:\\"see documentation\\"}),\\"), automatically detects anomalies by continuously modeling the normal behavior of your time series data \\\\u2014 learning trends, periodicity, and more \\\\u2014 in real time to identify anomalies, streamline root cause analysis, and reduce false positives. Anomaly detection runs in and scales with Elasticsearch and includes an intuitive UI.\\"]}),`\\n`,(0,i.jsxs)(e.li,{children:[(0,i.jsx)(e.strong,{children:\\"Log categorization:\\"}),\\" Using anomaly detection, Elastic also identifies patterns in your log events quickly. Instead of manually identifying similar logs, the logs categorization view lists log events that have been grouped based on their messages and formats so that you can take action more quickly.\\"]}),`\\n`]}),`\\n`]}),`\\n`,(0,i.jsxs)(e.li,{children:[`\\n`,(0,i.jsxs)(e.p,{children:[`You learned how easy it is to use Elastic Observability\\\\u2019s log categorization and anomaly detection capabilities without having to understand the machine learning that drives these features or do any lengthy setup.\\nReady to get started? `,(0,i.jsx)(e.a,{href:\\"https://cloud.elastic.co/registration\\",rel:\\"nofollow\\",children:\\"Register for Elastic Cloud\\"}),\\" and try out the features and capabilities I\\\\u2019ve outlined above.\\"]}),`\\n`]}),`\\n`]}),`\\n`,(0,i.jsx)(e.h3,{id:\\"additional-logging-resources\\",children:\\"Additional logging resources:\\"}),`\\n`,(0,i.jsxs)(e.ul,{children:[`\\n`,(0,i.jsx)(e.li,{children:(0,i.jsx)(e.a,{href:\\"https://www.elastic.co/getting-started/observability/collect-and-analyze-logs\\",rel:\\"nofollow\\",children:\\"Getting started with logging on Elastic (quickstart)\\"})}),`\\n`,(0,i.jsx)(e.li,{children:(0,i.jsx)(e.a,{href:\\"https://www.elastic.co/guide/en/observability/current/logs-metrics-get-started.html\\",rel:\\"nofollow\\",children:\\"Ingesting common known logs via integrations (compute node example)\\"})}),`\\n`,(0,i.jsx)(e.li,{children:(0,i.jsx)(e.a,{href:\\"https://docs.elastic.co/integrations\\",rel:\\"nofollow\\",children:\\"List of integrations\\"})}),`\\n`,(0,i.jsx)(e.li,{children:(0,i.jsx)(e.a,{href:\\"https://www.elastic.co/blog/log-monitoring-management-enterprise\\",rel:\\"nofollow\\",children:\\"Ingesting custom application logs into Elastic\\"})}),`\\n`,(0,i.jsx)(e.li,{children:(0,i.jsx)(e.a,{href:\\"https://www.elastic.co/blog/observability-logs-parsing-schema-read-write\\",rel:\\"nofollow\\",children:\\"Enriching logs in Elastic\\"})}),`\\n`,(0,i.jsxs)(e.li,{children:[\\"Analyzing Logs with \\",(0,i.jsx)(e.a,{href:\\"https://www.elastic.co/blog/reduce-mttd-ml-machine-learning-observability\\",rel:\\"nofollow\\",children:\\"Anomaly Detection (ML)\\"}),\\" and \\",(0,i.jsx)(e.a,{href:\\"https://www.elastic.co/blog/observability-logs-machine-learning-aiops\\",rel:\\"nofollow\\",children:\\"AIOps\\"})]}),`\\n`]}),`\\n`,(0,i.jsx)(e.h3,{id:\\"common-use-case-examples-with-logs\\",children:\\"Common use case examples with logs:\\"}),`\\n`,(0,i.jsxs)(e.ul,{children:[`\\n`,(0,i.jsx)(e.li,{children:(0,i.jsx)(e.a,{href:\\"https://youtu.be/ax04ZFWqVCg\\",rel:\\"nofollow\\",children:\\"Nginx log 
management\\"})}),`\\n`,(0,i.jsx)(e.li,{children:(0,i.jsx)(e.a,{href:\\"https://www.elastic.co/blog/vpc-flow-logs-monitoring-analytics-observability\\",rel:\\"nofollow\\",children:\\"AWS VPC Flow log management\\"})}),`\\n`,(0,i.jsx)(e.li,{children:(0,i.jsx)(e.a,{href:\\"https://www.elastic.co/blog/kubernetes-errors-elastic-observability-logs-openai\\",rel:\\"nofollow\\",children:\\"Using OpenAI to analyze Kubernetes errors\\"})}),`\\n`,(0,i.jsx)(e.li,{children:(0,i.jsx)(e.a,{href:\\"https://youtu.be/Li5TJAWbz8Q\\",rel:\\"nofollow\\",children:\\"PostgreSQL issue analysis with AIOps\\"})}),`\\n`]})]})}function d(t={}){let{wrapper:e}=t.components||{};return e?(0,i.jsx)(e,{...t,children:(0,i.jsx)(h,{...t})}):h(t)}return v(k);})();\\n;return Component;"},"_id":"articles/reduce-mttd-ml-machine-learning-observability.mdx","_raw":{"sourceFilePath":"articles/reduce-mttd-ml-machine-learning-observability.mdx","sourceFileName":"reduce-mttd-ml-machine-learning-observability.mdx","sourceFileDir":"articles","contentType":"mdx","flattenedPath":"articles/reduce-mttd-ml-machine-learning-observability"},"type":"Article","imageUrl":"/assets/images/reduce-mttd-ml-machine-learning-observability/illustration-machine-learning-anomaly-1680x980.png","readingTime":"12 min read","url":"/reduce-mttd-ml-machine-learning-observability","headings":[{"level":2,"title":"Prerequisites and config","href":"#prerequisites-and-config"},{"level":2,"title":"Root cause analysis","href":"#root-cause-analysis"},{"level":3,"title":"Machine learning for anomaly detection","href":"#machine-learning-for-anomaly-detection"},{"level":3,"title":"Machine learning for log categorization","href":"#machine-learning-for-log-categorization"},{"level":2,"title":"Conclusion","href":"#conclusion"},{"level":3,"title":"Additional logging resources:","href":"#additional-logging-resources"},{"level":3,"title":"Common use case examples with logs:","href":"#common-use-case-examples-with-logs"}]},{"title":"Build better Service Level Objectives (SLOs) from logs and metrics","slug":"service-level-objectives-slos-logs-metrics","date":"2024-02-23","description":"To help manage operations and business metrics, Elastic Observability\'s SLO (Service Level Objectives) feature was introduced in 8.12. This blog reviews this feature and how you can use it with Elastic\'s AI Assistant to meet SLOs.","image":"139686_-_Elastic_-_Headers_-_V1_3.jpg","author":[{"slug":"bahubali-shetti","type":"Author","_raw":{}}],"tags":[{"slug":"log-analytics","type":"Tag","_raw":{}},{"slug":"slo","type":"Tag","_raw":{}},{"slug":"opentelemetry","type":"Tag","_raw":{}},{"slug":"kubernetes","type":"Tag","_raw":{}}],"body":{"raw":"\\nIn today\'s digital landscape, applications are at the heart of both our personal and professional lives. We\'ve grown accustomed to these applications being perpetually available and responsive. This expectation places a significant burden on the shoulders of developers and operations teams.\\n\\nSite reliability engineers (SREs) face the challenging task of sifting through vast quantities of data, not just from the applications themselves but also from the underlying infrastructure. In addition to data analysis, they are responsible for ensuring the effective use and development of operational tools. The growing volume of data, the daily resolution of issues, and the continuous evolution of tools and processes can detract from the focus on business performance.\\n\\nElastic Observability offers a solution to this challenge. 
It enables SREs to integrate and examine all telemetry data (logs, metrics, traces, and profiling) in conjunction with business metrics. This comprehensive approach to data analysis fosters operational excellence, boosts productivity, and yields critical insights, all of which are integral to maintaining high-performing applications in a demanding digital environment.\\n\\nTo help manage operations and business metrics, Elastic Observability\'s SLO (Service Level Objectives) feature was introduced in [8.12](https://www.elastic.co/guide/en/observability/8.12/slo.html). This feature enables setting measurable performance targets for services, such as [availability, latency, traffic, errors, and saturation or define your own](https://sre.google/sre-book/monitoring-distributed-systems/). Key components include:\\n\\n- Defining and monitoring SLIs (Service Level Indicators)\\n\\n- Monitoring error budgets indicating permissible performance shortfalls\\n\\n- Alerting on burn rates showing error budget consumption\\n\\nUsers can monitor SLOs in real-time with dashboards, track historical performance, and receive alerts for potential issues. Additionally, SLO dashboard panels offer customized visualizations.\\n\\nService Level Objectives (SLOs) are generally available for our Platinum and Enterprise subscription customers.\\n\\n\\n\\nIn this blog, we will outline the following:\\n\\n- What are SLOs? A Google SRE perspective\\n\\n- Several scenarios of defining and managing SLOs\\n\\n## Service Level Objective overview\\n\\nService Level Objectives (SLOs) are a crucial component for Site Reliability Engineering (SRE), as detailed in [Google\'s SRE Handbook](https://sre.google/sre-book/table-of-contents/). They provide a framework for quantifying and managing the reliability of a service. The key elements of SLOs include:\\n\\n- **Service Level Indicators (SLIs):** These are carefully selected metrics, such as uptime, latency, throughput, error rates, or other important metrics, that represent the aspects of the service and are important from an operations or business perspective. Hence, an SLI is a measure of the service level provided (latency, uptime, etc.), and it is defined as a ratio of good over total events, with a range between 0% and 100%.\\n\\n- **Service Level Objective (SLO):** An SLO is the target value for a service level measured as a percentage by an SLI. Above the threshold, the service is compliant. As an example, if we want to use service availability as an SLI, with the number of successful responses at 99.9%, then any time the number of failed responses is \\\\> .1%, the SLO will be out of compliance.\\n- **Error budget:** This represents the threshold of acceptable errors, balancing the need for reliability with practical limits. It is defined as 100% minus the SLO, which is the quantity of errors that can be tolerated.\\n- **Burn rate:** This concept relates to how quickly the service is consuming its error budget, which is the acceptable threshold for unreliability agreed upon by the service providers and its users.\\n\\nUnderstanding these concepts and effectively implementing them is essential for maintaining a balance between innovation and reliability in service delivery. For more detailed information, you can refer to [Google\'s SRE Handbook](https://sre.google/workbook/slo-document/).\\n\\nOne main thing to remember is that SLO monitoring is _not_ incident monitoring. 
SLO monitoring is a proactive, strategic approach designed to ensure that services meet established performance standards and user expectations. It involves tracking Service Level Objectives, error budgets, and the overall reliability of a service over time. This predictive method helps in preventing issues that could impact users and aligns service performance with business objectives.\\n\\nIn contrast, incident monitoring is a reactive process focused on detecting, responding to, and mitigating service incidents as they occur. It aims to address unexpected disruptions or failures in real time, minimizing downtime and impact on service. This includes monitoring system health, errors, and response times during incidents, with a focus on rapid response to minimize disruption and preserve the service\'s reputation.\\n\\nElastic\xae’s SLO capability is based directly on the Google SRE Handbook. All the definitions and semantics are utilized as described in Google’s SRE handbook. Hence users can perform the following on SLOs in Elastic:\\n\\n- Define an SLO on an SLI such as KQL (log based query), service availability, service latency, custom metric, histogram metric, or a timeslice metric. Additionally, set the appropriate threshold.\\n\\n- Utilize occurrence-based versus timeslice-based budgeting. Occurrence budgeting computes the SLO as the number of good events over the total number of events. Timeslice budgeting breaks the overall time window into smaller slices of a defined duration and computes the SLO as the number of good slices over the total slices. Timeslice targets are more accurate and useful when calculating a service’s SLO against agreed-upon customer targets.\\n\\n- Manage all the SLOs in a single location.\\n\\n- Trigger alerts from the defined SLO, whether the SLI is out of bounds, the error budget is burning down too quickly, or the error rate crosses a defined threshold.\\n\\n- Create unique service level dashboards with SLO information for a more comprehensive view of the service.\\n\\n![Create alerts](/assets/images/service-level-objectives-slos-logs-metrics/1-slo-blog.png)\\n\\n![Create dashboards](/assets/images/service-level-objectives-slos-logs-metrics/2-slo-blog.png)\\n\\nThese capabilities give SREs a way to manage business metrics alongside operational ones.\\n\\n## SLOs based on logs: NGINX availability\\n\\nDefining SLOs does not always mean metrics need to be used. Logs are a rich form of information, even when they have metrics embedded in them. Hence it’s useful to understand your business and operations status based on logs.\\n\\nElastic allows you to create an SLO based on specific fields in the log message, which don’t have to be metrics. A simple example is a multi-tier app that has a web server layer (nginx), a processing layer, and a database layer.\\n\\nLet’s say that your processing layer is managing a significant number of requests. You want to ensure that the service is up and healthy. The best way is to ensure that all http.response.status_code values are less than 500. 
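Concretely, that rule is just a good/total pair of KQL filters over the same log index. As a rough sketch, here is what a similar SLO could look like when created through Kibana’s SLO API rather than the UI; the endpoint and schema are as documented for 8.12, and the index pattern, field names, and target below are illustrative assumptions to adjust for your data:\\n\\n```bash\\n# Kibana API call (the kbn: prefix works from Dev Tools Console); schema per the 8.12 SLO docs\\nPOST kbn:/api/observability/slos\\n{\\n \\"name\\": \\"nginx-availability\\",\\n \\"description\\": \\"99.9% of responses are not server errors\\",\\n \\"indicator\\": {\\n \\"type\\": \\"sli.kql.custom\\",\\n \\"params\\": {\\n \\"index\\": \\"logs-*\\",\\n \\"good\\": \\"http.response.status_code < 500\\",\\n \\"total\\": \\"http.response.status_code : *\\",\\n \\"timestampField\\": \\"@timestamp\\"\\n }\\n },\\n \\"timeWindow\\": {\\n \\"duration\\": \\"7d\\",\\n \\"type\\": \\"rolling\\"\\n },\\n \\"budgetingMethod\\": \\"occurrences\\",\\n \\"objective\\": {\\n \\"target\\": 0.999\\n }\\n}\\n```\\n\\n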
Any status code below 500 means the service itself is up, and errors like 404 are client errors rather than server errors.\\n\\n![expanded document](/assets/images/service-level-objectives-slos-logs-metrics/3-slo-blog.png)\\n\\nIf we use Discover in Elastic, we see that there are close to 2M log messages over a seven-day time frame.\\n\\n![17k](/assets/images/service-level-objectives-slos-logs-metrics/4-slo-blog.png)\\n\\nAdditionally, the number of messages with http.response.status_code \\\\> 500 is minimal, about 17K.\\n\\nRather than creating an alert, we can create an SLO with this query:\\n\\n![edit SLO](/assets/images/service-level-objectives-slos-logs-metrics/5-slo-blog.png)\\n\\nWe chose to use occurrences as the budgeting method to keep things simple.\\n\\nOnce defined, we can see how well our SLO is performing over a seven-day time frame. We can see not only the SLO, but also the burn rate, the historical SLI, the error budget, and any specific alerts against the SLO.\\n\\n![SLOs](/assets/images/service-level-objectives-slos-logs-metrics/6-slo-blog.png)\\n\\n![nginx server availability ](/assets/images/service-level-objectives-slos-logs-metrics/7-slo-blog.png)\\n\\nNot only do we get information about the violation, but we also get:\\n\\n- Historical SLI (7 days)\\n\\n- Error budget burn down\\n\\n- Good vs. bad events (24 hours)\\n\\n![Percentages](/assets/images/service-level-objectives-slos-logs-metrics/8-slo-blog.png)\\n\\nWe can see how we’ve easily burned through our error budget.\\n\\nHence something must be going on with nginx. To investigate, all we need to do is utilize the [AI Assistant](https://www.elastic.co/blog/context-aware-insights-elastic-ai-assistant-observability) and its natural language interface to ask questions that help analyze the situation.\\n\\nLet’s use Elastic’s AI Assistant to analyze the breakdown of http.response.status_code across all the logs from the past seven days. This helps us understand how many 50X errors we are getting.\\n\\n![count of http response status code](/assets/images/service-level-objectives-slos-logs-metrics/9-slo-blog.png)\\n\\nAs we can see, the number of 502s is minimal compared to the number of overall messages, but it is affecting our SLO.\\n\\nHowever, it seems like Nginx is having an issue. To address it, we also ask the AI Assistant how to work on this error. Specifically, we ask if there is an internal runbook the SRE team has created.\\n\\n![ai assistant thread](/assets/images/service-level-objectives-slos-logs-metrics/10-slo-blog.png)\\n\\nThe AI Assistant retrieves a runbook the team has added to its knowledge base. We can now analyze and try to resolve or mitigate the issue with nginx.\\n\\nWhile this is a simple example, there are endless possibilities that can be defined based on KQL. Some other simple examples:\\n\\n- 99% of requests occur under 200ms\\n\\n- 99% of log messages are not errors\\n\\n## Application SLOs: OpenTelemetry demo cartservice\\n\\nA common application developers and SREs use to learn about OpenTelemetry and test out Observability features is the [OpenTelemetry demo](https://github.com/elastic/opentelemetry-demo).\\n\\nThis demo has [feature flags](https://opentelemetry.io/docs/demo/feature-flags/) to simulate issues. 
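Depending on the demo version, these flags are toggled either through the demo’s feature flag UI or in a flagd configuration file (recent upstream versions keep one at src/flagd/demo.flagd.json). The snippet below is an illustrative sketch only; the flag name, variants, and failure rate are assumptions to check against the version of the demo you deploy:\\n\\n```json\\n{\\n \\"flags\\": {\\n \\"cartServiceFailure\\": {\\n \\"description\\": \\"Fail a fraction of EmptyCart requests\\",\\n \\"state\\": \\"ENABLED\\",\\n \\"variants\\": {\\n \\"on\\": 0.1,\\n \\"off\\": 0\\n },\\n \\"defaultVariant\\": \\"on\\"\\n }\\n }\\n}\\n```\\n\\n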
With Elastic’s alerting and SLO capability, you can also determine how well the entire application is performing and how well your customer experience is holding up when these feature flags are used.\\n\\n[Elastic supports OpenTelemetry by taking OTLP directly with no need for an Elastic specific agent](https://www.elastic.co/blog/opentelemetry-observability). You can send in OpenTelemetry data directly from the application (through OTel libraries) and through the collector.\\n\\nWe’ve brought up the OpenTelemetry demo on a K8S cluster (AWS EKS) and turned on the cartservice feature flag. This inserts errors into the cartservice. We’ve also created two SLOs to monitor the cartservice’s availability and latency.\\n\\n![SLOs](/assets/images/service-level-objectives-slos-logs-metrics/11-slo-blog.png)\\n\\nWe can see that the cartservice’s availability is violated. As we drill down, we see that there aren’t as many successful transactions, which is affecting the SLO.\\n\\n![cartservice-otel](/assets/images/service-level-objectives-slos-logs-metrics/12-slo-blog.png)\\n\\nAs we drill into the service, we can see in Elastic APM that there is a higher than normal failure rate of about 5.5% for the emptyCart service.\\n\\n![apm](/assets/images/service-level-objectives-slos-logs-metrics/13-slo-blog.png)\\n\\nWe can investigate this further in APM, but that is a discussion for another blog. Stay tuned to see how we can use Elastic’s machine learning, AIOps, and AI Assistant to understand the issue.\\n\\n## Conclusion\\n\\nSLOs allow you to set clear, measurable targets for your service performance, based on factors like availability, response times, error rates, and other key metrics. Hopefully with the overview we’ve provided in this blog, you can see that:\\n\\n- SLOs can be based on logs. In Elastic, you can use KQL to essentially find and filter on specific logs and log fields to monitor and trigger SLOs.\\n\\n- AI Assistant is a valuable, easy-to-use capability to analyze, troubleshoot, and even potentially resolve SLO issues.\\n\\n- APM Service based SLOs are easy to create and manage with integration to Elastic APM. We also use OTel telemetry to help monitor SLOs.\\n\\nFor more information on SLOs in Elastic, check out [Elastic documentation](https://www.elastic.co/guide/en/observability/current/slo.html) and the following resources:\\n\\n- [What’s new in Elastic Observability 8.12](https://www.elastic.co/guide/en/observability/8.12/slo.html)\\n\\n- [Introducing the Elastic AI Assistant](https://www.elastic.co/blog/context-aware-insights-elastic-ai-assistant-observability)\\n\\n- [Elastic OpenTelemetry support](https://www.elastic.co/blog/opentelemetry-observability)\\n\\nReady to get started? Sign up for [Elastic Cloud](https://cloud.elastic.co/registration) and try out the features and capabilities I’ve outlined above to get the most value and visibility out of your SLOs.\\n\\n_The release and timing of any features or functionality described in this post remain at Elastic\'s sole discretion. Any features or functionality not currently available may not be delivered on time or at all._\\n\\n_In this blog post, we may have used or referred to third party generative AI tools, which are owned and operated by their respective owners. Elastic does not have any control over the third party tools and we have no responsibility or liability for their content, operation or use, nor for any loss or damage that may arise from your use of such tools. 
Please exercise caution when using AI tools with personal, sensitive or confidential information. Any data you submit may be used for AI training or other purposes. There is no guarantee that information you provide will be kept secure or confidential. You should familiarize yourself with the privacy practices and terms of use of any generative AI tools prior to use._\\n\\n_Elastic, Elasticsearch, ESRE, Elasticsearch Relevance Engine and associated marks are trademarks, logos or registered trademarks of Elasticsearch N.V. in the United States and other countries. All other company and product names are trademarks, logos or registered trademarks of their respective owners._\\n","code":"var Component=(()=>{var p=Object.create;var r=Object.defineProperty;var u=Object.getOwnPropertyDescriptor;var g=Object.getOwnPropertyNames;var m=Object.getPrototypeOf,v=Object.prototype.hasOwnProperty;var f=(t,e)=>()=>(e||t((e={exports:{}}).exports,e),e.exports),b=(t,e)=>{for(var s in e)r(t,s,{get:e[s],enumerable:!0})},o=(t,e,s,a)=>{if(e&&typeof e==\\"object\\"||typeof e==\\"function\\")for(let n of g(e))!v.call(t,n)&&n!==s&&r(t,n,{get:()=>e[n],enumerable:!(a=u(e,n))||a.enumerable});return t};var y=(t,e,s)=>(s=t!=null?p(m(t)):{},o(e||!t||!t.__esModule?r(s,\\"default\\",{value:t,enumerable:!0}):s,t)),w=t=>o(r({},\\"__esModule\\",{value:!0}),t);var c=f((A,l)=>{l.exports=_jsx_runtime});var O={};b(O,{default:()=>d,frontmatter:()=>S});var i=y(c()),S={title:\\"Build better Service Level Objectives (SLOs) from logs and metrics\\",slug:\\"service-level-objectives-slos-logs-metrics\\",date:\\"2024-02-23\\",description:\\"To help manage operations and business metrics, Elastic Observability\'s SLO (Service Level Objectives) feature was introduced in 8.12. This blog reviews this feature and how you can use it with Elastic\'s AI Assistant to meet SLOs.\\",author:[{slug:\\"bahubali-shetti\\"}],image:\\"139686_-_Elastic_-_Headers_-_V1_3.jpg\\",tags:[{slug:\\"log-analytics\\"},{slug:\\"slo\\"},{slug:\\"opentelemetry\\"},{slug:\\"kubernetes\\"}]};function h(t){let e={a:\\"a\\",em:\\"em\\",h2:\\"h2\\",img:\\"img\\",li:\\"li\\",p:\\"p\\",strong:\\"strong\\",ul:\\"ul\\",...t.components},{Video:s}=e;return s||L(\\"Video\\",!0),(0,i.jsxs)(i.Fragment,{children:[(0,i.jsx)(e.p,{children:\\"In today\'s digital landscape, applications are at the heart of both our personal and professional lives. We\'ve grown accustomed to these applications being perpetually available and responsive. This expectation places a significant burden on the shoulders of developers and operations teams.\\"}),`\\n`,(0,i.jsx)(e.p,{children:\\"Site reliability engineers (SREs) face the challenging task of sifting through vast quantities of data, not just from the applications themselves but also from the underlying infrastructure. In addition to data analysis, they are responsible for ensuring the effective use and development of operational tools. The growing volume of data, the daily resolution of issues, and the continuous evolution of tools and processes can detract from the focus on business performance.\\"}),`\\n`,(0,i.jsx)(e.p,{children:\\"Elastic Observability offers a solution to this challenge. It enables SREs to integrate and examine all telemetry data (logs, metrics, traces, and profiling) in conjunction with business metrics. 
This comprehensive approach to data analysis fosters operational excellence, boosts productivity, and yields critical insights, all of which are integral to maintaining high-performing applications in a demanding digital environment.\\"}),`\\n`,(0,i.jsxs)(e.p,{children:[\\"To help manage operations and business metrics, Elastic Observability\'s SLO (Service Level Objectives) feature was introduced in \\",(0,i.jsx)(e.a,{href:\\"https://www.elastic.co/guide/en/observability/8.12/slo.html\\",rel:\\"nofollow\\",children:\\"8.12\\"}),\\". This feature enables setting measurable performance targets for services, such as \\",(0,i.jsx)(e.a,{href:\\"https://sre.google/sre-book/monitoring-distributed-systems/\\",rel:\\"nofollow\\",children:\\"availability, latency, traffic, errors, and saturation or define your own\\"}),\\". Key components include:\\"]}),`\\n`,(0,i.jsxs)(e.ul,{children:[`\\n`,(0,i.jsxs)(e.li,{children:[`\\n`,(0,i.jsx)(e.p,{children:\\"Defining and monitoring SLIs (Service Level Indicators)\\"}),`\\n`]}),`\\n`,(0,i.jsxs)(e.li,{children:[`\\n`,(0,i.jsx)(e.p,{children:\\"Monitoring error budgets indicating permissible performance shortfalls\\"}),`\\n`]}),`\\n`,(0,i.jsxs)(e.li,{children:[`\\n`,(0,i.jsx)(e.p,{children:\\"Alerting on burn rates showing error budget consumption\\"}),`\\n`]}),`\\n`]}),`\\n`,(0,i.jsx)(e.p,{children:\\"Users can monitor SLOs in real-time with dashboards, track historical performance, and receive alerts for potential issues. Additionally, SLO dashboard panels offer customized visualizations.\\"}),`\\n`,(0,i.jsx)(e.p,{children:\\"Service Level Objectives (SLOs) are generally available for our Platinum and Enterprise subscription customers.\\"}),`\\n`,(0,i.jsx)(s,{vidyardUuid:\\"ngfY9mrkNEkjmpRY4Qd5Pb\\"}),`\\n`,(0,i.jsx)(e.p,{children:\\"In this blog, we will outline the following:\\"}),`\\n`,(0,i.jsxs)(e.ul,{children:[`\\n`,(0,i.jsxs)(e.li,{children:[`\\n`,(0,i.jsx)(e.p,{children:\\"What are SLOs? A Google SRE perspective\\"}),`\\n`]}),`\\n`,(0,i.jsxs)(e.li,{children:[`\\n`,(0,i.jsx)(e.p,{children:\\"Several scenarios of defining and managing SLOs\\"}),`\\n`]}),`\\n`]}),`\\n`,(0,i.jsx)(e.h2,{id:\\"service-level-objective-overview\\",children:\\"Service Level Objective overview\\"}),`\\n`,(0,i.jsxs)(e.p,{children:[\\"Service Level Objectives (SLOs) are a crucial component for Site Reliability Engineering (SRE), as detailed in \\",(0,i.jsx)(e.a,{href:\\"https://sre.google/sre-book/table-of-contents/\\",rel:\\"nofollow\\",children:\\"Google\'s SRE Handbook\\"}),\\". They provide a framework for quantifying and managing the reliability of a service. The key elements of SLOs include:\\"]}),`\\n`,(0,i.jsxs)(e.ul,{children:[`\\n`,(0,i.jsxs)(e.li,{children:[`\\n`,(0,i.jsxs)(e.p,{children:[(0,i.jsx)(e.strong,{children:\\"Service Level Indicators (SLIs):\\"}),\\" These are carefully selected metrics, such as uptime, latency, throughput, error rates, or other important metrics, that represent the aspects of the service and are important from an operations or business perspective. Hence, an SLI is a measure of the service level provided (latency, uptime, etc.), and it is defined as a ratio of good over total events, with a range between 0% and 100%.\\"]}),`\\n`]}),`\\n`,(0,i.jsxs)(e.li,{children:[`\\n`,(0,i.jsxs)(e.p,{children:[(0,i.jsx)(e.strong,{children:\\"Service Level Objective (SLO):\\"}),\\" An SLO is the target value for a service level measured as a percentage by an SLI. Above the threshold, the service is compliant. 
As an example, if we want to use service availability as an SLI, with the number of successful responses at 99.9%, then any time the number of failed responses is > .1%, the SLO will be out of compliance.\\"]}),`\\n`]}),`\\n`,(0,i.jsxs)(e.li,{children:[`\\n`,(0,i.jsxs)(e.p,{children:[(0,i.jsx)(e.strong,{children:\\"Error budget:\\"}),\\" This represents the threshold of acceptable errors, balancing the need for reliability with practical limits. It is defined as 100% minus the SLO, which is the quantity of errors that can be tolerated.\\"]}),`\\n`]}),`\\n`,(0,i.jsxs)(e.li,{children:[`\\n`,(0,i.jsxs)(e.p,{children:[(0,i.jsx)(e.strong,{children:\\"Burn rate:\\"}),\\" This concept relates to how quickly the service is consuming its error budget, which is the acceptable threshold for unreliability agreed upon by the service providers and its users.\\"]}),`\\n`]}),`\\n`]}),`\\n`,(0,i.jsxs)(e.p,{children:[\\"Understanding these concepts and effectively implementing them is essential for maintaining a balance between innovation and reliability in service delivery. For more detailed information, you can refer to \\",(0,i.jsx)(e.a,{href:\\"https://sre.google/workbook/slo-document/\\",rel:\\"nofollow\\",children:\\"Google\'s SRE Handbook\\"}),\\".\\"]}),`\\n`,(0,i.jsxs)(e.p,{children:[\\"One main thing to remember is that SLO monitoring is \\",(0,i.jsx)(e.em,{children:\\"not\\"}),\\" incident monitoring. SLO monitoring is a proactive, strategic approach designed to ensure that services meet established performance standards and user expectations. It involves tracking Service Level Objectives, error budgets, and the overall reliability of a service over time. This predictive method helps in preventing issues that could impact users and aligns service performance with business objectives.\\"]}),`\\n`,(0,i.jsx)(e.p,{children:\\"In contrast, incident monitoring is a reactive process focused on detecting, responding to, and mitigating service incidents as they occur. It aims to address unexpected disruptions or failures in real time, minimizing downtime and impact on service. This includes monitoring system health, errors, and response times during incidents, with a focus on rapid response to minimize disruption and preserve the service\'s reputation.\\"}),`\\n`,(0,i.jsx)(e.p,{children:\\"Elastic\\\\xAE\\\\u2019s SLO capability is based directly on the Google SRE Handbook. All the definitions and semantics are utilized as described in Google\\\\u2019s SRE handbook. Hence users can perform the following on SLOs in Elastic:\\"}),`\\n`,(0,i.jsxs)(e.ul,{children:[`\\n`,(0,i.jsxs)(e.li,{children:[`\\n`,(0,i.jsx)(e.p,{children:\\"Define an SLO on an SLI such as KQL (log based query), service availability, service latency, custom metric, histogram metric, or a timeslice metric. Additionally, set the appropriate threshold.\\"}),`\\n`]}),`\\n`,(0,i.jsxs)(e.li,{children:[`\\n`,(0,i.jsx)(e.p,{children:\\"Utilize occurrence-based versus timeslice-based budgeting. Occurrence budgeting computes the SLO as the number of good events over the total number of events. Timeslice budgeting breaks the overall time window into smaller slices of a defined duration and computes the SLO as the number of good slices over the total slices. 
Timeslice targets are more accurate and useful when calculating a service\\\\u2019s SLO against agreed-upon customer targets.\\"}),`\\n`]}),`\\n`,(0,i.jsxs)(e.li,{children:[`\\n`,(0,i.jsx)(e.p,{children:\\"Manage all the SLOs in a single location.\\"}),`\\n`]}),`\\n`,(0,i.jsxs)(e.li,{children:[`\\n`,(0,i.jsx)(e.p,{children:\\"Trigger alerts from the defined SLO, whether the SLI is out of bounds, the error budget is burning down too quickly, or the error rate crosses a defined threshold.\\"}),`\\n`]}),`\\n`,(0,i.jsxs)(e.li,{children:[`\\n`,(0,i.jsx)(e.p,{children:\\"Create unique service level dashboards with SLO information for a more comprehensive view of the service.\\"}),`\\n`]}),`\\n`]}),`\\n`,(0,i.jsx)(e.p,{children:(0,i.jsx)(e.img,{src:\\"/assets/images/service-level-objectives-slos-logs-metrics/1-slo-blog.png\\",alt:\\"Create alerts\\",width:\\"1601\\",height:\\"1047\\"})}),`\\n`,(0,i.jsx)(e.p,{children:(0,i.jsx)(e.img,{src:\\"/assets/images/service-level-objectives-slos-logs-metrics/2-slo-blog.png\\",alt:\\"Create dashboards\\",width:\\"1999\\",height:\\"1384\\"})}),`\\n`,(0,i.jsx)(e.p,{children:\\"These capabilities give SREs a way to manage business metrics alongside operational ones.\\"}),`\\n`,(0,i.jsx)(e.h2,{id:\\"slos-based-on-logs-nginx-availability\\",children:\\"SLOs based on logs: NGINX availability\\"}),`\\n`,(0,i.jsx)(e.p,{children:\\"Defining SLOs does not always mean metrics need to be used. Logs are a rich form of information, even when they have metrics embedded in them. Hence it\\\\u2019s useful to understand your business and operations status based on logs.\\"}),`\\n`,(0,i.jsx)(e.p,{children:\\"Elastic allows you to create an SLO based on specific fields in the log message, which don\\\\u2019t have to be metrics. A simple example is a multi-tier app that has a web server layer (nginx), a processing layer, and a database layer.\\"}),`\\n`,(0,i.jsx)(e.p,{children:\\"Let\\\\u2019s say that your processing layer is managing a significant number of requests. You want to ensure that the service is up and healthy. The best way is to ensure that all http.response.status_code values are less than 500. Any status code below 500 means the service itself is up, and errors like 404 are client errors rather than server errors.\\"}),`\\n`,(0,i.jsx)(e.p,{children:(0,i.jsx)(e.img,{src:\\"/assets/images/service-level-objectives-slos-logs-metrics/3-slo-blog.png\\",alt:\\"expanded document\\",width:\\"1607\\",height:\\"1045\\"})}),`\\n`,(0,i.jsx)(e.p,{children:\\"If we use Discover in Elastic, we see that there are close to 2M log messages over a seven-day time frame.\\"}),`\\n`,(0,i.jsx)(e.p,{children:(0,i.jsx)(e.img,{src:\\"/assets/images/service-level-objectives-slos-logs-metrics/4-slo-blog.png\\",alt:\\"17k\\",width:\\"1605\\",height:\\"477\\"})}),`\\n`,(0,i.jsx)(e.p,{children:\\"Additionally, the number of messages with http.response.status_code > 500 is minimal, about 17K.\\"}),`\\n`,(0,i.jsx)(e.p,{children:\\"Rather than creating an alert, we can create an SLO with this query:\\"}),`\\n`,(0,i.jsx)(e.p,{children:(0,i.jsx)(e.img,{src:\\"/assets/images/service-level-objectives-slos-logs-metrics/5-slo-blog.png\\",alt:\\"edit SLO\\",width:\\"1262\\",height:\\"1536\\"})}),`\\n`,(0,i.jsx)(e.p,{children:\\"We chose to use occurrences as the budgeting method to keep things simple.\\"}),`\\n`,(0,i.jsx)(e.p,{children:\\"Once defined, we can see how well our SLO is performing over a seven-day time frame. 
We can see not only the SLO, but also the burn rate, the historical SLI, the error budget, and any specific alerts against the SLO.\\"}),`\\n`,(0,i.jsx)(e.p,{children:(0,i.jsx)(e.img,{src:\\"/assets/images/service-level-objectives-slos-logs-metrics/6-slo-blog.png\\",alt:\\"SLOs\\",width:\\"1999\\",height:\\"823\\"})}),`\\n`,(0,i.jsx)(e.p,{children:(0,i.jsx)(e.img,{src:\\"/assets/images/service-level-objectives-slos-logs-metrics/7-slo-blog.png\\",alt:\\"nginx server availability \\",width:\\"1999\\",height:\\"1007\\"})}),`\\n`,(0,i.jsx)(e.p,{children:\\"Not only do we get information about the violation, but we also get:\\"}),`\\n`,(0,i.jsxs)(e.ul,{children:[`\\n`,(0,i.jsxs)(e.li,{children:[`\\n`,(0,i.jsx)(e.p,{children:\\"Historical SLI (7 days)\\"}),`\\n`]}),`\\n`,(0,i.jsxs)(e.li,{children:[`\\n`,(0,i.jsx)(e.p,{children:\\"Error budget burn down\\"}),`\\n`]}),`\\n`,(0,i.jsxs)(e.li,{children:[`\\n`,(0,i.jsx)(e.p,{children:\\"Good vs. bad events (24 hours)\\"}),`\\n`]}),`\\n`]}),`\\n`,(0,i.jsx)(e.p,{children:(0,i.jsx)(e.img,{src:\\"/assets/images/service-level-objectives-slos-logs-metrics/8-slo-blog.png\\",alt:\\"Percentages\\",width:\\"1999\\",height:\\"1014\\"})}),`\\n`,(0,i.jsx)(e.p,{children:\\"We can see how we\\\\u2019ve easily burned through our error budget.\\"}),`\\n`,(0,i.jsxs)(e.p,{children:[\\"Hence something must be going on with nginx. To investigate, all we need to do is utilize the \\",(0,i.jsx)(e.a,{href:\\"https://www.elastic.co/blog/context-aware-insights-elastic-ai-assistant-observability\\",rel:\\"nofollow\\",children:\\"AI Assistant\\"}),\\" and its natural language interface to ask questions that help analyze the situation.\\"]}),`\\n`,(0,i.jsx)(e.p,{children:\\"Let\\\\u2019s use Elastic\\\\u2019s AI Assistant to analyze the breakdown of http.response.status_code across all the logs from the past seven days. This helps us understand how many 50X errors we are getting.\\"}),`\\n`,(0,i.jsx)(e.p,{children:(0,i.jsx)(e.img,{src:\\"/assets/images/service-level-objectives-slos-logs-metrics/9-slo-blog.png\\",alt:\\"count of http response status code\\",width:\\"1999\\",height:\\"1007\\"})}),`\\n`,(0,i.jsx)(e.p,{children:\\"As we can see, the number of 502s is minimal compared to the number of overall messages, but it is affecting our SLO.\\"}),`\\n`,(0,i.jsx)(e.p,{children:\\"However, it seems like Nginx is having an issue. To address it, we also ask the AI Assistant how to work on this error. Specifically, we ask if there is an internal runbook the SRE team has created.\\"}),`\\n`,(0,i.jsx)(e.p,{children:(0,i.jsx)(e.img,{src:\\"/assets/images/service-level-objectives-slos-logs-metrics/10-slo-blog.png\\",alt:\\"ai assistant thread\\",width:\\"1999\\",height:\\"1000\\"})}),`\\n`,(0,i.jsx)(e.p,{children:\\"The AI Assistant retrieves a runbook the team has added to its knowledge base. We can now analyze and try to resolve or mitigate the issue with nginx.\\"}),`\\n`,(0,i.jsx)(e.p,{children:\\"While this is a simple example, there are endless possibilities that can be defined based on KQL. 
Some other simple examples:\\"}),`\\n`,(0,i.jsxs)(e.ul,{children:[`\\n`,(0,i.jsxs)(e.li,{children:[`\\n`,(0,i.jsx)(e.p,{children:\\"99% of requests occur under 200ms\\"}),`\\n`]}),`\\n`,(0,i.jsxs)(e.li,{children:[`\\n`,(0,i.jsx)(e.p,{children:\\"99% of log messages are not errors\\"}),`\\n`]}),`\\n`]}),`\\n`,(0,i.jsx)(e.h2,{id:\\"application-slos-opentelemetry-demo-cartservice\\",children:\\"Application SLOs: OpenTelemetry demo cartservice\\"}),`\\n`,(0,i.jsxs)(e.p,{children:[\\"A common application developers and SREs use to learn about OpenTelemetry and test out Observability features is the \\",(0,i.jsx)(e.a,{href:\\"https://github.com/elastic/opentelemetry-demo\\",rel:\\"nofollow\\",children:\\"OpenTelemetry demo\\"}),\\".\\"]}),`\\n`,(0,i.jsxs)(e.p,{children:[\\"This demo has \\",(0,i.jsx)(e.a,{href:\\"https://opentelemetry.io/docs/demo/feature-flags/\\",rel:\\"nofollow\\",children:\\"feature flags\\"}),\\" to simulate issues. With Elastic\\\\u2019s alerting and SLO capability, you can also determine how well the entire application is performing and how well your customer experience is holding up when these feature flags are used.\\"]}),`\\n`,(0,i.jsxs)(e.p,{children:[(0,i.jsx)(e.a,{href:\\"https://www.elastic.co/blog/opentelemetry-observability\\",rel:\\"nofollow\\",children:\\"Elastic supports OpenTelemetry by taking OTLP directly with no need for an Elastic specific agent\\"}),\\". You can send in OpenTelemetry data directly from the application (through OTel libraries) and through the collector.\\"]}),`\\n`,(0,i.jsx)(e.p,{children:\\"We\\\\u2019ve brought up the OpenTelemetry demo on a K8S cluster (AWS EKS) and turned on the cartservice feature flag. This inserts errors into the cartservice. We\\\\u2019ve also created two SLOs to monitor the cartservice\\\\u2019s availability and latency.\\"}),`\\n`,(0,i.jsx)(e.p,{children:(0,i.jsx)(e.img,{src:\\"/assets/images/service-level-objectives-slos-logs-metrics/11-slo-blog.png\\",alt:\\"SLOs\\",width:\\"1999\\",height:\\"979\\"})}),`\\n`,(0,i.jsx)(e.p,{children:\\"We can see that the cartservice\\\\u2019s availability is violated. As we drill down, we see that there aren\\\\u2019t as many successful transactions, which is affecting the SLO.\\"}),`\\n`,(0,i.jsx)(e.p,{children:(0,i.jsx)(e.img,{src:\\"/assets/images/service-level-objectives-slos-logs-metrics/12-slo-blog.png\\",alt:\\"cartservice-otel\\",width:\\"1999\\",height:\\"1011\\"})}),`\\n`,(0,i.jsx)(e.p,{children:\\"As we drill into the service, we can see in Elastic APM that there is a higher than normal failure rate of about 5.5% for the emptyCart service.\\"}),`\\n`,(0,i.jsx)(e.p,{children:(0,i.jsx)(e.img,{src:\\"/assets/images/service-level-objectives-slos-logs-metrics/13-slo-blog.png\\",alt:\\"apm\\",width:\\"1999\\",height:\\"1001\\"})}),`\\n`,(0,i.jsx)(e.p,{children:\\"We can investigate this further in APM, but that is a discussion for another blog. Stay tuned to see how we can use Elastic\\\\u2019s machine learning, AIOps, and AI Assistant to understand the issue.\\"}),`\\n`,(0,i.jsx)(e.h2,{id:\\"conclusion\\",children:\\"Conclusion\\"}),`\\n`,(0,i.jsx)(e.p,{children:\\"SLOs allow you to set clear, measurable targets for your service performance, based on factors like availability, response times, error rates, and other key metrics. Hopefully with the overview we\\\\u2019ve provided in this blog, you can see that:\\"}),`\\n`,(0,i.jsxs)(e.ul,{children:[`\\n`,(0,i.jsxs)(e.li,{children:[`\\n`,(0,i.jsx)(e.p,{children:\\"SLOs can be based on logs. 
In Elastic, you can use KQL to essentially find and filter on specific logs and log fields to monitor and trigger SLOs.\\"}),`\\n`]}),`\\n`,(0,i.jsxs)(e.li,{children:[`\\n`,(0,i.jsx)(e.p,{children:\\"AI Assistant is a valuable, easy-to-use capability to analyze, troubleshoot, and even potentially resolve SLO issues.\\"}),`\\n`]}),`\\n`,(0,i.jsxs)(e.li,{children:[`\\n`,(0,i.jsx)(e.p,{children:\\"APM Service based SLOs are easy to create and manage with integration to Elastic APM. We also use OTel telemetry to help monitor SLOs.\\"}),`\\n`]}),`\\n`]}),`\\n`,(0,i.jsxs)(e.p,{children:[\\"For more information on SLOs in Elastic, check out \\",(0,i.jsx)(e.a,{href:\\"https://www.elastic.co/guide/en/observability/current/slo.html\\",rel:\\"nofollow\\",children:\\"Elastic documentation\\"}),\\" and the following resources:\\"]}),`\\n`,(0,i.jsxs)(e.ul,{children:[`\\n`,(0,i.jsxs)(e.li,{children:[`\\n`,(0,i.jsx)(e.p,{children:(0,i.jsx)(e.a,{href:\\"https://www.elastic.co/guide/en/observability/8.12/slo.html\\",rel:\\"nofollow\\",children:\\"What\\\\u2019s new in Elastic Observability 8.12\\"})}),`\\n`]}),`\\n`,(0,i.jsxs)(e.li,{children:[`\\n`,(0,i.jsx)(e.p,{children:(0,i.jsx)(e.a,{href:\\"https://www.elastic.co/blog/context-aware-insights-elastic-ai-assistant-observability\\",rel:\\"nofollow\\",children:\\"Introducing the Elastic AI Assistant\\"})}),`\\n`]}),`\\n`,(0,i.jsxs)(e.li,{children:[`\\n`,(0,i.jsx)(e.p,{children:(0,i.jsx)(e.a,{href:\\"https://www.elastic.co/blog/opentelemetry-observability\\",rel:\\"nofollow\\",children:\\"Elastic OpenTelemetry support\\"})}),`\\n`]}),`\\n`]}),`\\n`,(0,i.jsxs)(e.p,{children:[\\"Ready to get started? Sign up for \\",(0,i.jsx)(e.a,{href:\\"https://cloud.elastic.co/registration\\",rel:\\"nofollow\\",children:\\"Elastic Cloud\\"}),\\" and try out the features and capabilities I\\\\u2019ve outlined above to get the most value and visibility out of your SLOs.\\"]}),`\\n`,(0,i.jsx)(e.p,{children:(0,i.jsx)(e.em,{children:\\"The release and timing of any features or functionality described in this post remain at Elastic\'s sole discretion. Any features or functionality not currently available may not be delivered on time or at all.\\"})}),`\\n`,(0,i.jsx)(e.p,{children:(0,i.jsx)(e.em,{children:\\"In this blog post, we may have used or referred to third party generative AI tools, which are owned and operated by their respective owners. Elastic does not have any control over the third party tools and we have no responsibility or liability for their content, operation or use, nor for any loss or damage that may arise from your use of such tools. Please exercise caution when using AI tools with personal, sensitive or confidential information. Any data you submit may be used for AI training or other purposes. There is no guarantee that information you provide will be kept secure or confidential. You should familiarize yourself with the privacy practices and terms of use of any generative AI tools prior to use.\\"})}),`\\n`,(0,i.jsx)(e.p,{children:(0,i.jsx)(e.em,{children:\\"Elastic, Elasticsearch, ESRE, Elasticsearch Relevance Engine and associated marks are trademarks, logos or registered trademarks of Elasticsearch N.V. in the United States and other countries. 
All other company and product names are trademarks, logos or registered trademarks of their respective owners.\\"})})]})}function d(t={}){let{wrapper:e}=t.components||{};return e?(0,i.jsx)(e,{...t,children:(0,i.jsx)(h,{...t})}):h(t)}function L(t,e){throw new Error(\\"Expected \\"+(e?\\"component\\":\\"object\\")+\\" `\\"+t+\\"` to be defined: you likely forgot to import, pass, or provide it.\\")}return w(O);})();\\n;return Component;"},"_id":"articles/service-level-objectives-slos-logs-metrics.mdx","_raw":{"sourceFilePath":"articles/service-level-objectives-slos-logs-metrics.mdx","sourceFileName":"service-level-objectives-slos-logs-metrics.mdx","sourceFileDir":"articles","contentType":"mdx","flattenedPath":"articles/service-level-objectives-slos-logs-metrics"},"type":"Article","imageUrl":"/assets/images/service-level-objectives-slos-logs-metrics/139686_-_Elastic_-_Headers_-_V1_3.jpg","readingTime":"11 min read","url":"/service-level-objectives-slos-logs-metrics","headings":[{"level":2,"title":"Service Level Objective overview","href":"#service-level-objective-overview"},{"level":2,"title":"SLOs based on logs: NGINX availability","href":"#slos-based-on-logs-nginx-availability"},{"level":2,"title":"Application SLOs: OpenTelemetry demo cartservice","href":"#application-slos-opentelemetry-demo-cartservice"},{"level":2,"title":"Conclusion","href":"#conclusion"}]},{"title":"Simplifying log data management: Harness the power of flexible routing with Elastic","slug":"simplifying-log-data-management-flexible-routing","date":"2023-06-13","description":"The reroute processor, available as of Elasticsearch 8.8, allows customizable rules for routing documents, such as logs, into data streams for better control of processing, retention, and permissions with examples that you can try on your own.","image":"observability-digital-transformation-1.jpg","author":[{"slug":"felix-barnsteiner","type":"Author","_raw":{}},{"slug":"nicolas-ruflin","type":"Author","_raw":{}}],"tags":[{"slug":"log-analytics","type":"Tag","_raw":{}}],"body":{"raw":"\\nIn Elasticsearch 8.8, we’re introducing the [reroute processor](https://www.elastic.co/guide/en/elasticsearch/reference/current/reroute-processor.html) in technical preview that makes it possible to send documents, such as logs, to different [data streams](https://www.elastic.co/guide/en/elasticsearch/reference/current/data-streams.html), according to flexible routing rules. When using Elastic Observability, this gives you more granular control over your data with regard to retention, permissions, and processing with all the potential benefits of the [data stream naming scheme](https://www.elastic.co/blog/an-introduction-to-the-elastic-data-stream-naming-scheme). While optimized for data streams, the reroute processor also works with classic indices. This blog post contains examples on how to use the reroute processor that you can try on your own by executing the snippets in the [Kibana dev tools](https://www.elastic.co/guide/en/kibana/current/console-kibana.html).\\n\\nElastic Observability offers a wide range of [integrations](https://www.elastic.co/integrations/data-integrations?solution=observability) that help you to monitor your applications and infrastructure. These integrations are added as policies to [Elastic agents](https://www.elastic.co/guide/en/fleet/current/elastic-agent-installation.html), which help ingest telemetry into Elastic Observability. 
Several examples of these integrations include the ability to ingest logs from systems that send a stream of logs from different applications, such as [Amazon Kinesis Data Firehose](https://www.elastic.co/guide/en/kinesis/current/aws-firehose-setup-guide.html), [Kubernetes container logs](https://docs.elastic.co/en/integrations/kubernetes), and [syslog](https://docs.elastic.co/integrations/tcp). One challenge is that these multiplexed log streams are sending data to the same Elasticsearch data stream, such as logs-syslog-default. This makes it difficult to create parsing rules in ingest pipelines and dashboards for specific technologies, such as the ones from the [Nginx](https://docs.elastic.co/en/integrations/nginx) and [Apache](https://docs.elastic.co/en/integrations/apache) integrations. That’s because in Elasticsearch, in combination with the [data stream naming scheme](https://www.elastic.co/blog/an-introduction-to-the-elastic-data-stream-naming-scheme), the processing and the schema are both encapsulated in a data stream.\\n\\nThe reroute processor helps you tease apart data from a generic data stream and send it to a more specific one. You may use that mechanism to send logs to a data stream that is set up by the Nginx integration, for example, so that the logs are parsed with that integration and you can use the integration’s prebuilt dashboards or create custom ones with the fields, such as the url, the status code, and the response time that the Nginx pipeline has parsed out of the Nginx log message. You can also separate regular Nginx logs from error logs with the reroute processor, providing further separation and categorization of logs.\\n\\n![routing pipeline](/assets/images/simplifying-log-data-management-flexible-routing/blog-elastic-routing-pipeline.png)\\n\\n## Example use case\\n\\nTo use the reroute processor, first:\\n\\n1. Ensure you are on Elasticsearch 8.8\\n\\n2. Ensure you have permissions to manage indices and data streams\\n\\n3. If you don’t already have an account on [Elastic Cloud](https://cloud.elastic.co/registration?fromURI=/home), sign up for one\\n\\nNext, you’ll need to [set up a data stream](https://www.elastic.co/guide/en/elasticsearch/reference/master/set-up-a-data-stream.html) and create a custom Elasticsearch [ingest pipeline](https://www.elastic.co/guide/en/elasticsearch/reference/master/ingest.html) that is set as the [default pipeline](https://www.elastic.co/guide/en/elasticsearch/reference/current/ingest.html#set-default-pipeline). Below we go through this step by step for the “mydata” data set that we’ll simulate ingesting container logs into. We start with a basic example and extend it from there.\\n\\nThe following steps are run in the Elastic console, which is found at **Management -\\\\> Dev tools -\\\\> Console**. First, we need an ingest pipeline and a template for the data stream:\\n\\n```bash\\nPUT _ingest/pipeline/logs-mydata\\n{\\n \\"description\\": \\"Routing for mydata\\",\\n \\"processors\\": [\\n {\\n \\"reroute\\": {\\n }\\n }\\n ]\\n}\\n```\\n\\nThis creates an ingest pipeline with an empty reroute processor. 
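Before wiring the pipeline to a data stream, you can dry-run it against a sample document with the simulate API (stock Elasticsearch; the sample document below is illustrative):\\n\\n```bash\\nPOST _ingest/pipeline/logs-mydata/_simulate\\n{\\n \\"docs\\": [\\n {\\n \\"_index\\": \\"logs-mydata-default\\",\\n \\"_source\\": {\\n \\"@timestamp\\": \\"2023-05-25T12:26:23+00:00\\",\\n \\"container\\": {\\n \\"name\\": \\"foo\\"\\n }\\n }\\n }\\n ]\\n}\\n```\\n\\n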
To make use of the pipeline at ingest time, we need an index template:\\n\\n```bash\\nPUT _index_template/logs-mydata\\n{\\n \\"index_patterns\\": [\\n \\"logs-mydata-*\\"\\n ],\\n \\"data_stream\\": {},\\n \\"priority\\": 200,\\n \\"template\\": {\\n \\"settings\\": {\\n \\"index.default_pipeline\\": \\"logs-mydata\\"\\n },\\n \\"mappings\\": {\\n \\"properties\\": {\\n \\"container.name\\": {\\n \\"type\\": \\"keyword\\"\\n }\\n }\\n }\\n }\\n}\\n```\\n\\nThe above template is applied to all data that is shipped to logs-mydata-\\\\*. We have mapped container.name as a keyword, as this is the field we will be using for routing later on. Now, we send a document to the data stream and it will be ingested into logs-mydata-default:\\n\\n```bash\\nPOST logs-mydata-default/_doc\\n{\\n \\"@timestamp\\": \\"2023-05-25T12:26:23+00:00\\",\\n \\"container\\": {\\n \\"name\\": \\"foo\\"\\n }\\n}\\n```\\n\\nWe can check that it was ingested with the command below, which will show 1 result.\\n\\n```bash\\nGET logs-mydata-default/_search\\n```\\n\\nWithout modifying the reroute processor, this already allows us to route documents. As soon as the reroute processor is specified, it will look for data_stream.dataset and data_stream.namespace fields by default and will send documents to the corresponding data stream, according to the [data stream naming scheme](https://www.elastic.co/blog/an-introduction-to-the-elastic-data-stream-naming-scheme) `logs-<dataset>-<namespace>`. Let’s try this out:\\n\\n```bash\\nPOST logs-mydata-default/_doc\\n{\\n \\"@timestamp\\": \\"2023-03-30T12:27:23+00:00\\",\\n \\"container\\": {\\n \\"name\\": \\"foo\\"\\n },\\n \\"data_stream\\": {\\n \\"dataset\\": \\"myotherdata\\"\\n }\\n}\\n```\\n\\nAs can be seen with the GET logs-mydata-default/\\\\_search command, this document ended up in the logs-myotherdata-default data stream. But instead of using default rules, we want to create our own rules for the field container.name. If the field is container.name = foo, we want to send it to logs-foo-default. For this, we modify our routing pipeline:\\n\\n```bash\\nPUT _ingest/pipeline/logs-mydata\\n{\\n \\"description\\": \\"Routing for mydata\\",\\n \\"processors\\": [\\n {\\n \\"reroute\\": {\\n \\"tag\\": \\"foo\\",\\n \\"if\\": \\"ctx.container?.name == \'foo\'\\",\\n \\"dataset\\": \\"foo\\"\\n }\\n }\\n ]\\n}\\n```\\n\\nLet\'s test this with a document:\\n\\n```bash\\nPOST logs-mydata-default/_doc\\n{\\n \\"@timestamp\\": \\"2023-05-25T12:26:23+00:00\\",\\n \\"container\\": {\\n \\"name\\": \\"foo\\"\\n }\\n}\\n```\\n\\nWhile it would be possible to specify a routing rule for each container name, you can also route by the value of a field in the document:\\n\\n```bash\\nPUT _ingest/pipeline/logs-mydata\\n{\\n \\"description\\": \\"Routing for mydata\\",\\n \\"processors\\": [\\n {\\n \\"reroute\\": {\\n \\"tag\\": \\"mydata\\",\\n \\"dataset\\": [\\n \\"{{container.name}}\\",\\n \\"mydata\\"\\n ]\\n }\\n }\\n ]\\n}\\n```\\n\\nIn this example, we are using a field reference as a routing rule. If the container.name field exists in the document, it will be routed — otherwise it falls back to mydata. 
This can be tested with:\\n\\n```bash\\nPOST logs-mydata-default/_doc\\n{\\n \\"@timestamp\\": \\"2023-05-25T12:26:23+00:00\\",\\n \\"container\\": {\\n \\"name\\": \\"foo1\\"\\n }\\n}\\n\\nPOST logs-mydata-default/_doc\\n{\\n \\"@timestamp\\": \\"2023-05-25T12:26:23+00:00\\",\\n \\"container\\": {\\n \\"name\\": \\"foo2\\"\\n }\\n}\\n```\\n\\nThis creates the data streams logs-foo1-default and logs-foo2-default.\\n\\n_NOTE: There is currently a limitation in the processor that requires the fields specified in a `{{field.reference}}` to be in a nested object notation. A dotted field name does not currently work. Also, you’ll get errors when the document contains dotted field names for any_ _data_stream.\\\\*_ _field. This limitation will be_ [_fixed_](https://github.com/elastic/elasticsearch/pull/96243) _in 8.8.2 and 8.9.0._\\n\\n## API keys\\n\\nWhen using the reroute processor, it is important that the API keys specified have permissions for the source and target indices. For example, if a pattern is used for routing from logs-mydata-default, the API key must have write permissions for `logs-*-*` as data could end up in any of these indices (see example further down).\\n\\nWe’re currently [working](https://github.com/elastic/integrations/issues/5989) [on](https://github.com/elastic/integrations/issues/6255) extending the API key permissions for our [integrations](https://www.elastic.co/integrations/data-integrations) so that they allow for routing by default if you’re running a Fleet-managed Elastic Agent.\\n\\nIf you’re using a standalone Elastic Agent, or any other shipper, you can use this as a template to create your API key:\\n\\n```bash\\nPOST /_security/api_key\\n{\\n \\"name\\": \\"ingest_logs\\",\\n \\"role_descriptors\\": {\\n \\"ingest_logs\\": {\\n \\"cluster\\": [\\n \\"monitor\\"\\n ],\\n \\"indices\\": [\\n {\\n \\"names\\": [\\n \\"logs-*-*\\"\\n ],\\n \\"privileges\\": [\\n \\"auto_configure\\",\\n \\"create_doc\\"\\n ]\\n }\\n ]\\n }\\n }\\n}\\n```\\n\\n## Future plans\\n\\nIn Elasticsearch 8.8, the reroute processor was released in technical preview. The plan is to adopt this in our data sink integrations like syslog, k8s, and others. Elastic will provide default routing rules that just work out of the box, but it will also be possible for users to add their own rules. If you are using our integrations, follow [this guide](https://www.elastic.co/guide/en/elasticsearch/reference/current/ingest.html#pipelines-for-fleet-elastic-agent) on how to add a custom ingest pipeline.\\n\\n## Try it out!\\n\\nThis blog post has shown some sample use cases for document based routing. Try it out on your data by adjusting the commands for index templates and ingest pipelines to your own data, and get started with [Elastic Cloud](https://cloud.elastic.co/registration?fromURI=/home) through a 7-day free trial. 
Let us know via [this feedback form](https://ela.st/reroute-feedback) how you’re planning to use the [reroute processor](https://www.elastic.co/guide/en/elasticsearch/reference/current/reroute-processor.html) and whether you have suggestions for improvement.\\n","code":"var Component=(()=>{var u=Object.create;var o=Object.defineProperty;var p=Object.getOwnPropertyDescriptor;var m=Object.getOwnPropertyNames;var g=Object.getPrototypeOf,f=Object.prototype.hasOwnProperty;var w=(n,e)=>()=>(e||n((e={exports:{}}).exports,e),e.exports),y=(n,e)=>{for(var a in e)o(n,a,{get:e[a],enumerable:!0})},r=(n,e,a,i)=>{if(e&&typeof e==\\"object\\"||typeof e==\\"function\\")for(let s of m(e))!f.call(n,s)&&s!==a&&o(n,s,{get:()=>e[s],enumerable:!(i=p(e,s))||i.enumerable});return n};var b=(n,e,a)=>(a=n!=null?u(g(n)):{},r(e||!n||!n.__esModule?o(a,\\"default\\",{value:n,enumerable:!0}):a,n)),x=n=>r(o({},\\"__esModule\\",{value:!0}),n);var c=w((v,l)=>{l.exports=_jsx_runtime});var _={};y(_,{default:()=>d,frontmatter:()=>T});var t=b(c()),T={title:\\"Simplifying log data management: Harness the power of flexible routing with Elastic\\",slug:\\"simplifying-log-data-management-flexible-routing\\",date:\\"2023-06-13\\",description:\\"The reroute processor, available as of Elasticsearch 8.8, allows customizable rules for routing documents, such as logs, into data streams for better control of processing, retention, and permissions with examples that you can try on your own.\\",author:[{slug:\\"felix-barnsteiner\\"},{slug:\\"nicolas-ruflin\\"}],image:\\"observability-digital-transformation-1.jpg\\",tags:[{slug:\\"log-analytics\\"}]};function h(n){let e={a:\\"a\\",code:\\"code\\",em:\\"em\\",h2:\\"h2\\",img:\\"img\\",li:\\"li\\",ol:\\"ol\\",p:\\"p\\",pre:\\"pre\\",strong:\\"strong\\",...n.components};return(0,t.jsxs)(t.Fragment,{children:[(0,t.jsxs)(e.p,{children:[\\"In Elasticsearch 8.8, we\\\\u2019re introducing the \\",(0,t.jsx)(e.a,{href:\\"https://www.elastic.co/guide/en/elasticsearch/reference/current/reroute-processor.html\\",rel:\\"nofollow\\",children:\\"reroute processor\\"}),\\" in technical preview that makes it possible to send documents, such as logs, to different \\",(0,t.jsx)(e.a,{href:\\"https://www.elastic.co/guide/en/elasticsearch/reference/current/data-streams.html\\",rel:\\"nofollow\\",children:\\"data streams\\"}),\\", according to flexible routing rules. When using Elastic Observability, this gives you more granular control over your data with regard to retention, permissions, and processing with all the potential benefits of the \\",(0,t.jsx)(e.a,{href:\\"https://www.elastic.co/blog/an-introduction-to-the-elastic-data-stream-naming-scheme\\",rel:\\"nofollow\\",children:\\"data stream naming scheme\\"}),\\". While optimized for data streams, the reroute processor also works with classic indices. This blog post contains examples on how to use the reroute processor that you can try on your own by executing the snippets in the \\",(0,t.jsx)(e.a,{href:\\"https://www.elastic.co/guide/en/kibana/current/console-kibana.html\\",rel:\\"nofollow\\",children:\\"Kibana dev tools\\"}),\\".\\"]}),`\\n`,(0,t.jsxs)(e.p,{children:[\\"Elastic Observability offers a wide range of \\",(0,t.jsx)(e.a,{href:\\"https://www.elastic.co/integrations/data-integrations?solution=observability\\",rel:\\"nofollow\\",children:\\"integrations\\"}),\\" that help you to monitor your applications and infrastructure. 
These integrations are added as policies to \\",(0,t.jsx)(e.a,{href:\\"https://www.elastic.co/guide/en/fleet/current/elastic-agent-installation.html\\",rel:\\"nofollow\\",children:\\"Elastic agents\\"}),\\", which help ingest telemetry into Elastic Observability. Several examples of these integrations include the ability to ingest logs from systems that send a stream of logs from different applications, such as \\",(0,t.jsx)(e.a,{href:\\"https://www.elastic.co/guide/en/kinesis/current/aws-firehose-setup-guide.html\\",rel:\\"nofollow\\",children:\\"Amazon Kinesis Data Firehose\\"}),\\", \\",(0,t.jsx)(e.a,{href:\\"https://docs.elastic.co/en/integrations/kubernetes\\",rel:\\"nofollow\\",children:\\"Kubernetes container logs\\"}),\\", and \\",(0,t.jsx)(e.a,{href:\\"https://docs.elastic.co/integrations/tcp\\",rel:\\"nofollow\\",children:\\"syslog\\"}),\\". One challenge is that these multiplexed log streams are sending data to the same Elasticsearch data stream, such as logs-syslog-default. This makes it difficult to create parsing rules in ingest pipelines and dashboards for specific technologies, such as the ones from the \\",(0,t.jsx)(e.a,{href:\\"https://docs.elastic.co/en/integrations/nginx\\",rel:\\"nofollow\\",children:\\"Nginx\\"}),\\" and \\",(0,t.jsx)(e.a,{href:\\"https://docs.elastic.co/en/integrations/apache\\",rel:\\"nofollow\\",children:\\"Apache\\"}),\\" integrations. That\\\\u2019s because in Elasticsearch, in combination with the \\",(0,t.jsx)(e.a,{href:\\"https://www.elastic.co/blog/an-introduction-to-the-elastic-data-stream-naming-scheme\\",rel:\\"nofollow\\",children:\\"data stream naming scheme\\"}),\\", the processing and the schema are both encapsulated in a data stream.\\"]}),`\\n`,(0,t.jsx)(e.p,{children:\\"The reroute processor helps you tease apart data from a generic data stream and send it to a more specific one. You may use that mechanism to send logs to a data stream that is set up by the Nginx integration, for example, so that the logs are parsed with that integration and you can use the integration\\\\u2019s prebuilt dashboards or create custom ones with the fields, such as the url, the status code, and the response time that the Nginx pipeline has parsed out of the Nginx log message. 
You can also split out/separate regular Nginx logs and errors with the reroute processor, providing further separation ability and categorization of logs.\\"}),`\\n`,(0,t.jsx)(e.p,{children:(0,t.jsx)(e.img,{src:\\"/assets/images/simplifying-log-data-management-flexible-routing/blog-elastic-routing-pipeline.png\\",alt:\\"routing pipeline\\",width:\\"1412\\",height:\\"531\\"})}),`\\n`,(0,t.jsx)(e.h2,{id:\\"example-use-case\\",children:\\"Example use case\\"}),`\\n`,(0,t.jsx)(e.p,{children:\\"To use the reroute processor, first:\\"}),`\\n`,(0,t.jsxs)(e.ol,{children:[`\\n`,(0,t.jsxs)(e.li,{children:[`\\n`,(0,t.jsx)(e.p,{children:\\"Ensure you are on Elasticsearch 8.8\\"}),`\\n`]}),`\\n`,(0,t.jsxs)(e.li,{children:[`\\n`,(0,t.jsx)(e.p,{children:\\"Ensure you have permissions to manage indices and data streams\\"}),`\\n`]}),`\\n`,(0,t.jsxs)(e.li,{children:[`\\n`,(0,t.jsxs)(e.p,{children:[\\"If you don\\\\u2019t already have an account on \\",(0,t.jsx)(e.a,{href:\\"https://cloud.elastic.co/registration?fromURI=/home\\",rel:\\"nofollow\\",children:\\"Elastic Cloud\\"}),\\", sign up for one\\"]}),`\\n`]}),`\\n`]}),`\\n`,(0,t.jsxs)(e.p,{children:[\\"Next, you\\\\u2019ll need to \\",(0,t.jsx)(e.a,{href:\\"https://www.elastic.co/guide/en/elasticsearch/reference/master/set-up-a-data-stream.html\\",rel:\\"nofollow\\",children:\\"set up a data stream\\"}),\\" and create a custom Elasticsearch \\",(0,t.jsx)(e.a,{href:\\"https://www.elastic.co/guide/en/elasticsearch/reference/master/ingest.html\\",rel:\\"nofollow\\",children:\\"ingest pipeline\\"}),\\" that is called as the \\",(0,t.jsx)(e.a,{href:\\"https://www.elastic.co/guide/en/elasticsearch/reference/current/ingest.html#set-default-pipeline\\",rel:\\"nofollow\\",children:\\"default pipeline\\"}),\\". Below we go through this step by step for the \\\\u201Cmydata\\\\u201D data set that we\\\\u2019ll simulate ingesting container logs into. We start with a basic example and extend it from there.\\"]}),`\\n`,(0,t.jsxs)(e.p,{children:[\\"The following steps should be utilized in the Elastic console, which is found at \\",(0,t.jsx)(e.strong,{children:\\"Management -> Dev tools -> Console\\"}),\\". First, we need an an ingest pipeline and a template for the data stream:\\"]}),`\\n`,(0,t.jsx)(e.pre,{children:(0,t.jsx)(e.code,{className:\\"language-bash\\",children:`PUT _ingest/pipeline/logs-mydata\\n{\\n \\"description\\": \\"Routing for mydata\\",\\n \\"processors\\": [\\n {\\n \\"reroute\\": {\\n }\\n }\\n ]\\n}\\n`})}),`\\n`,(0,t.jsx)(e.p,{children:\\"This creates an ingest pipeline with an empty reroute processor. To make use of it, we need an index template:\\"}),`\\n`,(0,t.jsx)(e.pre,{children:(0,t.jsx)(e.code,{className:\\"language-bash\\",children:`PUT _index_template/logs-mydata\\n{\\n \\"index_patterns\\": [\\n \\"logs-mydata-*\\"\\n ],\\n \\"data_stream\\": {},\\n \\"priority\\": 200,\\n \\"template\\": {\\n \\"settings\\": {\\n \\"index.default_pipeline\\": \\"logs-mydata\\"\\n },\\n \\"mappings\\": {\\n \\"properties\\": {\\n \\"container.name\\": {\\n \\"type\\": \\"keyword\\"\\n }\\n }\\n }\\n }\\n}\\n`})}),`\\n`,(0,t.jsx)(e.p,{children:\\"The above template is applied to all data that is shipped to logs-mydata-*. We have mapped container.name as a keyword, as this is the field we will be using for routing later on. 
Now, we send a document to the data stream and it will be ingested into logs-mydata-default:\\"}),`\\n`,(0,t.jsx)(e.pre,{children:(0,t.jsx)(e.code,{className:\\"language-bash\\",children:`POST logs-mydata-default/_doc\\n{\\n \\"@timestamp\\": \\"2023-05-25T12:26:23+00:00\\",\\n \\"container\\": {\\n \\"name\\": \\"foo\\"\\n }\\n}\\n`})}),`\\n`,(0,t.jsx)(e.p,{children:\\"We can check that it was ingested with the command below, which will show 1 result.\\"}),`\\n`,(0,t.jsx)(e.pre,{children:(0,t.jsx)(e.code,{className:\\"language-bash\\",children:`GET logs-mydata-default/_search\\n`})}),`\\n`,(0,t.jsxs)(e.p,{children:[\\"Without modifying the routing processor, this already allows us to route documents. As soon as the reroute processor is specified, it will look for data_stream.dataset and data_stream.namespace fields by default and will send documents to the corresponding data stream, according to the \\",(0,t.jsx)(e.a,{href:\\"https://www.elastic.co/blog/an-introduction-to-the-elastic-data-stream-naming-scheme\\",rel:\\"nofollow\\",children:\\"data stream naming scheme\\"}),\\" logs--. Let\\\\u2019s try this out:\\"]}),`\\n`,(0,t.jsx)(e.pre,{children:(0,t.jsx)(e.code,{className:\\"language-bash\\",children:`POST logs-mydata-default/_doc\\n{\\n \\"@timestamp\\": \\"2023-03-30T12:27:23+00:00\\",\\n \\"container\\": {\\n\\"name\\": \\"foo\\"\\n },\\n \\"data_stream\\": {\\n \\"dataset\\": \\"myotherdata\\"\\n }\\n}\\n`})}),`\\n`,(0,t.jsx)(e.p,{children:\\"As can be seen with the GET logs-mydata-default/_search command, this document ended up in the logs-myotherdata-default data stream. But instead of using default rules, we want to create our own rules for the field container.name. If the field is container.name = foo, we want to send it to logs-foo-default. For this we modify our routing pipeline:\\"}),`\\n`,(0,t.jsx)(e.pre,{children:(0,t.jsx)(e.code,{className:\\"language-bash\\",children:`PUT _ingest/pipeline/logs-mydata\\n{\\n \\"description\\": \\"Routing for mydata\\",\\n \\"processors\\": [\\n {\\n \\"reroute\\": {\\n \\"tag\\": \\"foo\\",\\n \\"if\\" : \\"ctx.container?.name == \'foo\'\\",\\n \\"dataset\\": \\"foo\\"\\n }\\n }\\n ]\\n}\\n`})}),`\\n`,(0,t.jsx)(e.p,{children:\\"Let\'s test this with a document:\\"}),`\\n`,(0,t.jsx)(e.pre,{children:(0,t.jsx)(e.code,{className:\\"language-bash\\",children:`POST logs-mydata-default/_doc\\n{\\n \\"@timestamp\\": \\"2023-05-25T12:26:23+00:00\\",\\n \\"container\\": {\\n \\"name\\": \\"foo\\"\\n }\\n}\\n`})}),`\\n`,(0,t.jsx)(e.p,{children:\\"While it would be possible to specify a routing rule for each container name, you can also route by the value of a field in the document:\\"}),`\\n`,(0,t.jsx)(e.pre,{children:(0,t.jsx)(e.code,{className:\\"language-bash\\",children:`PUT _ingest/pipeline/logs-mydata\\n{\\n \\"description\\": \\"Routing for mydata\\",\\n \\"processors\\": [\\n {\\n \\"reroute\\": {\\n \\"tag\\": \\"mydata\\",\\n \\"dataset\\": [\\n \\"{{container.name}}\\",\\n \\"mydata\\"\\n ]\\n }\\n }\\n ]\\n}\\n`})}),`\\n`,(0,t.jsx)(e.p,{children:\\"In this example, we are using a field reference as a routing rule. If the container.name field exists in the document, it will be routed \\\\u2014 otherwise it falls back to mydata. 
This can be tested with:\\"}),`\\n`,(0,t.jsx)(e.pre,{children:(0,t.jsx)(e.code,{className:\\"language-bash\\",children:`POST logs-mydata-default/_doc\\n{\\n \\"@timestamp\\": \\"2023-05-25T12:26:23+00:00\\",\\n \\"container\\": {\\n \\"name\\": \\"foo1\\"\\n }\\n}\\n\\nPOST logs-mydata-default/_doc\\n{\\n \\"@timestamp\\": \\"2023-05-25T12:26:23+00:00\\",\\n \\"container\\": {\\n \\"name\\": \\"foo2\\"\\n }\\n}\\n`})}),`\\n`,(0,t.jsx)(e.p,{children:\\"This creates the data streams logs-foo1-default and logs-foo2-default.\\"}),`\\n`,(0,t.jsxs)(e.p,{children:[(0,t.jsxs)(e.em,{children:[\\"NOTE: There is currently a limitation in the processor that requires the fields specified in a \\",(0,t.jsx)(e.code,{children:\\"{{field.reference}}\\"}),\\" to be in a nested object notation. A dotted field name does not currently work. Also, you\\\\u2019ll get errors when the document contains dotted field names for any\\"]}),\\" \\",(0,t.jsx)(e.em,{children:\\"data_stream.*\\"}),\\" \\",(0,t.jsx)(e.em,{children:\\"field. This limitation will be\\"}),\\" \\",(0,t.jsx)(e.a,{href:\\"https://github.com/elastic/elasticsearch/pull/96243\\",rel:\\"nofollow\\",children:(0,t.jsx)(e.em,{children:\\"fixed\\"})}),\\" \\",(0,t.jsx)(e.em,{children:\\"in 8.8.2 and 8.9.0.\\"})]}),`\\n`,(0,t.jsx)(e.h2,{id:\\"api-keys\\",children:\\"API keys\\"}),`\\n`,(0,t.jsxs)(e.p,{children:[\\"When using the reroute processor, it is important that the API keys specified have permissions for the source and target indices. For example, if a pattern is used for routing from logs-mydata-default, the API key must have write permissions for \\",(0,t.jsx)(e.code,{children:\\"logs-*-*\\"}),\\" as data could end up in any of these indices (see example further down).\\"]}),`\\n`,(0,t.jsxs)(e.p,{children:[\\"We\\\\u2019re currently \\",(0,t.jsx)(e.a,{href:\\"https://github.com/elastic/integrations/issues/5989\\",rel:\\"nofollow\\",children:\\"working\\"}),\\" \\",(0,t.jsx)(e.a,{href:\\"https://github.com/elastic/integrations/issues/6255\\",rel:\\"nofollow\\",children:\\"on\\"}),\\" extending the API key permissions for our \\",(0,t.jsx)(e.a,{href:\\"https://www.elastic.co/integrations/data-integrations\\",rel:\\"nofollow\\",children:\\"integrations\\"}),\\" so that they allow for routing by default if you\\\\u2019re running a Fleet-managed Elastic Agent.\\"]}),`\\n`,(0,t.jsx)(e.p,{children:\\"If you\\\\u2019re using a standalone Elastic Agent, or any other shipper, you can use this as a template to create your API key:\\"}),`\\n`,(0,t.jsx)(e.pre,{children:(0,t.jsx)(e.code,{className:\\"language-bash\\",children:`POST /_security/api_key\\n{\\n \\"name\\": \\"ingest_logs\\",\\n \\"role_descriptors\\": {\\n \\"ingest_logs\\": {\\n \\"cluster\\": [\\n \\"monitor\\"\\n ],\\n \\"indices\\": [\\n {\\n \\"names\\": [\\n \\"logs-*-*\\"\\n ],\\n \\"privileges\\": [\\n \\"auto_configure\\",\\n \\"create_doc\\"\\n ]\\n }\\n ]\\n }\\n }\\n}\\n`})}),`\\n`,(0,t.jsx)(e.h2,{id:\\"future-plans\\",children:\\"Future plans\\"}),`\\n`,(0,t.jsxs)(e.p,{children:[\\"In Elasticsearch 8.8, the reroute processor was released in technical preview. The plan is to adopt this in our data sink integrations like syslog, k8s, and others. Elastic will provide default routing rules that just work out of the box, but it will also be possible for users to add their own rules. 
If you are using our integrations, follow \\",(0,t.jsx)(e.a,{href:\\"https://www.elastic.co/guide/en/elasticsearch/reference/current/ingest.html#pipelines-for-fleet-elastic-agent\\",rel:\\"nofollow\\",children:\\"this guide\\"}),\\" on how to add a custom ingest pipeline.\\"]}),`\\n`,(0,t.jsx)(e.h2,{id:\\"try-it-out\\",children:\\"Try it out!\\"}),`\\n`,(0,t.jsxs)(e.p,{children:[\\"This blog post has shown some sample use cases for document based routing. Try it out on your data by adjusting the commands for index templates and ingest pipelines to your own data, and get started with \\",(0,t.jsx)(e.a,{href:\\"https://cloud.elastic.co/registration?fromURI=/home\\",rel:\\"nofollow\\",children:\\"Elastic Cloud\\"}),\\" through a 7-day free trial. Let us know via \\",(0,t.jsx)(e.a,{href:\\"https://ela.st/reroute-feedback\\",rel:\\"nofollow\\",children:\\"this feedback form\\"}),\\" how you\\\\u2019re planning to use the \\",(0,t.jsx)(e.a,{href:\\"https://www.elastic.co/guide/en/elasticsearch/reference/current/reroute-processor.html\\",rel:\\"nofollow\\",children:\\"reroute processor\\"}),\\" and whether you have suggestions for improvement.\\"]})]})}function d(n={}){let{wrapper:e}=n.components||{};return e?(0,t.jsx)(e,{...n,children:(0,t.jsx)(h,{...n})}):h(n)}return x(_);})();\\n;return Component;"},"_id":"articles/simplifying-log-data-management-flexible-routing-elastic.mdx","_raw":{"sourceFilePath":"articles/simplifying-log-data-management-flexible-routing-elastic.mdx","sourceFileName":"simplifying-log-data-management-flexible-routing-elastic.mdx","sourceFileDir":"articles","contentType":"mdx","flattenedPath":"articles/simplifying-log-data-management-flexible-routing-elastic"},"type":"Article","imageUrl":"/assets/images/simplifying-log-data-management-flexible-routing/observability-digital-transformation-1.jpg","readingTime":"9 min read","url":"/simplifying-log-data-management-flexible-routing","headings":[{"level":2,"title":"Example use case","href":"#example-use-case"},{"level":2,"title":"API keys","href":"#api-keys"},{"level":2,"title":"Future plans","href":"#future-plans"},{"level":2,"title":"Try it out!","href":"#try-it-out"}]},{"title":"Smarter log analytics in Elastic Observability","slug":"smarter-log-analytics-in-elastic-observability","date":"2024-06-10","description":"Discover smarter log handling with Kibana\'s latest features! The new Data Source Selector lets you easily filter logs by integrations like System Logs and Nginx. Smart Fields enhance log analysis by presenting data more intuitively. Simplify your workflow and uncover deeper insights today!","image":"log-monitoring.jpeg","author":[{"slug":"achyut-jhunjhunwala","type":"Author","_raw":{}},{"slug":"mike-birnstiehl","type":"Author","_raw":{}}],"tags":[{"slug":"log-analytics","type":"Tag","_raw":{}}],"body":{"raw":"\\nDiscover a smarter way to handle your logs with Kibana\'s latest features! Our new Data Source selector makes it effortless to zero in on the logs you need, whether they\'re from System Logs or Application Logs by selecting your integrations or data views. Plus, with the introduction of Smart Fields, your log analysis is now more intuitive and insightful. Get ready to simplify your workflow and uncover deeper insights with these game-changing updates. 
Dive in and see how easy log exploration can be!\\n\\n![Smart fields](/assets/images/smarter-log-analytics-in-elastic-observability/smart-fields.png)\\n\\n## Find the logs you’re looking for\\n\\n### Focus on logs from specific integrations or data views\\n\\nWe\'ve added the Data Source selector, a handy new feature for viewing specific logs. Now, you can easily filter your logs based on your integrations, like System Logs, Nginx, or Elastic APM, or switch between different data views, like logs or metrics. This new selector is all about making your data easier to find and helping you focus on what matters most in your analysis.\\n\\n## Dive into your logs\\n\\n### Analyze logs with Smart Fields in Kibana\\n\\nLogs in Kibana have undergone a significant transformation, particularly in the way log data is presented. The once-basic table view has evolved with the introduction of Smart Fields, providing users with a more insightful and dynamic log analysis experience.\\n\\n#### Resource Smart Field - centralizing log source information\\n\\nThe resource column further elevates the Logs Explorer page by providing users with a single column for exploring the resource that created the log event. This column groups various resource-indicating fields together, streamlining the investigation process. Currently, the following [ECS](https://www.elastic.co/guide/en/ecs/current/ecs-reference.html) fields are grouped under this single column and we recommend including them in your logs:\\n* [service.name](https://www.elastic.co/guide/en/ecs/current/ecs-service.html#field-service-name)\\n* [container.name](https://www.elastic.co/guide/en/ecs/current/ecs-container.html#field-container-name)\\n* [orchestrator.namespace](https://www.elastic.co/guide/en/ecs/current/ecs-orchestrator.html#field-orchestrator-namespace)\\n* [host.name](https://www.elastic.co/guide/en/ecs/current/ecs-host.html#field-host-name)\\n* [cloud.instance.id](https://www.elastic.co/guide/en/ecs/current/ecs-cloud.html#field-cloud-instance-id)\\n\\nWe know this does not cover all use cases, and we would like your feedback on other fields that you use or that are important to you, to help us provide a tailored and user-centric log analysis experience.\\n\\n#### Content Smart Field - a deeper dive into log data\\n\\nThe content column revolutionizes log analysis by seamlessly rendering **log.level** and **message** fields. Notably, it automatically handles fallbacks, ensuring a smooth transition when the actual message field is not available. This enhancement simplifies the log exploration process, offering users a more comprehensive understanding of their data.\\n\\n#### Actions column - unleashing additional columns\\n\\nAs part of our commitment to empowering users, we are introducing the actions column, adding a layer of functionality to the document table. This column includes two powerful actions:\\n\\n- **Degraded document indicator**: This indicator provides insights about the quality of your data by indicating that fields were ignored when the document was indexed and ended up in the [\\\\_ignored](https://www.elastic.co/guide/en/elasticsearch/reference/current/mapping-ignored-field.html) property of the document. 
To help analyze what caused the document to degrade, we suggest reading this blog - [The antidote for index mapping exceptions: ignore\\\\_malformed](https://www.elastic.co/observability-labs/blog/antidote-index-mapping-exceptions-ignore-malformed).\\n- **Stacktrace indicator**: This indicator informs users of the presence of stack traces in the document. This makes it easy to navigate through logs documents and know if they have additional information.\\n\\n### Investigate individual logs by expanding log details\\nNow, when you click the expand icon in the actions column, it opens up the **Log details** flyout for any log entry. This new feature gives you a detailed overview of the entry right at your fingertips. Inside the flyout, the **Overview** tab is neatly organized into four sections—Content breakdown, Service & Infrastructure, Cloud, and Others—each offering a snapshot of the most crucial information. Plus, you\'ll find the same handy controls you\'re used to in the main table, like filtering in or out, adding or removing columns, and copying data, making it easier than ever to manage your logs directly from the flyout.\\n\\nThe [Observability AI Assistant](https://www.elastic.co/guide/en/observability/current/obs-ai-assistant.html) is fully integrated into this view providing contextual insights about the log event and helping to find similar messages.\\n\\n## Experience a streamlined approach to log exploration\\nThese enhancements simplify the process of finding and focusing on specific logs and offer more intuitive and insightful data presentation. Dive into your logs with these I tools and streamline your workflow, uncovering deeper insights with ease. Try it now and transform your log analysis!\\n","code":"var Component=(()=>{var g=Object.create;var r=Object.defineProperty;var u=Object.getOwnPropertyDescriptor;var m=Object.getOwnPropertyNames;var f=Object.getPrototypeOf,p=Object.prototype.hasOwnProperty;var y=(n,e)=>()=>(e||n((e={exports:{}}).exports,e),e.exports),w=(n,e)=>{for(var t in e)r(n,t,{get:e[t],enumerable:!0})},s=(n,e,t,a)=>{if(e&&typeof e==\\"object\\"||typeof e==\\"function\\")for(let o of m(e))!p.call(n,o)&&o!==t&&r(n,o,{get:()=>e[o],enumerable:!(a=u(e,o))||a.enumerable});return n};var v=(n,e,t)=>(t=n!=null?g(f(n)):{},s(e||!n||!n.__esModule?r(t,\\"default\\",{value:n,enumerable:!0}):t,n)),b=n=>s(r({},\\"__esModule\\",{value:!0}),n);var c=y((T,l)=>{l.exports=_jsx_runtime});var k={};w(k,{default:()=>h,frontmatter:()=>x});var i=v(c()),x={title:\\"Smarter log analytics in Elastic Observability\\",slug:\\"smarter-log-analytics-in-elastic-observability\\",description:\\"Discover smarter log handling with Kibana\'s latest features! The new Data Source Selector lets you easily filter logs by integrations like System Logs and Nginx. Smart Fields enhance log analysis by presenting data more intuitively. Simplify your workflow and uncover deeper insights today!\\",author:[{slug:\\"achyut-jhunjhunwala\\"},{slug:\\"mike-birnstiehl\\"}],tags:[{slug:\\"log-analytics\\"}],date:\\"2024-06-10\\",image:\\"log-monitoring.jpeg\\"};function d(n){let e={a:\\"a\\",h2:\\"h2\\",h3:\\"h3\\",h4:\\"h4\\",img:\\"img\\",li:\\"li\\",p:\\"p\\",strong:\\"strong\\",ul:\\"ul\\",...n.components};return(0,i.jsxs)(i.Fragment,{children:[(0,i.jsx)(e.p,{children:\\"Discover a smarter way to handle your logs with Kibana\'s latest features! 
Our new Data Source selector makes it effortless to zero in on the logs you need, whether they\'re from System Logs or Application Logs by selecting your integrations or data views. Plus, with the introduction of Smart Fields, your log analysis is now more intuitive and insightful. Get ready to simplify your workflow and uncover deeper insights with these game-changing updates. Dive in and see how easy log exploration can be!\\"}),`\\n`,(0,i.jsx)(e.p,{children:(0,i.jsx)(e.img,{src:\\"/assets/images/smarter-log-analytics-in-elastic-observability/smart-fields.png\\",alt:\\"Smart fields\\",width:\\"1999\\",height:\\"1039\\"})}),`\\n`,(0,i.jsx)(e.h2,{id:\\"find-the-logs-youre-looking-for\\",children:\\"Find the logs you\\\\u2019re looking for\\"}),`\\n`,(0,i.jsx)(e.h3,{id:\\"focus-on-logs-from-specific-integrations-or-data-views\\",children:\\"Focus on logs from specific integrations or data views\\"}),`\\n`,(0,i.jsx)(e.p,{children:\\"We\'ve added the Data Source selector, a handy new feature for viewing specific logs. Now, you can easily filter your logs based on your integrations, like System Logs, Nginx, or Elastic APM, or switch between different data views, like logs or metrics. This new selector is all about making your data easier to find and helping you focus on what matters most in your analysis.\\"}),`\\n`,(0,i.jsx)(e.h2,{id:\\"dive-into-your-logs\\",children:\\"Dive into your logs\\"}),`\\n`,(0,i.jsx)(e.h3,{id:\\"analyze-logs-with-smart-fields-in-kibana\\",children:\\"Analyze logs with Smart Fields in Kibana\\"}),`\\n`,(0,i.jsx)(e.p,{children:\\"Logs in Kibana have undergone a significant transformation, particularly in the way log data is presented. The once-basic table view has evolved with the introduction of Smart Fields, providing users with a more insightful and dynamic log analysis experience.\\"}),`\\n`,(0,i.jsx)(e.h4,{id:\\"resource-smart-field---centralizing-log-source-information\\",children:\\"Resource Smart Field - centralizing log source information\\"}),`\\n`,(0,i.jsxs)(e.p,{children:[\\"The resource column further elevates the Logs Explorer page by providing users with a single column for exploring the resource that created the log event. This column groups various resource-indicating fields together, streamlining the investigation process. 
Currently, the following \\",(0,i.jsx)(e.a,{href:\\"https://www.elastic.co/guide/en/ecs/current/ecs-reference.html\\",rel:\\"nofollow\\",children:\\"ECS\\"}),\\" fields are grouped under this single column and we recommend including them in your logs:\\"]}),`\\n`,(0,i.jsxs)(e.ul,{children:[`\\n`,(0,i.jsx)(e.li,{children:(0,i.jsx)(e.a,{href:\\"https://www.elastic.co/guide/en/ecs/current/ecs-service.html#field-service-name\\",rel:\\"nofollow\\",children:\\"service.name\\"})}),`\\n`,(0,i.jsx)(e.li,{children:(0,i.jsx)(e.a,{href:\\"https://www.elastic.co/guide/en/ecs/current/ecs-container.html#field-container-name\\",rel:\\"nofollow\\",children:\\"container.name\\"})}),`\\n`,(0,i.jsx)(e.li,{children:(0,i.jsx)(e.a,{href:\\"https://www.elastic.co/guide/en/ecs/current/ecs-orchestrator.html#field-orchestrator-namespace\\",rel:\\"nofollow\\",children:\\"orchestrator.namespace\\"})}),`\\n`,(0,i.jsx)(e.li,{children:(0,i.jsx)(e.a,{href:\\"https://www.elastic.co/guide/en/ecs/current/ecs-host.html#field-host-name\\",rel:\\"nofollow\\",children:\\"host.name\\"})}),`\\n`,(0,i.jsx)(e.li,{children:(0,i.jsx)(e.a,{href:\\"https://www.elastic.co/guide/en/ecs/current/ecs-cloud.html#field-cloud-instance-id\\",rel:\\"nofollow\\",children:\\"cloud.instance.id\\"})}),`\\n`]}),`\\n`,(0,i.jsx)(e.p,{children:\\"We know this does not include all use cases and would like your feedback on other fields you use/are important for you to help us provide a tailored and user-centric log analysis experience.\\"}),`\\n`,(0,i.jsx)(e.h4,{id:\\"content-smart-field---a-deeper-dive-into-log-data\\",children:\\"Content Smart Field - a deeper dive into log data\\"}),`\\n`,(0,i.jsxs)(e.p,{children:[\\"The content column revolutionizes log analysis by seamlessly rendering \\",(0,i.jsx)(e.strong,{children:\\"log.level\\"}),\\" and \\",(0,i.jsx)(e.strong,{children:\\"message\\"}),\\" fields. Notably, it automatically handles fallbacks, ensuring a smooth transition when the actual message field is not available. This enhancement simplifies the log exploration process, offering users a more comprehensive understanding of their data.\\"]}),`\\n`,(0,i.jsx)(e.h4,{id:\\"actions-column---unleashing-additional-columns\\",children:\\"Actions column - unleashing additional columns\\"}),`\\n`,(0,i.jsx)(e.p,{children:\\"As part of our commitment to empowering users, we are introducing the actions column, adding a layer of functionality to the document table. This column includes two powerful actions:\\"}),`\\n`,(0,i.jsxs)(e.ul,{children:[`\\n`,(0,i.jsxs)(e.li,{children:[(0,i.jsx)(e.strong,{children:\\"Degraded document indicator\\"}),\\": This indicator provides insights about the quality of your data by indicating fields were ignored when the document was indexed and ended up in the \\",(0,i.jsx)(e.a,{href:\\"https://www.elastic.co/guide/en/elasticsearch/reference/current/mapping-ignored-field.html\\",rel:\\"nofollow\\",children:\\"_ignored\\"}),\\" property of the document. To help analyze what caused the document to degrade, we suggest reading this blog - \\",(0,i.jsx)(e.a,{href:\\"https://www.elastic.co/observability-labs/blog/antidote-index-mapping-exceptions-ignore-malformed\\",rel:\\"nofollow\\",children:\\"The antidote for index mapping exceptions: ignore_malformed\\"}),\\".\\"]}),`\\n`,(0,i.jsxs)(e.li,{children:[(0,i.jsx)(e.strong,{children:\\"Stacktrace indicator\\"}),\\": This indicator informs users of the presence of stack traces in the document. 
This makes it easy to navigate through logs documents and know if they have additional information.\\"]}),`\\n`]}),`\\n`,(0,i.jsx)(e.h3,{id:\\"investigate-individual-logs-by-expanding-log-details\\",children:\\"Investigate individual logs by expanding log details\\"}),`\\n`,(0,i.jsxs)(e.p,{children:[\\"Now, when you click the expand icon in the actions column, it opens up the \\",(0,i.jsx)(e.strong,{children:\\"Log details\\"}),\\" flyout for any log entry. This new feature gives you a detailed overview of the entry right at your fingertips. Inside the flyout, the \\",(0,i.jsx)(e.strong,{children:\\"Overview\\"}),\\" tab is neatly organized into four sections\\\\u2014Content breakdown, Service & Infrastructure, Cloud, and Others\\\\u2014each offering a snapshot of the most crucial information. Plus, you\'ll find the same handy controls you\'re used to in the main table, like filtering in or out, adding or removing columns, and copying data, making it easier than ever to manage your logs directly from the flyout.\\"]}),`\\n`,(0,i.jsxs)(e.p,{children:[\\"The \\",(0,i.jsx)(e.a,{href:\\"https://www.elastic.co/guide/en/observability/current/obs-ai-assistant.html\\",rel:\\"nofollow\\",children:\\"Observability AI Assistant\\"}),\\" is fully integrated into this view providing contextual insights about the log event and helping to find similar messages.\\"]}),`\\n`,(0,i.jsx)(e.h2,{id:\\"experience-a-streamlined-approach-to-log-exploration\\",children:\\"Experience a streamlined approach to log exploration\\"}),`\\n`,(0,i.jsx)(e.p,{children:\\"These enhancements simplify the process of finding and focusing on specific logs and offer more intuitive and insightful data presentation. Dive into your logs with these I tools and streamline your workflow, uncovering deeper insights with ease. 
Try it now and transform your log analysis!\\"})]})}function h(n={}){let{wrapper:e}=n.components||{};return e?(0,i.jsx)(e,{...n,children:(0,i.jsx)(d,{...n})}):d(n)}return b(k);})();\\n;return Component;"},"_id":"articles/smarter-log-analytics-in-elastic-observability.mdx","_raw":{"sourceFilePath":"articles/smarter-log-analytics-in-elastic-observability.mdx","sourceFileName":"smarter-log-analytics-in-elastic-observability.mdx","sourceFileDir":"articles","contentType":"mdx","flattenedPath":"articles/smarter-log-analytics-in-elastic-observability"},"type":"Article","imageUrl":"/assets/images/smarter-log-analytics-in-elastic-observability/log-monitoring.jpeg","readingTime":"4 min read","url":"/smarter-log-analytics-in-elastic-observability","headings":[{"level":2,"title":"Find the logs you’re looking for","href":"#find-the-logs-youre-looking-for"},{"level":3,"title":"Focus on logs from specific integrations or data views","href":"#focus-on-logs-from-specific-integrations-or-data-views"},{"level":2,"title":"Dive into your logs","href":"#dive-into-your-logs"},{"level":3,"title":"Analyze logs with Smart Fields in Kibana","href":"#analyze-logs-with-smart-fields-in-kibana"},{"level":4,"title":"Resource Smart Field - centralizing log source information","href":"#resource-smart-field---centralizing-log-source-information"},{"level":4,"title":"Content Smart Field - a deeper dive into log data","href":"#content-smart-field---a-deeper-dive-into-log-data"},{"level":4,"title":"Actions column - unleashing additional columns","href":"#actions-column---unleashing-additional-columns"},{"level":3,"title":"Investigate individual logs by expanding log details","href":"#investigate-individual-logs-by-expanding-log-details"},{"level":2,"title":"Experience a streamlined approach to log exploration","href":"#experience-a-streamlined-approach-to-log-exploration"}]},{"title":"Enhancing SRE troubleshooting with the AI Assistant for Observability and your organization\'s runbooks","slug":"sre-troubleshooting-ai-assistant-observability-runbooks","date":"2023-11-08","description":"Empower your SRE team with this guide to enriching Elastic\'s AI Assistant Knowledge Base with your organization\'s internal observability information for enhanced alert remediation and incident management.","image":"11-hand.jpg","author":[{"slug":"almudena-sanz-olive","type":"Author","_raw":{}},{"slug":"katrin-freihofner","type":"Author","_raw":{}},{"slug":"tom-grabowski","type":"Author","_raw":{}}],"subtitle":"With this guide, empower your SRE team to achieve enhanced alert remediation and incident management","tags":[{"slug":"kubernetes","type":"Tag","_raw":{}},{"slug":"opentelemetry","type":"Tag","_raw":{}},{"slug":"ai-assistant","type":"Tag","_raw":{}},{"slug":"genai","type":"Tag","_raw":{}},{"slug":"rag","type":"Tag","_raw":{}}],"body":{"raw":"\\nThe [Observability AI Assistant](https://www.elastic.co/blog/context-aware-insights-elastic-ai-assistant-observability) helps users explore and analyze observability data using a natural language interface, by leveraging automatic function calling to request, analyze, and visualize your data to transform it into actionable observability. The Assistant can also set up a Knowledge Base, powered by [Elastic Learned Sparse EncodeR](https://www.elastic.co/guide/en/machine-learning/current/ml-nlp-elser.html) (ELSER) to provide additional context and recommendations from private data, alongside the large language models (LLMs) using RAG (Retrieval Augmented Generation). 
Elastic’s Stack — as a vector database with out-of-the-box semantic search and connectors to LLM integrations and the Observability solution — is the perfect toolkit to extract the maximum value from combining your company\'s unique observability knowledge with generative AI.\\n\\n## Enhanced troubleshooting for SREs\\n\\nSite reliability engineers (SREs) in large organizations often face challenges in locating necessary information for troubleshooting alerts, monitoring systems, or deriving insights due to scattered and potentially outdated resources. This issue is particularly significant for less experienced SREs who may require assistance even with the presence of a runbook. Recurring incidents pose another problem, as the on-call individual may lack knowledge about previous resolutions and subsequent steps. Mature SRE teams often invest considerable time in system improvements to minimize \\"fire-fighting,\\" utilizing extensive automation and documentation to support on-call personnel.\\n\\nElastic\xae addresses these challenges by combining generative AI models with relevant search results from your internal data using RAG. The [Observability AI Assistant\'s internal Knowledge Base](https://www.elastic.co/guide/en/observability/current/obs-ai-assistant.html), powered by our semantic search retrieval model [ELSER](https://www.elastic.co/guide/en/machine-learning/current/ml-nlp-elser.html), can recall information at any point during a conversation, providing RAG responses based on internal knowledge.\\n\\nThis Knowledge Base can be enriched with your organization\'s information, such as runbooks, GitHub issues, internal documentation, and Slack messages, allowing the AI Assistant to provide specific assistance. The Assistant can also document and store specific information from an ongoing conversation with an SRE while troubleshooting issues, effectively creating runbooks for future reference. Furthermore, the Assistant can generate summaries of incidents, system status, runbooks, post-mortems, or public announcements.\\n\\nThis ability to retrieve, summarize, and present contextually relevant information is a game-changer for SRE teams, transforming the work from chasing documents and data to an intuitive, contextually sensitive user experience. The Knowledge Base (see [requirements](https://www.elastic.co/guide/en/observability/current/obs-ai-assistant.html#obs-ai-requirements)) serves as a central repository of Observability knowledge, breaking documentation silos and integrating tribal knowledge, making this information accessible to SREs, enhanced with the power of LLMs.\\n\\nYour LLM provider may collect query telemetry when using the AI Assistant. If your data is confidential or has sensitive details, we recommend you verify the data treatment policy of the LLM connector you provided to the AI Assistant.\\n\\nIn this blog post, we will cover different ways to enrich your Knowledge Base (KB) with internal information. 
We will focus on a specific alert indicating that an increase in logs with “502 Bad Gateway” errors has surpassed the alert’s threshold.\\n\\n![1 - threshold breached](/assets/images/sre-troubleshooting-ai-assistant-observability-runbooks/elastic-blog-1.png)\\n\\n## How to troubleshoot an alert with the Knowledge Base\\n\\nBefore the KB has been enriched with internal information, when the SRE asks the AI Assistant about how to troubleshoot an alert, the response from the LLM will be based on the data it learned during training; however, the LLM is not able to answer questions related to private, recent, or emerging knowledge. In this case, when asking for the steps to troubleshoot the alert, the response will be based on generic information.\\n\\n![2 - troubleshooting steps](/assets/images/sre-troubleshooting-ai-assistant-observability-runbooks/elastic-blog-2.png)\\n\\nHowever, once the KB has been enriched with your runbooks, when your team receives a new alert on “502 Bad Gateway” errors, they can use the AI Assistant to access the internal knowledge to troubleshoot it, using semantic search to find the appropriate runbook in the Knowledge Base.\\n\\nIn this blog, we will cover different ways to add internal information on how to troubleshoot an alert to the Knowledge Base:\\n\\n1. Ask the Assistant to remember the content of an existing runbook.\\n\\n2. Ask the Assistant to summarize the steps taken during a conversation and store them in the Knowledge Base as a runbook.\\n\\n3. Import your runbooks from GitHub or another external source to the Knowledge Base using our Connector and APIs.\\n\\nAfter the runbooks have been added to the KB, the AI Assistant is able to recall the internal and specific information in the runbooks. By leveraging the retrieved information, the LLM can provide more accurate and relevant recommendations for troubleshooting the alert. This could include suggesting potential causes for the alert, steps to resolve the issue, preventative measures for future incidents, or asking the Assistant to help execute the steps mentioned in the runbook using functions. With more accurate and relevant information at hand, the SRE can potentially resolve the alert more quickly, reducing downtime and improving service reliability.\\n\\n![3 - troubleshooting 502 Bad gateway](/assets/images/sre-troubleshooting-ai-assistant-observability-runbooks/Screenshot_2023-11-10_at_9.52.38_AM.png)\\n\\n![4 - (5) test the backend directly](/assets/images/sre-troubleshooting-ai-assistant-observability-runbooks/elastic-blog-4.png)\\n\\nYour Knowledge Base documents will be stored in the indices _.kibana-observability-ai-assistant-kb-\\\\*_. Keep in mind that LLMs have restrictions on the amount of information the model can read and write at once, called the token limit. Imagine you\'re reading a book, but you can only remember a certain number of words at a time. Once you\'ve reached that limit, you start to forget the earlier words you\'ve read. That\'s similar to how a token limit works in an LLM.\\n\\nTo keep runbooks within the token limit for Retrieval Augmented Generation (RAG) models, ensure the information is concise and relevant. Use bullet points for clarity, avoid repetition, and use links for additional information. Regularly review and update the runbooks to remove outdated or irrelevant information. The goal is to provide clear, concise, and effective troubleshooting information without compromising the quality due to token limit constraints. 
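\\n\\nTo review what is already stored, and to spot entries that have grown too large, you can search the Knowledge Base index directly. A minimal sketch (the KB indices are hidden, hence the expand_wildcards parameter; the _source fields follow the KB document structure used in the reindex step later in this post):\\n\\n```bash\\n# List a few KB entries with their text and metadata\\nGET .kibana-observability-ai-assistant-kb-*/_search?expand_wildcards=all\\n{\\n  \\"_source\\": [\\n    \\"text\\",\\n    \\"@timestamp\\",\\n    \\"public\\",\\n    \\"user.name\\"\\n  ],\\n  \\"size\\": 10\\n}\\n```\\n\\n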
LLMs are great for summarization, so you could ask the AI Assistant to help you make the runbooks more concise.\\n\\n## Ask the assistant to remember the content of an existing runbook\\n\\nThe easiest way to store a runbook in the Knowledge Base is to just ask the AI Assistant to do it! Open a new conversation and ask “Can you store this runbook in the KB for future reference?” followed by pasting the content of the runbook in plain text.\\n\\n![5 - new conversation - let\'s work on this together](/assets/images/sre-troubleshooting-ai-assistant-observability-runbooks/elastic-blog-5.png)\\n\\n![6 - new conversation](/assets/images/sre-troubleshooting-ai-assistant-observability-runbooks/elastic-blog-6.png)\\n\\nThe AI Assistant will then store it in the Knowledge Base for you automatically, as simple as that.\\n\\n![7 - storing a runbook](/assets/images/sre-troubleshooting-ai-assistant-observability-runbooks/elastic-blog-7.png)\\n\\n## Ask the Assistant to summarize and store the steps taken during a conversation in the Knowledge Base\\n\\nYou can also ask the AI Assistant to remember something while having a conversation — for example, after you have troubleshot an alert using the AI Assistant, you could ask it to \\"remember how to troubleshoot this alert for next time.\\" The AI Assistant will create a summary of the steps taken to troubleshoot the alert and add it to the Knowledge Base, effectively creating runbooks for future reference. Next time you are faced with a similar situation, the AI Assistant will recall this information and use it to assist you.\\n\\nIn the following demo, the user asks the Assistant to remember the steps that have been followed to troubleshoot the root cause of an alert, and also to ping the Slack channel when this happens again. In a later conversation with the Assistant, the user asks what can be done about a similar problem, and the AI Assistant is able to remember the steps and also reminds the user to ping the Slack channel.\\n\\nAfter receiving the alert, open the AI Assistant chat and troubleshoot the alert. Once you have investigated it, ask the AI Assistant to summarize the analysis and the steps taken to find the root cause, so they can be remembered the next time we have a similar alert, and add any extra instructions, such as warning the Slack channel.\\n\\n![8. -teal box](/assets/images/sre-troubleshooting-ai-assistant-observability-runbooks/elastic-blog-8.png)\\n\\nThe Assistant will use the built-in functions to summarize the steps and store them into your Knowledge Base, so they can be recalled in future conversations.\\n\\n![9 - Elastic assistant chat (CROP)](/assets/images/sre-troubleshooting-ai-assistant-observability-runbooks/Screenshot_2023-11-08_at_11.34.08_AM.png)\\n\\nOpen a new conversation and ask what steps to take when troubleshooting a similar alert to the one we just investigated. 
The Assistant will be able to recall the information stored in the KB that is related to the specific alert, using semantic search based on [ELSER](https://www.elastic.co/guide/en/machine-learning/current/ml-nlp-elser.html), and provide a summary of the steps taken to troubleshoot it, including the final instruction to inform the Slack channel.\\n\\n## Import your runbooks stored in GitHub to the Knowledge Base using APIs or our GitHub Connector\\n\\nYou can also add proprietary data into the Knowledge Base programmatically by ingesting it (e.g., GitHub Issues, Markdown files, Jira tickets, text files) into Elastic.\\n\\nIf your organization has created runbooks that are stored in Markdown documents in GitHub, follow the steps in the next section of this blog post to index the runbook documents into your Knowledge Base.\\n\\n![10 - github handling 502](/assets/images/sre-troubleshooting-ai-assistant-observability-runbooks/elastic-blog-10.png)\\n\\nThe steps to ingest documents into the Knowledge Base are the following:\\n\\n![11 - using internal knowledge](/assets/images/sre-troubleshooting-ai-assistant-observability-runbooks/elastic-blog-11.png)\\n\\n### Ingest your organization’s knowledge into Elasticsearch\\n\\n**Option 1: Use the [Elastic web crawler](https://www.elastic.co/guide/en/enterprise-search/current/crawler.html).** Use the web crawler to programmatically discover, extract, and index searchable content from websites and knowledge bases. When you ingest data with the web crawler, a search-optimized [Elasticsearch\xae index](https://www.elastic.co/blog/what-is-an-elasticsearch-index) is created to hold and sync webpage content.\\n\\n**Option 2: Use Elasticsearch\'s [Index API](https://www.elastic.co/guide/en/elasticsearch/reference/current/docs-index_.html).** [Watch tutorials](https://www.elastic.co/guide/en/cloud/current/ec-ingest-guides.html) that demonstrate how you can use the Elasticsearch language clients to ingest data from an application.\\n\\n**Option 3: Build your own connector.** Follow the steps described in this blog: [How to create customized connectors for Elasticsearch](https://www.elastic.co/search-labs/how-to-create-customized-connectors-for-elasticsearch).\\n\\n**Option 4: Use Elasticsearch [Workplace Search connectors](https://www.elastic.co/guide/en/workplace-search/current/workplace-search-content-sources.html).** For example, the [GitHub connector](https://www.elastic.co/guide/en/workplace-search/current/workplace-search-github-connector.html) can automatically capture, sync, and index issues, Markdown files, pull requests, and repos.\\n\\n- Follow the steps to [configure the GitHub Connector in GitHub](https://www.elastic.co/guide/en/workplace-search/current/workplace-search-github-connector.html#github-configuration) to create an OAuth App from the GitHub platform.\\n\\n![12 - elastic workplace search](/assets/images/sre-troubleshooting-ai-assistant-observability-runbooks/elastic-blog-12.png)\\n\\n- Now you can connect a GitHub instance to your organization. Head to your organization’s **Search \\\\> Workplace Search** administrative dashboard, and locate the Sources tab.\\n\\n![13 - screenshot](/assets/images/sre-troubleshooting-ai-assistant-observability-runbooks/Screenshot_2023-11-08_at_10.19.19_AM.png)\\n\\n- Select **GitHub** (or GitHub Enterprise) in the Configured Sources list, and follow the GitHub authentication flow as presented. 
Upon successful authentication, you will be redirected to Workplace Search and will be prompted to select the Organization you would like to synchronize.\\n\\n![14 - configure and connect](/assets/images/sre-troubleshooting-ai-assistant-observability-runbooks/elastic-blog-14.png)\\n\\n![15 - how to add github](/assets/images/sre-troubleshooting-ai-assistant-observability-runbooks/elastic-blog-15.png)\\n\\n![16 - github](/assets/images/sre-troubleshooting-ai-assistant-observability-runbooks/elastic-blog-16.png)\\n\\n- After configuring the connector and selecting the organization, the content should be synchronized and you will be able to see it in Sources. If you don’t need to index all the available content, you can specify the indexing rules via the API. This will help shorten indexing times and limit the size of the index. See [Customizing indexing](https://www.elastic.co/guide/en/workplace-search/current/workplace-search-customizing-indexing-rules.html).\\n\\n![17 - source overview](/assets/images/sre-troubleshooting-ai-assistant-observability-runbooks/elastic-blog-17.png)\\n\\n- The source has created an index in Elastic with the content (Issues, Markdown Files…) from your organization. You can find the index name by navigating to **Stack Management \\\\> Index Management**, activating the **Include hidden Indices** toggle on the right, and searching for “GitHub.”\\n\\n![18 - index mgmt](/assets/images/sre-troubleshooting-ai-assistant-observability-runbooks/elastic-blog-18.png)\\n\\n- You can explore the documents you have indexed by creating a Data View and exploring it in Discover. Go to **Stack Management \\\\> Kibana \\\\> Data Views \\\\> Create data view** and enter the data view Name, Index pattern (make sure you activate “Allow hidden and system indices” in advanced options), and Timestamp field:\\n\\n![19 - create data view](/assets/images/sre-troubleshooting-ai-assistant-observability-runbooks/elastic-blog-19.png)\\n\\n- You can now explore the documents in Discover using the data view:\\n\\n![20 - data view](/assets/images/sre-troubleshooting-ai-assistant-observability-runbooks/elastic-blog-20.png)\\n\\n### Reindex your internal runbooks into the AI Assistant’s Knowledge Base index, using its semantic search pipeline\\n\\nYour Knowledge Base documents are stored in the indices _.kibana-observability-ai-assistant-kb-\\\\*_. To add your internal runbooks imported from GitHub to the KB, you just need to reindex the documents from the index you created in the previous step to the KB’s index. 
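\\n\\nIf you are unsure of the exact name of the KB’s backing index, you can list it first. A quick sketch (again, expand_wildcards is needed because the index is hidden):\\n\\n```bash\\nGET _cat/indices/.kibana-observability-ai-assistant-kb-*?v&expand_wildcards=all\\n```\\n\\n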
To add the semantic search capabilities to the documents in the KB, the reindex should also use the ELSER pipeline preconfigured for the KB, _.kibana-observability-ai-assistant-kb-ingest-pipeline_.\\n\\nBy creating a Data View with the KB index, you can explore the content in Discover.\\n\\nExecute the query below in **Management \\\\> Dev Tools**, making sure to replace the placeholders in angle brackets, both in “\\\\_source” and “inline”:\\n\\n- InternalDocsIndex : name of the index where your internal docs are stored\\n- text_field : name of the field with the text of your internal docs\\n- timestamp : name of the field of the timestamp in your internal docs\\n- public : (true or false) if true, makes a document available to all users in the defined [Kibana Space](https://www.elastic.co/guide/en/kibana/current/xpack-spaces.html) (if space is defined) or in all spaces (if space is not defined); if false, the document will be restricted to the user indicated in user.name\\n- (optional) space : if defined, restricts the internal document to be available in a specific [Kibana Space](https://www.elastic.co/guide/en/kibana/current/xpack-spaces.html)\\n- (optional) user.name : if defined, restricts the internal document to be available for a specific user\\n- (optional) \\"query\\" filter to index only certain docs (see below)\\n\\n```bash\\nPOST _reindex\\n{\\n \\"source\\": {\\n \\"index\\": \\"<InternalDocsIndex>\\",\\n \\"_source\\": [\\n \\"<text_field>\\",\\n \\"<timestamp>\\",\\n \\"namespace\\",\\n \\"is_correction\\",\\n \\"public\\",\\n \\"confidence\\"\\n ]\\n },\\n \\"dest\\": {\\n \\"index\\": \\".kibana-observability-ai-assistant-kb-000001\\",\\n \\"pipeline\\": \\".kibana-observability-ai-assistant-kb-ingest-pipeline\\"\\n },\\n \\"script\\": {\\n \\"inline\\": \\"ctx._source.text=ctx._source.remove(\\\\\\"<text_field>\\\\\\");ctx._source.namespace=\\\\\\"<space>\\\\\\";ctx._source.is_correction=false;ctx._source.public=<public>;ctx._source.confidence=\\\\\\"high\\\\\\";ctx._source[\'@timestamp\']=ctx._source.remove(\\\\\\"<timestamp>\\\\\\");ctx._source[\'user.name\'] = \\\\\\"<user.name>\\\\\\"\\"\\n }\\n}\\n```\\n\\nYou may want to specify the type of documents that you reindex in the KB — for example, you may only want to reindex Markdown documents (like runbooks). You can add a “query” filter to the documents in the source. In the case of GitHub, runbooks are identified by the “type” field containing the string “file,” and you could add that to the reindex query as indicated below. To also add GitHub Issues, include the string “issues” for the “type” field in the query:\\n\\n```json\\n\\"source\\": {\\n \\"index\\": \\"<InternalDocsIndex>\\",\\n \\"_source\\": [\\n \\"<text_field>\\",\\n \\"<timestamp>\\",\\n \\"namespace\\",\\n \\"is_correction\\",\\n \\"public\\",\\n \\"confidence\\"\\n ],\\n \\"query\\": {\\n \\"terms\\": {\\n \\"type\\": [\\"file\\"]\\n }\\n }\\n```\\n\\nGreat! Now that the data is stored in your Knowledge Base, you can ask the Observability AI Assistant any questions about it:\\n\\n![21 - new conversation](/assets/images/sre-troubleshooting-ai-assistant-observability-runbooks/elastic-blog-21.png)\\n\\n## Conclusion\\n\\nIn conclusion, leveraging internal Observability knowledge and adding it to the Elastic Knowledge Base can greatly enhance the capabilities of the AI Assistant. By manually inputting information or programmatically ingesting documents, SREs can create a central repository of knowledge accessible through the power of Elastic and LLMs. 
The AI Assistant can recall this information, assist with incidents, and provide tailored observability to specific contexts using Retrieval Augmented Generation. By following the steps outlined in this article, organizations can unlock the full potential of their Elastic AI Assistant.\\n\\n[Start enriching your Knowledge Base with the Elastic AI Assistant today](https://www.elastic.co/generative-ai/ai-assistant) and empower your SRE team with the tools they need to excel. Follow the steps outlined in this article and take your incident management and alert remediation processes to the next level. Your journey toward a more efficient and effective SRE operation begins now.\\n\\n_The release and timing of any features or functionality described in this post remain at Elastic\'s sole discretion. Any features or functionality not currently available may not be delivered on time or at all._\\n\\n_In this blog post, we may have used or referred to third party generative AI tools, which are owned and operated by their respective owners. Elastic does not have any control over the third party tools and we have no responsibility or liability for their content, operation or use, nor for any loss or damage that may arise from your use of such tools. Please exercise caution when using AI tools with personal, sensitive or confidential information. Any data you submit may be used for AI training or other purposes. There is no guarantee that information you provide will be kept secure or confidential. You should familiarize yourself with the privacy practices and terms of use of any generative AI tools prior to use._\\n\\n_Elastic, Elasticsearch, ESRE, Elasticsearch Relevance Engine and associated marks are trademarks, logos or registered trademarks of Elasticsearch N.V. in the United States and other countries. 
All other company and product names are trademarks, logos or registered trademarks of their respective owners._\\n","code":"var Component=(()=>{var u=Object.create;var s=Object.defineProperty;var g=Object.getOwnPropertyDescriptor;var m=Object.getOwnPropertyNames;var b=Object.getPrototypeOf,p=Object.prototype.hasOwnProperty;var w=(n,e)=>()=>(e||n((e={exports:{}}).exports,e),e.exports),f=(n,e)=>{for(var i in e)s(n,i,{get:e[i],enumerable:!0})},r=(n,e,i,a)=>{if(e&&typeof e==\\"object\\"||typeof e==\\"function\\")for(let o of m(e))!p.call(n,o)&&o!==i&&s(n,o,{get:()=>e[o],enumerable:!(a=g(e,o))||a.enumerable});return n};var y=(n,e,i)=>(i=n!=null?u(b(n)):{},r(e||!n||!n.__esModule?s(i,\\"default\\",{value:n,enumerable:!0}):i,n)),v=n=>r(s({},\\"__esModule\\",{value:!0}),n);var c=w((E,l)=>{l.exports=_jsx_runtime});var A={};f(A,{default:()=>d,frontmatter:()=>k});var t=y(c()),k={title:\\"Enhancing SRE troubleshooting with the AI Assistant for Observability and your organization\'s runbooks\\",slug:\\"sre-troubleshooting-ai-assistant-observability-runbooks\\",date:\\"2023-11-08\\",subtitle:\\"With this guide, empower your SRE team to achieve enhanced alert remediation and incident management\\",description:\\"Empower your SRE team with this guide to enriching Elastic\'s AI Assistant Knowledge Base with your organization\'s internal observability information for enhanced alert remediation and incident management.\\",author:[{slug:\\"almudena-sanz-olive\\"},{slug:\\"katrin-freihofner\\"},{slug:\\"tom-grabowski\\"}],image:\\"11-hand.jpg\\",tags:[{slug:\\"kubernetes\\"},{slug:\\"opentelemetry\\"},{slug:\\"ai-assistant\\"},{slug:\\"genai\\"},{slug:\\"rag\\"}]};function h(n){let e={a:\\"a\\",code:\\"code\\",em:\\"em\\",h2:\\"h2\\",h3:\\"h3\\",img:\\"img\\",li:\\"li\\",ol:\\"ol\\",p:\\"p\\",pre:\\"pre\\",strong:\\"strong\\",ul:\\"ul\\",...n.components},{Video:i}=e;return i||x(\\"Video\\",!0),(0,t.jsxs)(t.Fragment,{children:[(0,t.jsxs)(e.p,{children:[\\"The \\",(0,t.jsx)(e.a,{href:\\"https://www.elastic.co/blog/context-aware-insights-elastic-ai-assistant-observability\\",rel:\\"nofollow\\",children:\\"Observability AI Assistant\\"}),\\" helps users explore and analyze observability data using a natural language interface, by leveraging automatic function calling to request, analyze, and visualize your data to transform it into actionable observability. The Assistant can also set up a Knowledge Base, powered by \\",(0,t.jsx)(e.a,{href:\\"https://www.elastic.co/guide/en/machine-learning/current/ml-nlp-elser.html\\",rel:\\"nofollow\\",children:\\"Elastic Learned Sparse EncodeR\\"}),\\" (ELSER) to provide additional context and recommendations from private data, alongside the large language models (LLMs) using RAG (Retrieval Augmented Generation). Elastic\\\\u2019s Stack \\\\u2014 as a vector database with out-of-the-box semantic search and connectors to LLM integrations and the Observability solution \\\\u2014 is the perfect toolkit to extract the maximum value of combining your company\'s unique observability knowledge with generative AI.\\"]}),`\\n`,(0,t.jsx)(e.h2,{id:\\"enhanced-troubleshooting-for-sres\\",children:\\"Enhanced troubleshooting for SREs\\"}),`\\n`,(0,t.jsx)(e.p,{children:\'Site reliability engineers (SRE) in large organizations often face challenges in locating necessary information for troubleshooting alerts, monitoring systems, or deriving insights due to scattered and potentially outdated resources. 
This issue is particularly significant for less experienced SREs who may require assistance even with the presence of a runbook. Recurring incidents pose another problem, as the on-call individual may lack knowledge about previous resolutions and subsequent steps. Mature SRE teams often invest considerable time in system improvements to minimize \\"fire-fighting,\\" utilizing extensive automation and documentation to support on-call personnel.\'}),`\\n`,(0,t.jsxs)(e.p,{children:[\\"Elastic\\\\xAE addresses these challenges by combining generative AI models with relevant search results from your internal data using RAG. The \\",(0,t.jsx)(e.a,{href:\\"https://www.elastic.co/guide/en/observability/current/obs-ai-assistant.html\\",rel:\\"nofollow\\",children:\\"Observability AI Assistant\'s internal Knowledge Base\\"}),\\", powered by our semantic search retrieval model \\",(0,t.jsx)(e.a,{href:\\"https://www.elastic.co/guide/en/machine-learning/current/ml-nlp-elser.html\\",rel:\\"nofollow\\",children:\\"ELSER\\"}),\\", can recall information at any point during a conversation, providing RAG responses based on internal knowledge.\\"]}),`\\n`,(0,t.jsx)(e.p,{children:\\"This Knowledge Base can be enriched with your organization\'s information, such as runbooks, GitHub issues, internal documentation, and Slack messages, allowing the AI Assistant to provide specific assistance. The Assistant can also document and store specific information from an ongoing conversation with an SRE while troubleshooting issues, effectively creating runbooks for future reference. Furthermore, the Assistant can generate summaries of incidents, system status, runbooks, post-mortems, or public announcements.\\"}),`\\n`,(0,t.jsxs)(e.p,{children:[\\"This ability to retrieve, summarize, and present contextually relevant information is a game-changer for SRE teams, transforming the work from chasing documents and data to an intuitive, contextually sensitive user experience. The Knowledge Base (see \\",(0,t.jsx)(e.a,{href:\\"https://www.elastic.co/guide/en/observability/current/obs-ai-assistant.html#obs-ai-requirements\\",rel:\\"nofollow\\",children:\\"requirements\\"}),\\") serves as a central repository of Observability knowledge, breaking documentation silos and integrating tribal knowledge, making this information accessible to SREs enhanced with the power of LLMs.\\"]}),`\\n`,(0,t.jsx)(e.p,{children:\\"Your LLM provider may collect query telemetry when using the AI Assistant. If your data is confidential or has sensitive details, we recommend you verify the data treatment policy of the LLM connector you provided to the AI Assistant.\\"}),`\\n`,(0,t.jsx)(e.p,{children:\\"In this blog post, we will cover different ways to enrich your Knowledge Base (KB) with internal information. 
We will focus on a specific alert, indicating that there was an increase in logs with \\\\u201C502 Bad Gateway\\\\u201D errors that has surpassed the alert\\\\u2019s threshold.\\"}),`\\n`,(0,t.jsx)(e.p,{children:(0,t.jsx)(e.img,{src:\\"/assets/images/sre-troubleshooting-ai-assistant-observability-runbooks/elastic-blog-1.png\\",alt:\\"1 - threshold breached\\",width:\\"1648\\",height:\\"1338\\"})}),`\\n`,(0,t.jsx)(e.h2,{id:\\"how-to-troubleshoot-an-alert-with-the-knowledge-base\\",children:\\"How to troubleshoot an alert with the Knowledge Base\\"}),`\\n`,(0,t.jsx)(e.p,{children:\\"Before the KB has been enriched with internal information, when the SRE asks the AI Assistant about how to troubleshoot an alert, the response from the LLM will be based on the data it learned during training; however, the LLM is not able to answer questions related to private, recent, or emerging knowledge. In this case, when asking for the steps to troubleshoot the alert, the response will be based on generic information.\\"}),`\\n`,(0,t.jsx)(e.p,{children:(0,t.jsx)(e.img,{src:\\"/assets/images/sre-troubleshooting-ai-assistant-observability-runbooks/elastic-blog-2.png\\",alt:\\"2 - troubleshooting steps\\",width:\\"944\\",height:\\"1158\\"})}),`\\n`,(0,t.jsx)(e.p,{children:\\"However, once the KB has been enriched with your runbooks, when your team receives a new alert on \\\\u201C502 Bad Gateway\\\\u201D Errors, they can use AI Assistant to access the internal knowledge to troubleshoot it, using semantic search to find the appropriate runbook in the Knowledge Base.\\"}),`\\n`,(0,t.jsx)(e.p,{children:\\"In this blog, we will cover different ways to add internal information on how to troubleshoot an alert to the Knowledge Base:\\"}),`\\n`,(0,t.jsxs)(e.ol,{children:[`\\n`,(0,t.jsxs)(e.li,{children:[`\\n`,(0,t.jsx)(e.p,{children:\\"Ask the assistant to remember the content of an existing runbook.\\"}),`\\n`]}),`\\n`,(0,t.jsxs)(e.li,{children:[`\\n`,(0,t.jsx)(e.p,{children:\\"Ask the Assistant to summarize and store in the Knowledge Base the steps taken during a conversation and store it as a runbook.\\"}),`\\n`]}),`\\n`,(0,t.jsxs)(e.li,{children:[`\\n`,(0,t.jsx)(e.p,{children:\\"Import your runbooks from GitHub or another external source to the Knowledge Base using our Connector and APIs.\\"}),`\\n`]}),`\\n`]}),`\\n`,(0,t.jsx)(e.p,{children:\\"After the runbooks have been added to the KB, the AI Assistant is now able to recall the internal and specific information in the runbooks. By leveraging the retrieved information, the LLM could provide more accurate and relevant recommendations for troubleshooting the alert. This could include suggesting potential causes for the alert, steps to resolve the issue, preventative measures for future incidents, or asking the assistant to help execute the steps mentioned in the runbook using functions. 
With more accurate and relevant information at hand, the SRE could potentially resolve the alert more quickly, reducing downtime and improving service reliability.\\"}),`\\n`,(0,t.jsx)(e.p,{children:(0,t.jsx)(e.img,{src:\\"/assets/images/sre-troubleshooting-ai-assistant-observability-runbooks/Screenshot_2023-11-10_at_9.52.38_AM.png\\",alt:\\"3 - troubleshooting 502 Bad gateway\\",width:\\"962\\",height:\\"1352\\"})}),`\\n`,(0,t.jsx)(e.p,{children:(0,t.jsx)(e.img,{src:\\"/assets/images/sre-troubleshooting-ai-assistant-observability-runbooks/elastic-blog-4.png\\",alt:\\"4 - (5) test the backend directly\\",width:\\"968\\",height:\\"1204\\"})}),`\\n`,(0,t.jsxs)(e.p,{children:[\\"Your Knowledge Base documents will be stored in the indices \\",(0,t.jsx)(e.em,{children:\\".kibana-observability-ai-assistant-kb-\\"}),\\"*. Keep in mind that LLMs have restrictions on the amount of information the model can read and write at once, called the token limit. Imagine you\'re reading a book, but you can only remember a certain number of words at a time. Once you\'ve reached that limit, you start to forget the earlier words you\'ve read. That\'s similar to how a token limit works in an LLM.\\"]}),`\\n`,(0,t.jsx)(e.p,{children:\\"To keep runbooks within the token limit for Retrieval Augmented Generation (RAG) models, ensure the information is concise and relevant. Use bullet points for clarity, avoid repetition, and use links for additional information. Regularly review and update the runbooks to remove outdated or irrelevant information. The goal is to provide clear, concise, and effective troubleshooting information without compromising the quality due to token limit constraints. LLMs are great for summarization, so you could ask the AI Assistant to help you make the runbooks more concise.\\"}),`\\n`,(0,t.jsx)(e.h2,{id:\\"ask-the-assistant-to-remember-the-content-of-an-existing-runbook\\",children:\\"Ask the assistant to remember the content of an existing runbook\\"}),`\\n`,(0,t.jsx)(e.p,{children:\\"The easiest way to store a runbook into the Knowledge Base is to just ask the AI Assistant to do it! 
Open a new conversation and ask \\u201CCan you store this runbook in the KB for future reference?\\u201D followed by pasting the content of the runbook in plain text.\\"}),`\\n`,(0,t.jsx)(e.p,{children:(0,t.jsx)(e.img,{src:\\"/assets/images/sre-troubleshooting-ai-assistant-observability-runbooks/elastic-blog-5.png\\",alt:\\"5 - new conversation - let\'s work on this together\\",width:\\"1572\\",height:\\"1150\\"})}),`\\n`,(0,t.jsx)(e.p,{children:(0,t.jsx)(e.img,{src:\\"/assets/images/sre-troubleshooting-ai-assistant-observability-runbooks/elastic-blog-6.png\\",alt:\\"6 - new conversation\\",width:\\"1578\\",height:\\"1158\\"})}),`\\n`,(0,t.jsx)(e.p,{children:\\"The AI Assistant will then store it in the Knowledge Base for you automatically, as simple as that.\\"}),`\\n`,(0,t.jsx)(e.p,{children:(0,t.jsx)(e.img,{src:\\"/assets/images/sre-troubleshooting-ai-assistant-observability-runbooks/elastic-blog-7.png\\",alt:\\"7 - storing a runbook\\",width:\\"1630\\",height:\\"1310\\"})}),`\\n`,(0,t.jsx)(e.h2,{id:\\"ask-the-assistant-to-summarize-and-store-the-steps-taken-during-a-conversation-in-the-knowledge-base\\",children:\\"Ask the Assistant to summarize and store the steps taken during a conversation in the Knowledge Base\\"}),`\\n`,(0,t.jsx)(e.p,{children:\'You can also ask the AI Assistant to remember something while having a conversation \\u2014 for example, after you have troubleshot an alert using the AI Assistant, you could ask it to \\"remember how to troubleshoot this alert for next time.\\" The AI Assistant will create a summary of the steps taken to troubleshoot the alert and add it to the Knowledge Base, effectively creating runbooks for future reference. Next time you are faced with a similar situation, the AI Assistant will recall this information and use it to assist you.\'}),`\\n`,(0,t.jsx)(e.p,{children:\\"In the following demo, the user asks the Assistant to remember the steps that have been followed to troubleshoot the root cause of an alert, and also to ping the Slack channel when this happens again. In a later conversation with the Assistant, the user asks what can be done about a similar problem, and the AI Assistant is able to remember the steps and also reminds the user to ping the Slack channel.\\"}),`\\n`,(0,t.jsx)(e.p,{children:\\"After receiving the alert, you can open the AI Assistant chat and test troubleshooting the alert. After investigating an alert, ask the AI Assistant to summarize the analysis and the steps taken to find the root cause, so it remembers them the next time there is a similar alert, and add an extra instruction, such as warning the Slack channel.\\"}),`\\n`,(0,t.jsx)(e.p,{children:(0,t.jsx)(e.img,{src:\\"/assets/images/sre-troubleshooting-ai-assistant-observability-runbooks/elastic-blog-8.png\\",alt:\\"8 - teal box\\",width:\\"1999\\",height:\\"1127\\"})}),`\\n`,(0,t.jsx)(e.p,{children:\\"The Assistant will use the built-in functions to summarize the steps and store them into your Knowledge Base, so they can be recalled in future conversations.\\"}),`\\n`,(0,t.jsx)(e.p,{children:(0,t.jsx)(e.img,{src:\\"/assets/images/sre-troubleshooting-ai-assistant-observability-runbooks/Screenshot_2023-11-08_at_11.34.08_AM.png\\",alt:\\"9 - Elastic assistant chat (CROP)\\",width:\\"1868\\",height:\\"916\\"})}),`\\n`,(0,t.jsxs)(e.p,{children:[\\"Open a new conversation, and ask what steps to take when troubleshooting a similar alert to the one we just investigated. 
The Assistant will be able to recall the information stored in the KB that is related to the specific alert, using semantic search based on \\",(0,t.jsx)(e.a,{href:\\"https://www.elastic.co/guide/en/machine-learning/current/ml-nlp-elser.html\\",rel:\\"nofollow\\",children:\\"ELSER\\"}),\\", and provide a summary of the steps taken to troubleshoot it, including the last indication of informing the Slack channel.\\"]}),`\\n`,(0,t.jsx)(i,{vidyardUuid:\\"p14Ss8soJDkW8YoCtKPrQF\\",loop:!0}),`\\n`,(0,t.jsx)(e.h2,{id:\\"import-your-runbooks-stored-in-github-to-the-knowledge-base-using-apis-or-our-github-connector\\",children:\\"Import your runbooks stored in GitHub to the Knowledge Base using APIs or our GitHub Connector\\"}),`\\n`,(0,t.jsx)(e.p,{children:\\"You can also add proprietary data into the Knowledge Base programmatically by ingesting it (e.g., GitHub Issues, Markdown files, Jira tickets, text files) into Elastic.\\"}),`\\n`,(0,t.jsx)(e.p,{children:\\"If your organization has created runbooks that are stored in Markdown documents in GitHub, follow the steps in the next section of this blog post to index the runbook documents into your Knowledge Base.\\"}),`\\n`,(0,t.jsx)(e.p,{children:(0,t.jsx)(e.img,{src:\\"/assets/images/sre-troubleshooting-ai-assistant-observability-runbooks/elastic-blog-10.png\\",alt:\\"10 - github handling 502\\",width:\\"1999\\",height:\\"1092\\"})}),`\\n`,(0,t.jsx)(e.p,{children:\\"The steps to ingest documents into the Knowledge Base are the following:\\"}),`\\n`,(0,t.jsx)(e.p,{children:(0,t.jsx)(e.img,{src:\\"/assets/images/sre-troubleshooting-ai-assistant-observability-runbooks/elastic-blog-11.png\\",alt:\\"11 - using internal knowledge\\",width:\\"1999\\",height:\\"1122\\"})}),`\\n`,(0,t.jsx)(e.h3,{id:\\"ingest-your-organizations-knowledge-into-elasticsearch\\",children:\\"Ingest your organization\\\\u2019s knowledge into Elasticsearch\\"}),`\\n`,(0,t.jsxs)(e.p,{children:[(0,t.jsx)(e.strong,{children:\\"Option 1:\\"}),\\" \\",(0,t.jsx)(e.strong,{children:\\"Use the\\"}),\\" \\",(0,t.jsx)(e.a,{href:\\"https://www.elastic.co/guide/en/enterprise-search/current/crawler.html\\",rel:\\"nofollow\\",children:(0,t.jsx)(e.strong,{children:\\"Elastic web crawler\\"})}),\\" \\",(0,t.jsx)(e.strong,{children:\\".\\"}),\\" Use the web crawler to programmatically discover, extract, and index searchable content from websites and knowledge bases. 
When you ingest data with the web crawler, a search-optimized \\",(0,t.jsx)(e.a,{href:\\"https://www.elastic.co/blog/what-is-an-elasticsearch-index\\",rel:\\"nofollow\\",children:\\"Elasticsearch\\\\xAE index\\"}),\\" is created to hold and sync webpage content.\\"]}),`\\n`,(0,t.jsxs)(e.p,{children:[(0,t.jsx)(e.strong,{children:\\"Option 2: Use Elasticsearch\'s\\"}),\\" \\",(0,t.jsx)(e.a,{href:\\"https://www.elastic.co/guide/en/elasticsearch/reference/current/docs-index_.html\\",rel:\\"nofollow\\",children:(0,t.jsx)(e.strong,{children:\\"Index API\\"})}),\\" \\",(0,t.jsx)(e.strong,{children:\\".\\"}),\\" \\",(0,t.jsx)(e.a,{href:\\"https://www.elastic.co/guide/en/cloud/current/ec-ingest-guides.html\\",rel:\\"nofollow\\",children:\\"Watch tutorials\\"}),\\" that demonstrate how you can use the Elasticsearch language clients to ingest data from an application.\\"]}),`\\n`,(0,t.jsxs)(e.p,{children:[(0,t.jsx)(e.strong,{children:\\"Option 3: Build your own connector.\\"}),\\" Follow the steps described in this blog: \\",(0,t.jsx)(e.a,{href:\\"https://www.elastic.co/search-labs/how-to-create-customized-connectors-for-elasticsearch\\",rel:\\"nofollow\\",children:\\"How to create customized connectors for Elasticsearch\\"}),\\".\\"]}),`\\n`,(0,t.jsxs)(e.p,{children:[(0,t.jsx)(e.strong,{children:\\"Option 4: Use Elasticsearch\\"}),\\" \\",(0,t.jsx)(e.a,{href:\\"https://www.elastic.co/guide/en/workplace-search/current/workplace-search-content-sources.html\\",rel:\\"nofollow\\",children:(0,t.jsx)(e.strong,{children:\\"Workplace Search connectors\\"})}),\\" \\",(0,t.jsx)(e.strong,{children:\\".\\"}),\\" For example, the \\",(0,t.jsx)(e.a,{href:\\"https://www.elastic.co/guide/en/workplace-search/current/workplace-search-github-connector.html\\",rel:\\"nofollow\\",children:\\"GitHub connector\\"}),\\" can automatically capture, sync, and index issues, Markdown files, pull requests, and repos.\\"]}),`\\n`,(0,t.jsxs)(e.ul,{children:[`\\n`,(0,t.jsxs)(e.li,{children:[\\"Follow the steps to \\",(0,t.jsx)(e.a,{href:\\"https://www.elastic.co/guide/en/workplace-search/current/workplace-search-github-connector.html#github-configuration\\",rel:\\"nofollow\\",children:\\"configure the GitHub Connector in GitHub\\"}),\\" to create an OAuth App from the GitHub platform.\\"]}),`\\n`]}),`\\n`,(0,t.jsx)(e.p,{children:(0,t.jsx)(e.img,{src:\\"/assets/images/sre-troubleshooting-ai-assistant-observability-runbooks/elastic-blog-12.png\\",alt:\\"12 - elastic workplace search\\",width:\\"1388\\",height:\\"792\\"})}),`\\n`,(0,t.jsxs)(e.ul,{children:[`\\n`,(0,t.jsxs)(e.li,{children:[\\"Now you can connect a GitHub instance to your organization. Head to your organization\\\\u2019s \\",(0,t.jsx)(e.strong,{children:\\"Search > Workplace Search\\"}),\\" administrative dashboard, and locate the Sources tab.\\"]}),`\\n`]}),`\\n`,(0,t.jsx)(e.p,{children:(0,t.jsx)(e.img,{src:\\"/assets/images/sre-troubleshooting-ai-assistant-observability-runbooks/Screenshot_2023-11-08_at_10.19.19_AM.png\\",alt:\\"13 - screenshot\\",width:\\"912\\",height:\\"622\\"})}),`\\n`,(0,t.jsxs)(e.ul,{children:[`\\n`,(0,t.jsxs)(e.li,{children:[\\"Select \\",(0,t.jsx)(e.strong,{children:\\"GitHub\\"}),\\" (or GitHub Enterprise) in the Configured Sources list, and follow the GitHub authentication flow as presented. 
Upon the successful authentication flow, you will be redirected to Workplace Search and will be prompted to select the Organization you would like to synchronize.\\"]}),`\\n`]}),`\\n`,(0,t.jsx)(e.p,{children:(0,t.jsx)(e.img,{src:\\"/assets/images/sre-troubleshooting-ai-assistant-observability-runbooks/elastic-blog-14.png\\",alt:\\"14 - configure and connect\\",width:\\"1999\\",height:\\"1171\\"})}),`\\n`,(0,t.jsx)(e.p,{children:(0,t.jsx)(e.img,{src:\\"/assets/images/sre-troubleshooting-ai-assistant-observability-runbooks/elastic-blog-15.png\\",alt:\\"15 - how to add github\\",width:\\"1999\\",height:\\"948\\"})}),`\\n`,(0,t.jsx)(e.p,{children:(0,t.jsx)(e.img,{src:\\"/assets/images/sre-troubleshooting-ai-assistant-observability-runbooks/elastic-blog-16.png\\",alt:\\"16 - github\\",width:\\"1720\\",height:\\"976\\"})}),`\\n`,(0,t.jsxs)(e.ul,{children:[`\\n`,(0,t.jsxs)(e.li,{children:[\\"After configuring the connector and selecting the organization, the content should be synchronized and you will be able to see it in Sources. If you don\\\\u2019t need to index all the available content, you can specify the indexing rules via the API. This will help shorten indexing times and limit the size of the index. See \\",(0,t.jsx)(e.a,{href:\\"https://www.elastic.co/guide/en/workplace-search/current/workplace-search-customizing-indexing-rules.html\\",rel:\\"nofollow\\",children:\\"Customizing indexing\\"}),\\".\\"]}),`\\n`]}),`\\n`,(0,t.jsx)(e.p,{children:(0,t.jsx)(e.img,{src:\\"/assets/images/sre-troubleshooting-ai-assistant-observability-runbooks/elastic-blog-17.png\\",alt:\\"17 - source overview\\",width:\\"1370\\",height:\\"864\\"})}),`\\n`,(0,t.jsxs)(e.ul,{children:[`\\n`,(0,t.jsxs)(e.li,{children:[\\"The source has created an index in Elastic with the content (Issues, Markdown Files\\\\u2026) from your organization. You can find the index name by navigating to \\",(0,t.jsx)(e.strong,{children:\\"Stack Management > Index Management\\"}),\\" , activating the \\",(0,t.jsx)(e.strong,{children:\\"Include hidden Indices\\"}),\\" button on the right, and searching for \\\\u201CGitHub.\\\\u201D\\"]}),`\\n`]}),`\\n`,(0,t.jsx)(e.p,{children:(0,t.jsx)(e.img,{src:\\"/assets/images/sre-troubleshooting-ai-assistant-observability-runbooks/elastic-blog-18.png\\",alt:\\"18 - index mgmt\\",width:\\"1999\\",height:\\"626\\"})}),`\\n`,(0,t.jsxs)(e.ul,{children:[`\\n`,(0,t.jsxs)(e.li,{children:[\\"You can explore the documents you have indexed by creating a Data View and exploring it in Discover. 
Go to \\",(0,t.jsx)(e.strong,{children:\\"Stack Management > Kibana > Data Views > Create data view\\"}),\\" and introduce the data view Name, Index pattern (make sure you activate \\\\u201CAllow hidden and system indices\\\\u201D in advanced options), and Timestamp field:\\"]}),`\\n`]}),`\\n`,(0,t.jsx)(e.p,{children:(0,t.jsx)(e.img,{src:\\"/assets/images/sre-troubleshooting-ai-assistant-observability-runbooks/elastic-blog-19.png\\",alt:\\"19 - create data view\\",width:\\"1802\\",height:\\"1142\\"})}),`\\n`,(0,t.jsxs)(e.ul,{children:[`\\n`,(0,t.jsx)(e.li,{children:\\"You can now explore the documents in Discover using the data view:\\"}),`\\n`]}),`\\n`,(0,t.jsx)(e.p,{children:(0,t.jsx)(e.img,{src:\\"/assets/images/sre-troubleshooting-ai-assistant-observability-runbooks/elastic-blog-20.png\\",alt:\\"20 - data view\\",width:\\"1999\\",height:\\"1047\\"})}),`\\n`,(0,t.jsx)(e.h3,{id:\\"reindex-your-internal-runbooks-into-the-ai-assistants-knowledge-base-index-using-its-semantic-search-pipeline\\",children:\\"Reindex your internal runbooks into the AI Assistant\\\\u2019s Knowledge Base Index, using it\'s semantic search pipeline\\"}),`\\n`,(0,t.jsxs)(e.p,{children:[\\"Your Knowledge Base documents are stored in the indices \\",(0,t.jsx)(e.em,{children:\\".kibana-observability-ai-assistant-kb-*\\"}),\\". To add your internal runbooks imported from GitHub to the KB, you just need to reindex the documents from the index you created in the previous step to the KB\\\\u2019s index. To add the semantic search capabilities to the documents in the KB, the reindex should also use the ELSER pipeline preconfigured for the KB, \\",(0,t.jsx)(e.em,{children:\\".kibana-observability-ai-assistant-kb-ingest-pipeline\\"}),\\".\\"]}),`\\n`,(0,t.jsx)(e.p,{children:\\"By creating a Data View with the KB index, you can explore the content in Discover.\\"}),`\\n`,(0,t.jsxs)(e.p,{children:[\\"You execute the query below in \\",(0,t.jsx)(e.strong,{children:\\"Management > Dev Tools\\"}),\\" , making sure to replace the following, both on \\\\u201C_source\\\\u201D and \\\\u201Cinline\\\\u201D:\\"]}),`\\n`,(0,t.jsxs)(e.ul,{children:[`\\n`,(0,t.jsx)(e.li,{children:\\"InternalDocsIndex : name of the index where your internal docs are stored\\"}),`\\n`,(0,t.jsx)(e.li,{children:\\"text_field : name of the field with the text of your internal docs\\"}),`\\n`,(0,t.jsx)(e.li,{children:\\"timestamp : name of the field of the timestamp in your internal docs\\"}),`\\n`,(0,t.jsxs)(e.li,{children:[\\"public : (true or false) if true, makes a document available to all users in the defined \\",(0,t.jsx)(e.a,{href:\\"https://www.elastic.co/guide/en/kibana/current/xpack-spaces.html\\",rel:\\"nofollow\\",children:\\"Kibana Space\\"}),\\" (if is defined) or in all spaces (if is not defined); if false, document will be restricted to the user indicated in\\"]}),`\\n`,(0,t.jsxs)(e.li,{children:[\\"(optional) space : if defined, restricts the internal document to be available in a specific \\",(0,t.jsx)(e.a,{href:\\"https://www.elastic.co/guide/en/kibana/current/xpack-spaces.html\\",rel:\\"nofollow\\",children:\\"Kibana Space\\"})]}),`\\n`,(0,t.jsx)(e.li,{children:\\"(optional) user.name : if defined, restricts the internal document to be available for a specific user\\"}),`\\n`,(0,t.jsx)(e.li,{children:\'(optional) \\"query\\" filter to index only certain docs (see below)\'}),`\\n`]}),`\\n`,(0,t.jsx)(e.pre,{children:(0,t.jsx)(e.code,{className:\\"language-bash\\",children:`POST _reindex\\n{\\n \\"source\\": {\\n \\"index\\": \\"\\",\\n 
\\"_source\\": [\\n \\"\\",\\n \\"\\",\\n \\"namespace\\",\\n \\"is_correction\\",\\n \\"public\\",\\n \\"confidence\\"\\n ]\\n },\\n \\"dest\\": {\\n \\"index\\": \\".kibana-observability-ai-assistant-kb-000001\\",\\n \\"pipeline\\": \\".kibana-observability-ai-assistant-kb-ingest-pipeline\\"\\n },\\n \\"script\\": {\\n \\"inline\\": \\"ctx._source.text=ctx._source.remove(\\\\\\\\\\"\\\\\\\\\\");ctx._source.namespace=\\\\\\\\\\"\\\\\\\\\\";ctx._source.is_correction=false;ctx._source.public=;ctx._source.confidence=\\\\\\\\\\"high\\\\\\\\\\";ctx._source[\'@timestamp\']=ctx._source.remove(\\\\\\\\\\"\\\\\\\\\\");ctx._source[\'user.name\'] = \\\\\\\\\\"\\\\\\\\\\"\\"\\n }\\n}\\n`})}),`\\n`,(0,t.jsx)(e.p,{children:\\"You may want to specify the type of documents that you reindex in the KB \\\\u2014 for example, you may only want to reindex Markdown documents (like Runbooks). You can add a \\\\u201Cquery\\\\u201D filter to the documents in the source. In the case of GitHub, runbooks are identified with the \\\\u201Ctype\\\\u201D field containing the string \\\\u201Cfile,\\\\u201D and you could add that to the reindex query like indicated below. To add also GitHub Issues, you can also include in the query \\\\u201Ctype\\\\u201D field containing the string \\\\u201Cissues\\\\u201D:\\"}),`\\n`,(0,t.jsx)(e.pre,{children:(0,t.jsx)(e.code,{className:\\"language-json\\",children:`\\"source\\": {\\n \\"index\\": \\"\\",\\n \\"_source\\": [\\n \\"\\",\\n \\"\\",\\n \\"namespace\\",\\n \\"is_correction\\",\\n \\"public\\",\\n \\"confidence\\"\\n ],\\n \\"query\\": {\\n \\"terms\\": {\\n \\"type\\": [\\"file\\"]\\n }\\n }\\n`})}),`\\n`,(0,t.jsx)(e.p,{children:\\"Great! Now that the data is stored in your Knowledge Base, you can ask the Observability AI Assistant any questions about it:\\"}),`\\n`,(0,t.jsx)(e.p,{children:(0,t.jsx)(e.img,{src:\\"/assets/images/sre-troubleshooting-ai-assistant-observability-runbooks/elastic-blog-21.png\\",alt:\\"21 - new conversation\\",width:\\"1360\\",height:\\"1252\\"})}),`\\n`,(0,t.jsx)(i,{vidyardUuid:\\"zRxsp1EYjmR4FW4yRtSxcr\\",loop:!0}),`\\n`,(0,t.jsx)(i,{vidyardUuid:\\"vV5md3mVtY8KxUVjSvtT7V\\",loop:!0}),`\\n`,(0,t.jsx)(e.h2,{id:\\"conclusion\\",children:\\"Conclusion\\"}),`\\n`,(0,t.jsx)(e.p,{children:\\"In conclusion, leveraging internal Observability knowledge and adding it to the Elastic Knowledge Base can greatly enhance the capabilities of the AI Assistant. By manually inputting information or programmatically ingesting documents, SREs can create a central repository of knowledge accessible through the power of Elastic and LLMs. The AI Assistant can recall this information, assist with incidents, and provide tailored observability to specific contexts using Retrieval Augmented Generation. By following the steps outlined in this article, organizations can unlock the full potential of their Elastic AI Assistant.\\"}),`\\n`,(0,t.jsxs)(e.p,{children:[(0,t.jsx)(e.a,{href:\\"https://www.elastic.co/generative-ai/ai-assistant\\",rel:\\"nofollow\\",children:\\"Start enriching your Knowledge Base with the Elastic AI Assistant today\\"}),\\" and empower your SRE team with the tools they need to excel. Follow the steps outlined in this article and take your incident management and alert remediation processes to the next level. 
Your journey toward a more efficient and effective SRE operation begins now.\\"]}),`\\n`,(0,t.jsx)(e.p,{children:(0,t.jsx)(e.em,{children:\\"The release and timing of any features or functionality described in this post remain at Elastic\'s sole discretion. Any features or functionality not currently available may not be delivered on time or at all.\\"})}),`\\n`,(0,t.jsx)(e.p,{children:(0,t.jsx)(e.em,{children:\\"In this blog post, we may have used or referred to third party generative AI tools, which are owned and operated by their respective owners. Elastic does not have any control over the third party tools and we have no responsibility or liability for their content, operation or use, nor for any loss or damage that may arise from your use of such tools. Please exercise caution when using AI tools with personal, sensitive or confidential information. Any data you submit may be used for AI training or other purposes. There is no guarantee that information you provide will be kept secure or confidential. You should familiarize yourself with the privacy practices and terms of use of any generative AI tools prior to use.\\"})}),`\\n`,(0,t.jsx)(e.p,{children:(0,t.jsx)(e.em,{children:\\"Elastic, Elasticsearch, ESRE, Elasticsearch Relevance Engine and associated marks are trademarks, logos or registered trademarks of Elasticsearch N.V. in the United States and other countries. All other company and product names are trademarks, logos or registered trademarks of their respective owners.\\"})})]})}function d(n={}){let{wrapper:e}=n.components||{};return e?(0,t.jsx)(e,{...n,children:(0,t.jsx)(h,{...n})}):h(n)}function x(n,e){throw new Error(\\"Expected \\"+(e?\\"component\\":\\"object\\")+\\" `\\"+n+\\"` to be defined: you likely forgot to import, pass, or provide it.\\")}return v(A);})();\\n;return Component;"},"_id":"articles/sre-troubleshooting-ai-assistant-observability-runbooks.mdx","_raw":{"sourceFilePath":"articles/sre-troubleshooting-ai-assistant-observability-runbooks.mdx","sourceFileName":"sre-troubleshooting-ai-assistant-observability-runbooks.mdx","sourceFileDir":"articles","contentType":"mdx","flattenedPath":"articles/sre-troubleshooting-ai-assistant-observability-runbooks"},"type":"Article","imageUrl":"/assets/images/sre-troubleshooting-ai-assistant-observability-runbooks/11-hand.jpg","readingTime":"16 min read","url":"/sre-troubleshooting-ai-assistant-observability-runbooks","headings":[{"level":2,"title":"Enhanced troubleshooting for SREs","href":"#enhanced-troubleshooting-for-sres"},{"level":2,"title":"How to troubleshoot an alert with the Knowledge Base","href":"#how-to-troubleshoot-an-alert-with-the-knowledge-base"},{"level":2,"title":"Ask the assistant to remember the content of an existing runbook","href":"#ask-the-assistant-to-remember-the-content-of-an-existing-runbook"},{"level":2,"title":"Ask the Assistant to summarize and store the steps taken during a conversation in the Knowledge Base","href":"#ask-the-assistant-to-summarize-and-store-the-steps-taken-during-a-conversation-in-the-knowledge-base"},{"level":2,"title":"Import your runbooks stored in GitHub to the Knowledge Base using APIs or our GitHub Connector","href":"#import-your-runbooks-stored-in-github-to-the-knowledge-base-using-apis-or-our-github-connector"},{"level":3,"title":"Ingest your organization’s knowledge into Elasticsearch","href":"#ingest-your-organizations-knowledge-into-elasticsearch"},{"level":3,"title":"Reindex your internal runbooks into the AI Assistant’s Knowledge Base Index, using it\'s 
semantic search pipeline","href":"#reindex-your-internal-runbooks-into-the-ai-assistants-knowledge-base-index-using-its-semantic-search-pipeline"},{"level":2,"title":"Conclusion","href":"#conclusion"}]},{"title":"Better RCAs with multi-agent AI Architecture","slug":"super-agent-architecture","date":"2024-05-31","description":"Discover how specialized LLM agents collaborate to tackle complex tasks with unparalleled efficiency","image":"githubcopilot-aiassistant.png","author":[{"slug":"baha-azarmi","type":"Author","_raw":{}},{"slug":"jeff-vestal","type":"Author","_raw":{}}],"tags":[{"slug":"cloud-monitoring","type":"Tag","_raw":{}},{"slug":"genai","type":"Tag","_raw":{}}],"body":{"raw":"\\n\\n## What’s a multi agent architecture?\\n\\nYou might have heard the term Agent pop up recently in different open source projects or vendors focusing their go-to-market on GenAI. Indeed, while most GenAI applications are focused on RAG applications today, there is an increasing interest in isolating tasks that could be achieved with a more special model into what is called an Agent.\\n\\nTo be clear, an agent will be given a task, which could be a prompt, and execute the task by leveraging other models, data sources, and a knowledge base. Depending on the field of application, the results should ultimately look like generated text, pictures, charts, or sounds. \\n\\nNow, what the multi-Agent Architecture, is the process of leveraging multiple agents around a given task by: \\n* Orchestrating complex system oversight with multiple agents \\n* Analyzing and strategizing in real-time with strategic reasoning \\n* Specializing agents, tasks are decomposed into smaller focused tasks into expert-handled elements\\n* Sharing insights for cohesive action plans, creating collaborative dynamics\\n\\nIn a nutshell, multi-agent architecture\'s superpower is tackling intricate challenges beyond human speed and solving complex problems. It enables a couple of things:\\n* Scale the intelligence as the data and complexity grows. The tasks are decomposed into smaller work units, and the expert network grows accordingly.\\n* Coordinate simultaneous actions across systems, scale collaboration \\n* Evolving with data allows continuous adaptation with new data for cutting-edge decision-making. \\n* Scalability, high performance, and resilience\\n\\n## Single Agent Vs Multi-Agent Architecture\\n\\nBefore double-clicking on the multi-agent architecture, let’s talk about the single-agent architecture. The single-agent architecture is designed for straightforward tasks and a late feedback loop from the end user. There are multiple single-agent frameworks such as ReAct (Reason+Act), RAISE (ReAct+ Short/Long term memory), Reflexion, AutoGPT+P, and LATS (Language Agent Tree Search). The general process these architectures enable is as follows:\\n\\n![alt_text](/assets/images/super-agent-architecture/single.png)\\n\\nThe Agent takes action, observes, executes, and self-decides whether or not it looks complete, ends the process if finished, or resubmits the new results as an input action, the process keeps going. \\n\\nWhile simple tasks are ok with this type of agent, such as a RAG application where a user will ask a question, and the agent returns an answer based on the LLM and a knowledge base, there are a couple of limitations:\\n* Endless execution loop: the agent is never satisfied with the output and reiterates. 
\\n* Hallucinations\\n* Lack of feedback loop or enough data to build a feedback loop\\n* Lack of planning \\n\\nFor these reasons, the need for a better self-evaluation loop, externalizing the observation phase, and division of labor is rising, creating the need for a multi-agent architecture.\\n\\nMulti-agent architecture relies on taking a complex task, breaking it down into multiple smaller tasks, planning the resolution of these tasks, executing, evaluating, sharing insights, and delivering an outcome. For this, there is more than one agent; in fact, the minimum value for the network size N is N=2 with:\\n* A Manager \\n* An Expert\\n\\nWhen N=2, the source task is simple enough only to need one expert agent as the task can not be broken down into multiple tasks. Now, when the task is more complex, this is what the architecture can look like:\\n\\n![alt_text](/assets/images/super-agent-architecture/multi-vertical.png)\\n\\n\\nWith the help of an LLM, the Manager decomposes the tasks and delegates the resolutions to multiple agents. The above architecture is called Vertical since the agents directly send their results to the Manager. In a horizontal architecture, agents work and share insight together as groups, with a volunteer-based system to complete a task, they do not need a leader as shown below:\\n\\n\\n![alt_text](/assets/images/super-agent-architecture/multi-horizontal.png)\\n\\n\\nA very good paper covering these two architectures with more insights can be found here: [https://arxiv.org/abs/2404.11584](https://arxiv.org/abs/2404.11584)\\n\\n\\n## Application Vertical Multi-Agent Architecture to Observability\\n\\nVertical Multi-Agent Architecture can have a manager, experts, and a communicator. This is particularly important when these architectures expose the task\'s result to an end user.\\n\\nIn the case of Observability, what we envision in this blog post is the scenario of an SRE running through a Root Cause Analysis (RCA) process. The high-level logic will look like this: \\n\\n\\n![alt_text](/assets/images/super-agent-architecture/maar-observability.png)\\n\\n* Communicator: \\n * Read the initial command from the Human\\n * Pass command to Manager\\n * Provide status updates to Human\\n * Provide a recommended resolution plan to the Human\\n * Relay follow-up commands from Human to Manager\\n* Manager: \\n * Read the initial command from the Communicator\\u2028\\n * Create working group\\u2028\\n * Assign Experts to group\\u2028\\n * Evaluate signals and recommendations from Experts\\u2028\\n * Generate recommended resolution plan\\u2028\\n * Execute plan (optional)\\n* Expert:\\n * Each expert task with singular expertise tied to Elastic integration\\u2028\\n * Use o11y AI Assistant to triage and troubleshoot data related to their expertise\\u2028\\n * Work with other Experts as needed to correlate issues\\u2028\\n * Provide recommended root cause analysis for their expertise (if applicable)\\u2028\\n * Provide recommended resolution plan for their expertise (if applicable)\\n\\nWe believe that breaking down the experts by integration provides enough granularity in the case of observability and allows them to focus on a specific data source. 
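As a rough illustration of the vertical pattern just described, the sketch below fans a task out to per-integration experts and correlates their observations. Every name in it (Manager, Expert, the example integrations) is a hypothetical stand-in rather than code from the system in this post; a real deployment would back each role with an LLM, the Elastic AI Assistant, and a task-management layer:

```python
"""Minimal, illustrative sketch of a vertical manager/expert architecture."""
from dataclasses import dataclass


@dataclass
class Observation:
    integration: str  # the data source this expert owns (e.g., an Elastic integration)
    finding: str      # what the expert concluded within its scope


class Expert:
    """One expert per integration/data source."""

    def __init__(self, integration: str) -> None:
        self.integration = integration

    def investigate(self, task: str) -> Observation:
        # Placeholder: a real expert would triage its integration's data
        # through the AI Assistant and reason over the results with an LLM.
        return Observation(self.integration, f"no anomaly found for {task!r}")


class Manager:
    """Decomposes a task, fans it out, and correlates the experts' observations."""

    def __init__(self, experts: list[Expert]) -> None:
        self.experts = experts

    def run(self, task: str) -> str:
        # Vertical architecture: every expert reports back to the manager.
        observations = [expert.investigate(task) for expert in self.experts]
        # Placeholder correlation: a real manager would use an LLM to weigh
        # the findings and produce a recommended resolution plan.
        findings = "; ".join(f"{o.integration}: {o.finding}" for o in observations)
        return f"RCA summary for {task!r} -> {findings}"


if __name__ == "__main__":
    manager = Manager([Expert("nginx"), Expert("postgresql"), Expert("kubernetes")])
    print(manager.run("Revenue dropped by 30% in the last hour"))
```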
Doing this also gives the manager a breakdown key when receiving a complex incident involving multiple data layers (application, network, datastores, infrastructures).\\n\\nFor example, a complex task initiated by an alert in an e-commerce application could be “Revenue dropped by 30% in the last hour.” This task would be submitted to the manager, who will look at all services, applications, datastores, network components, and infrastructure involved and decompose these into investigation tasks. Each expert would investigate within their specific scope and provide observations to the manager. The manager will be responsible for correlating and providing observations on what caused the problem. \\n\\n\\n### Core Architecture\\n\\nIn the above example, we have decided to deploy the architecture on the following software stack: \\n* The agent manager and expert agents are deployed on GCP or your favorite cloud provider\\n* Most of the components are written in Python\\n* A task management layer is necessary to queue tasks to the experts\\n* Expert agents are specifically deployed by integration/data source and converse with the Elastic AI Assistant deployed in Kibana.\\n* The AI Assistant can access a real-time context to help the expert resolve their task.\\n* Elasticsearch is used as the AI Assistant context and as the expert memory to build its experience. \\n* The backend LLM here is GPT-4, now GPT-4o, running on Azure.\\n\\n\\n![alt_text](/assets/images/super-agent-architecture/core-architecture.png)\\n\\n### Agent Experience\\n\\nAgent experience is built from previous events stored in Elasticsearch, which the expert can search semantically for similar events. When they find one, they get the execution path stored in memory to execute it. \\n\\n\\n![alt_text](/assets/images/super-agent-architecture/agent-experience.png)\\n\\n\\nThe beauty of using the Elasticsearch Vector Database for this is the semantic query the agent will be able to execute against the memory and how the memory itself can be managed. Indeed, there is a notion of short- and long-term memory that could be very interesting in the case of observability: some events happen often and are probably worth storing in short-term memory because they are queried more often. Less queried but important events can be stored in a longer-term memory with more cost-effective hardware.\\n\\nThe other aspect of the Agent Experience is the semantic [reranking](https://www.elastic.co/search-labs/blog/semantic-reranking-with-retrievers) feature with Elasticsearch. 
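Before getting to reranking, here is a sketch of what the memory-recall step itself could look like in Python. It assumes a hypothetical agent-memory index whose documents carry an execution_path field and whose text was indexed through an ELSER pipeline that writes its tokens to ml.tokens; none of these names come from the post, so adjust them to your own setup:

```python
"""Illustrative sketch: recall similar past events from an agent's memory."""
from elasticsearch import Elasticsearch

es = Elasticsearch("http://localhost:9200")  # adjust to your deployment


def recall(incident: str, k: int = 3) -> list[str]:
    # Semantic (sparse-vector) search over the stored experience, using the
    # ELSER tokens produced at indexing time.
    resp = es.search(
        index="agent-memory",
        size=k,
        query={
            "text_expansion": {
                "ml.tokens": {
                    "model_id": ".elser_model_2",
                    "model_text": incident,
                }
            }
        },
    )
    # Each hit carries the execution path the agent can replay.
    return [hit["_source"]["execution_path"] for hit in resp["hits"]["hits"]]


if __name__ == "__main__":
    for path in recall("502 Bad Gateway spike on checkout service"):
        print(path)
```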
When the agent executes a task, reranking is used to surface the best outcome compared to past experience:\\n\\n\\n![alt_text](/assets/images/super-agent-architecture/agent-experience-build.png)\\n\\n\\nIf you are looking for a working example of the above, [check this blog post](https://www.elastic.co/observability-labs/blog/elastic-ai-assistant-observability-escapes-kibana) where 2 agents are working together with the Elastic Observability AI Assistant on an RCA: \\n\\n\\n![alt_text](/assets/images/super-agent-architecture/ops-burger.png)\\n\\n\\n","code":"var Component=(()=>{var g=Object.create;var r=Object.defineProperty;var u=Object.getOwnPropertyDescriptor;var p=Object.getOwnPropertyNames;var m=Object.getPrototypeOf,b=Object.prototype.hasOwnProperty;var w=(n,e)=>()=>(e||n((e={exports:{}}).exports,e),e.exports),f=(n,e)=>{for(var i in e)r(n,i,{get:e[i],enumerable:!0})},o=(n,e,i,s)=>{if(e&&typeof e==\\"object\\"||typeof e==\\"function\\")for(let a of p(e))!b.call(n,a)&&a!==i&&r(n,a,{get:()=>e[a],enumerable:!(s=u(e,a))||s.enumerable});return n};var v=(n,e,i)=>(i=n!=null?g(m(n)):{},o(e||!n||!n.__esModule?r(i,\\"default\\",{value:n,enumerable:!0}):i,n)),k=n=>o(r({},\\"__esModule\\",{value:!0}),n);var h=w((E,l)=>{l.exports=_jsx_runtime});var y={};f(y,{default:()=>d,frontmatter:()=>x});var t=v(h()),x={title:\\"Better RCAs with multi-agent AI Architecture\\",slug:\\"super-agent-architecture\\",date:\\"2024-05-31\\",description:\\"Discover how specialized LLM agents collaborate to tackle complex tasks with unparalleled efficiency\\",author:[{slug:\\"baha-azarmi\\"},{slug:\\"jeff-vestal\\"}],image:\\"githubcopilot-aiassistant.png\\",tags:[{slug:\\"cloud-monitoring\\"},{slug:\\"genai\\"}]};function c(n){let e={a:\\"a\\",h2:\\"h2\\",h3:\\"h3\\",img:\\"img\\",li:\\"li\\",p:\\"p\\",ul:\\"ul\\",...n.components};return(0,t.jsxs)(t.Fragment,{children:[(0,t.jsx)(e.h2,{id:\\"whats-a-multi-agent-architecture\\",children:\\"What\\\\u2019s a multi agent architecture?\\"}),`\\n`,(0,t.jsx)(e.p,{children:\\"You might have heard the term Agent pop up recently in different open source projects or vendors focusing their go-to-market on GenAI. Indeed, while most GenAI applications are focused on RAG applications today, there is an increasing interest in isolating tasks that could be achieved with a more special model into what is called an Agent.\\"}),`\\n`,(0,t.jsx)(e.p,{children:\\"To be clear, an agent will be given a task, which could be a prompt, and execute the task by leveraging other models, data sources, and a knowledge base. Depending on the field of application, the results should ultimately look like generated text, pictures, charts, or sounds.\\"}),`\\n`,(0,t.jsx)(e.p,{children:\\"Now, what the multi-Agent Architecture, is the process of leveraging multiple agents around a given task by:\\"}),`\\n`,(0,t.jsxs)(e.ul,{children:[`\\n`,(0,t.jsx)(e.li,{children:\\"Orchestrating complex system oversight with multiple agents\\"}),`\\n`,(0,t.jsx)(e.li,{children:\\"Analyzing and strategizing in real-time with strategic reasoning\\"}),`\\n`,(0,t.jsx)(e.li,{children:\\"Specializing agents, tasks are decomposed into smaller focused tasks into expert-handled elements\\"}),`\\n`,(0,t.jsx)(e.li,{children:\\"Sharing insights for cohesive action plans, creating collaborative dynamics\\"}),`\\n`]}),`\\n`,(0,t.jsx)(e.p,{children:\\"In a nutshell, multi-agent architecture\'s superpower is tackling intricate challenges beyond human speed and solving complex problems. 
It enables a couple of things:\\"}),`\\n`,(0,t.jsxs)(e.ul,{children:[`\\n`,(0,t.jsx)(e.li,{children:\\"Scale the intelligence as the data and complexity grows. The tasks are decomposed into smaller work units, and the expert network grows accordingly.\\"}),`\\n`,(0,t.jsx)(e.li,{children:\\"Coordinate simultaneous actions across systems, scale collaboration\\"}),`\\n`,(0,t.jsx)(e.li,{children:\\"Evolving with data allows continuous adaptation with new data for cutting-edge decision-making.\\"}),`\\n`,(0,t.jsx)(e.li,{children:\\"Scalability, high performance, and resilience\\"}),`\\n`]}),`\\n`,(0,t.jsx)(e.h2,{id:\\"single-agent-vs-multi-agent-architecture\\",children:\\"Single Agent Vs Multi-Agent Architecture\\"}),`\\n`,(0,t.jsx)(e.p,{children:\\"Before double-clicking on the multi-agent architecture, let\\\\u2019s talk about the single-agent architecture. The single-agent architecture is designed for straightforward tasks and a late feedback loop from the end user. There are multiple single-agent frameworks such as ReAct (Reason+Act), RAISE (ReAct+ Short/Long term memory), Reflexion, AutoGPT+P, and LATS (Language Agent Tree Search). The general process these architectures enable is as follows:\\"}),`\\n`,(0,t.jsx)(e.p,{children:(0,t.jsx)(e.img,{src:\\"/assets/images/super-agent-architecture/single.png\\",alt:\\"alt_text\\",width:\\"1830\\",height:\\"614\\"})}),`\\n`,(0,t.jsx)(e.p,{children:\\"The Agent takes action, observes, executes, and self-decides whether or not it looks complete, ends the process if finished, or resubmits the new results as an input action, the process keeps going.\\"}),`\\n`,(0,t.jsx)(e.p,{children:\\"While simple tasks are ok with this type of agent, such as a RAG application where a user will ask a question, and the agent returns an answer based on the LLM and a knowledge base, there are a couple of limitations:\\"}),`\\n`,(0,t.jsxs)(e.ul,{children:[`\\n`,(0,t.jsx)(e.li,{children:\\"Endless execution loop: the agent is never satisfied with the output and reiterates.\\"}),`\\n`,(0,t.jsx)(e.li,{children:\\"Hallucinations\\"}),`\\n`,(0,t.jsx)(e.li,{children:\\"Lack of feedback loop or enough data to build a feedback loop\\"}),`\\n`,(0,t.jsx)(e.li,{children:\\"Lack of planning\\"}),`\\n`]}),`\\n`,(0,t.jsx)(e.p,{children:\\"For these reasons, the need for a better self-evaluation loop, externalizing the observation phase, and division of labor is rising, creating the need for a multi-agent architecture.\\"}),`\\n`,(0,t.jsx)(e.p,{children:\\"Multi-agent architecture relies on taking a complex task, breaking it down into multiple smaller tasks, planning the resolution of these tasks, executing, evaluating, sharing insights, and delivering an outcome. For this, there is more than one agent; in fact, the minimum value for the network size N is N=2 with:\\"}),`\\n`,(0,t.jsxs)(e.ul,{children:[`\\n`,(0,t.jsx)(e.li,{children:\\"A Manager\\"}),`\\n`,(0,t.jsx)(e.li,{children:\\"An Expert\\"}),`\\n`]}),`\\n`,(0,t.jsx)(e.p,{children:\\"When N=2, the source task is simple enough only to need one expert agent as the task can not be broken down into multiple tasks. Now, when the task is more complex, this is what the architecture can look like:\\"}),`\\n`,(0,t.jsx)(e.p,{children:(0,t.jsx)(e.img,{src:\\"/assets/images/super-agent-architecture/multi-vertical.png\\",alt:\\"alt_text\\",width:\\"1816\\",height:\\"922\\"})}),`\\n`,(0,t.jsx)(e.p,{children:\\"With the help of an LLM, the Manager decomposes the tasks and delegates the resolutions to multiple agents. 
The above architecture is called Vertical since the agents directly send their results to the Manager. In a horizontal architecture, agents work and share insight together as groups, with a volunteer-based system to complete a task, they do not need a leader as shown below:\\"}),`\\n`,(0,t.jsx)(e.p,{children:(0,t.jsx)(e.img,{src:\\"/assets/images/super-agent-architecture/multi-horizontal.png\\",alt:\\"alt_text\\",width:\\"1999\\",height:\\"1241\\"})}),`\\n`,(0,t.jsxs)(e.p,{children:[\\"A very good paper covering these two architectures with more insights can be found here: \\",(0,t.jsx)(e.a,{href:\\"https://arxiv.org/abs/2404.11584\\",rel:\\"nofollow\\",children:\\"https://arxiv.org/abs/2404.11584\\"})]}),`\\n`,(0,t.jsx)(e.h2,{id:\\"application-vertical-multi-agent-architecture-to-observability\\",children:\\"Application Vertical Multi-Agent Architecture to Observability\\"}),`\\n`,(0,t.jsx)(e.p,{children:\\"Vertical Multi-Agent Architecture can have a manager, experts, and a communicator. This is particularly important when these architectures expose the task\'s result to an end user.\\"}),`\\n`,(0,t.jsx)(e.p,{children:\\"In the case of Observability, what we envision in this blog post is the scenario of an SRE running through a Root Cause Analysis (RCA) process. The high-level logic will look like this:\\"}),`\\n`,(0,t.jsx)(e.p,{children:(0,t.jsx)(e.img,{src:\\"/assets/images/super-agent-architecture/maar-observability.png\\",alt:\\"alt_text\\",width:\\"1806\\",height:\\"1134\\"})}),`\\n`,(0,t.jsxs)(e.ul,{children:[`\\n`,(0,t.jsxs)(e.li,{children:[\\"Communicator:\\",`\\n`,(0,t.jsxs)(e.ul,{children:[`\\n`,(0,t.jsx)(e.li,{children:\\"Read the initial command from the Human\\"}),`\\n`,(0,t.jsx)(e.li,{children:\\"Pass command to Manager\\"}),`\\n`,(0,t.jsx)(e.li,{children:\\"Provide status updates to Human\\"}),`\\n`,(0,t.jsx)(e.li,{children:\\"Provide a recommended resolution plan to the Human\\"}),`\\n`,(0,t.jsx)(e.li,{children:\\"Relay follow-up commands from Human to Manager\\"}),`\\n`]}),`\\n`]}),`\\n`,(0,t.jsxs)(e.li,{children:[\\"Manager:\\",`\\n`,(0,t.jsxs)(e.ul,{children:[`\\n`,(0,t.jsx)(e.li,{children:\\"Read the initial command from the Communicator\\\\u2028\\"}),`\\n`,(0,t.jsx)(e.li,{children:\\"Create working group\\\\u2028\\"}),`\\n`,(0,t.jsx)(e.li,{children:\\"Assign Experts to group\\\\u2028\\"}),`\\n`,(0,t.jsx)(e.li,{children:\\"Evaluate signals and recommendations from Experts\\\\u2028\\"}),`\\n`,(0,t.jsx)(e.li,{children:\\"Generate recommended resolution plan\\\\u2028\\"}),`\\n`,(0,t.jsx)(e.li,{children:\\"Execute plan (optional)\\"}),`\\n`]}),`\\n`]}),`\\n`,(0,t.jsxs)(e.li,{children:[\\"Expert:\\",`\\n`,(0,t.jsxs)(e.ul,{children:[`\\n`,(0,t.jsx)(e.li,{children:\\"Each expert task with singular expertise tied to Elastic integration\\\\u2028\\"}),`\\n`,(0,t.jsx)(e.li,{children:\\"Use o11y AI Assistant to triage and troubleshoot data related to their expertise\\\\u2028\\"}),`\\n`,(0,t.jsx)(e.li,{children:\\"Work with other Experts as needed to correlate issues\\\\u2028\\"}),`\\n`,(0,t.jsx)(e.li,{children:\\"Provide recommended root cause analysis for their expertise (if applicable)\\\\u2028\\"}),`\\n`,(0,t.jsx)(e.li,{children:\\"Provide recommended resolution plan for their expertise (if applicable)\\"}),`\\n`]}),`\\n`]}),`\\n`]}),`\\n`,(0,t.jsx)(e.p,{children:\\"We believe that breaking down the experts by integration provides enough granularity in the case of observability and allows them to focus on a specific data source. 
Doing this also gives the manager a breakdown key when receiving a complex incident involving multiple data layers (application, network, datastores, infrastructures).\\"}),`\\n`,(0,t.jsx)(e.p,{children:\\"For example, a complex task initiated by an alert in an e-commerce application could be \\\\u201CRevenue dropped by 30% in the last hour.\\\\u201D This task would be submitted to the manager, who will look at all services, applications, datastores, network components, and infrastructure involved and decompose these into investigation tasks. Each expert would investigate within their specific scope and provide observations to the manager. The manager will be responsible for correlating and providing observations on what caused the problem.\\"}),`\\n`,(0,t.jsx)(e.h3,{id:\\"core-architecture\\",children:\\"Core Architecture\\"}),`\\n`,(0,t.jsx)(e.p,{children:\\"In the above example, we have decided to deploy the architecture on the following software stack:\\"}),`\\n`,(0,t.jsxs)(e.ul,{children:[`\\n`,(0,t.jsx)(e.li,{children:\\"The agent manager and expert agents are deployed on GCP or your favorite cloud provider\\"}),`\\n`,(0,t.jsx)(e.li,{children:\\"Most of the components are written in Python\\"}),`\\n`,(0,t.jsx)(e.li,{children:\\"A task management layer is necessary to queue tasks to the experts\\"}),`\\n`,(0,t.jsx)(e.li,{children:\\"Expert agents are specifically deployed by integration/data source and converse with the Elastic AI Assistant deployed in Kibana.\\"}),`\\n`,(0,t.jsx)(e.li,{children:\\"The AI Assistant can access a real-time context to help the expert resolve their task.\\"}),`\\n`,(0,t.jsx)(e.li,{children:\\"Elasticsearch is used as the AI Assistant context and as the expert memory to build its experience.\\"}),`\\n`,(0,t.jsx)(e.li,{children:\\"The backend LLM here is GPT-4, now GPT-4o, running on Azure.\\"}),`\\n`]}),`\\n`,(0,t.jsx)(e.p,{children:(0,t.jsx)(e.img,{src:\\"/assets/images/super-agent-architecture/core-architecture.png\\",alt:\\"alt_text\\",width:\\"1322\\",height:\\"954\\"})}),`\\n`,(0,t.jsx)(e.h3,{id:\\"agent-experience\\",children:\\"Agent Experience\\"}),`\\n`,(0,t.jsx)(e.p,{children:\\"Agent experience is built from previous events stored in Elasticsearch, which the expert can search semantically for similar events. When they find one, they get the execution path stored in memory to execute it.\\"}),`\\n`,(0,t.jsx)(e.p,{children:(0,t.jsx)(e.img,{src:\\"/assets/images/super-agent-architecture/agent-experience.png\\",alt:\\"alt_text\\",width:\\"1126\\",height:\\"678\\"})}),`\\n`,(0,t.jsx)(e.p,{children:\\"The beauty of using the Elasticsearch Vector Database for this is the semantic query the agent will be able to execute against the memory and how the memory itself can be managed. Indeed, there is a notion of short- and long-term memory that could be very interesting in the case of observability: some events happen often and are probably worth storing in short-term memory because they are queried more often. Less queried but important events can be stored in a longer-term memory with more cost-effective hardware.\\"}),`\\n`,(0,t.jsxs)(e.p,{children:[\\"The other aspect of the Agent Experience is the semantic \\",(0,t.jsx)(e.a,{href:\\"https://www.elastic.co/search-labs/blog/semantic-reranking-with-retrievers\\",rel:\\"nofollow\\",children:\\"reranking\\"}),\\" feature with Elasticsearch. 
When the agent executes a task, reranking is used to surface the best outcome compared to past experience:\\"]}),`\\n`,(0,t.jsx)(e.p,{children:(0,t.jsx)(e.img,{src:\\"/assets/images/super-agent-architecture/agent-experience-build.png\\",alt:\\"alt_text\\",width:\\"674\\",height:\\"674\\"})}),`\\n`,(0,t.jsxs)(e.p,{children:[\\"If you are looking for a working example of the above, \\",(0,t.jsx)(e.a,{href:\\"https://www.elastic.co/observability-labs/blog/elastic-ai-assistant-observability-escapes-kibana\\",rel:\\"nofollow\\",children:\\"check this blog post\\"}),\\" where 2 agents are working together with the Elastic Observability AI Assistant on an RCA:\\"]}),`\\n`,(0,t.jsx)(e.p,{children:(0,t.jsx)(e.img,{src:\\"/assets/images/super-agent-architecture/ops-burger.png\\",alt:\\"alt_text\\",width:\\"1999\\",height:\\"1046\\"})})]})}function d(n={}){let{wrapper:e}=n.components||{};return e?(0,t.jsx)(e,{...n,children:(0,t.jsx)(c,{...n})}):c(n)}return k(y);})();\\n;return Component;"},"_id":"articles/super-agent-architecture.mdx","_raw":{"sourceFilePath":"articles/super-agent-architecture.mdx","sourceFileName":"super-agent-architecture.mdx","sourceFileDir":"articles","contentType":"mdx","flattenedPath":"articles/super-agent-architecture"},"type":"Article","imageUrl":"/assets/images/super-agent-architecture/githubcopilot-aiassistant.png","readingTime":"8 min read","url":"/super-agent-architecture","headings":[{"level":2,"title":"What’s a multi agent architecture?","href":"#whats-a-multi-agent-architecture"},{"level":2,"title":"Single Agent Vs Multi-Agent Architecture","href":"#single-agent-vs-multi-agent-architecture"},{"level":2,"title":"Application Vertical Multi-Agent Architecture to Observability","href":"#application-vertical-multi-agent-architecture-to-observability"},{"level":3,"title":"Core Architecture","href":"#core-architecture"},{"level":3,"title":"Agent Experience","href":"#agent-experience"}]},{"title":"Supercharge Your vSphere Monitoring with Enhanced vSphere Integration","slug":"supercharge-your-vsphere-monitoring-with-enhanced-vsphere-integration","date":"2024-12-11","description":"Supercharge Your vSphere Monitoring with Enhanced vSphere Integration","image":"title.jpeg","author":[{"slug":"ishleen-kaur","type":"Author","_raw":{}},{"slug":"lalit-satapathy","type":"Author","_raw":{}}],"tags":[{"slug":"metrics","type":"Tag","_raw":{}},{"slug":"observability","type":"Tag","_raw":{}}],"featured":true,"body":{"raw":"\\n[vSphere](https://www.vmware.com/products/cloud-infrastructure/vsphere) is VMware\'s cloud computing virtualization platform that provides a powerful suite for managing virtualized resources. It allows organizations to create, manage, and optimize virtual environments, providing advanced capabilities such as high availability, load balancing, and simplified resource allocation. vSphere enables efficient utilization of hardware resources, reducing costs while increasing the flexibility and scalability of IT infrastructure.\\n\\nWith the release of an upgraded [vSphere integration](https://www.elastic.co/docs/current/integrations/vsphere) we now support an enhanced set of metrics and datastreams. Package version 1.15.0 onwards introduces new datastreams that significantly improve the collection of performance metrics, providing deeper insights into your vSphere environment.\\n\\nThis enhanced version includes a total of seven datastreams, featuring critical new metrics such as disk performance, memory utilization, and network status. 
Additionally, these datastreams now offer detailed visibility into associated resources like hosts, clusters, and resource pools. To make the most of these insights, we’ve also introduced prebuilt dashboards, helping teams monitor and troubleshoot their vSphere environments with ease and precision.\\n\\nWe have expanded the performance metrics to encompass a broader range of insights across all datastreams, while also introducing new datastreams for clusters, resource pools, and networks.\\n\\nEach datastream also includes detailed alarm information, such as the alarm name, description, status (e.g., critical or warning), and the affected entity\'s name.\\n\\n## Overview of the Datastreams\\n\\n- **Host Datastream:** This datastream monitors the disk performance of the host, including metrics such as disk latency, average read/write bytes, uptime, and status. It also captures network metrics, such as packet information, network bandwidth, and utilization, as well as CPU and memory usage of the host. Additionally, it lists associated datastores, virtual machines, and networks within vSphere.\\n\\n![Host Datastream](/assets/images/supercharge-your-vsphere-monitoring-with-enhanced-vsphere-integration/hosts.png)\\n\\n- **Virtual Machine Datastream:** This datastream tracks the used and available CPU and memory resources of virtual machines, along with the uptime and status of each VM. It includes information about the host on which the VM is running, as well as detailed snapshot metrics like the number of snapshots, creation dates, and descriptions. Additionally, it provides insights into associated hosts and datastores.\\n\\n![Virtual Machine Datastream](/assets/images/supercharge-your-vsphere-monitoring-with-enhanced-vsphere-integration/virtualmachine.png)\\n\\n\\n- **Datastore Datastream:** This datastream provides information on the total, used, and available capacity of datastores, along with their overall status. It also captures metrics such as the average read/write rate and lists the hosts and virtual machines connected to each datastore.\\n\\n- **Datastore Cluster:** A datastore cluster in vSphere is a collection of datastores grouped together for efficient storage management. This datastream provides details on the total capacity and free space in the storage pod, along with the list of datastores within the cluster.\\n\\n![Datastore Datastream](/assets/images/supercharge-your-vsphere-monitoring-with-enhanced-vsphere-integration/datastore.png)\\n\\n\\n- **Resource Pool:** Resource pools in vSphere serve as logical abstractions that allow flexible allocation of CPU and memory resources. This datastream captures memory metrics, including swapped, ballooned, and shared memory, as well as CPU metrics like distributed and static CPU entitlement. It also lists the virtual machines associated with each resource pool.\\n\\n- **Network Datastream:** This datastream captures the overall configuration and status of the network, including network types (e.g., vSS, vDS). 
It also lists the hosts and virtual machines connected to each network.\\n\\n- **Cluster Datastream:** A Cluster in vSphere is a collection of ESXi hosts and their associated virtual machines that function as a unified resource pool. Clustering in vSphere allows administrators to manage multiple hosts and resources centrally, providing high availability, load balancing, and scalability to the virtual environment. This datastream includes metrics indicating whether HA or admission control is enabled and lists the hosts, networks, and datastores associated with the cluster.\\n\\n\\n## Alarms support in vSphere Integration \\n\\nAlarms are a vital part of the vSphere integration, providing real-time insights into critical events across your virtual environment. In the updated Elastic vSphere integration, alarms are now reported for all entities. They include detailed information such as the alarm name, description, severity (e.g., critical or warning), affected entity, and triggered time. These alarms are seamlessly integrated into datastreams, helping administrators and SREs quickly identify and resolve issues like resource shortages or performance bottlenecks.\\n\\n#### Example Alarm\\n\\n```yaml\\n\\"triggered_alarms\\": [\\n {\\n \\"description\\": \\"Default alarm to monitor host memory usage\\",\\n \\"entity_name\\": \\"host_us\\",\\n \\"id\\": \\"alarm-4.host-12\\",\\n \\"name\\": \\"Host memory usage\\",\\n \\"status\\": \\"red\\",\\n \\"triggered_time\\": \\"2024-08-28T10:31:26.621Z\\"\\n }\\n]\\n```\\nThis example highlights a triggered alarm for monitoring host memory usage, indicating a critical status (red) for the host \\"host_us.\\" Such alarms empower teams to act swiftly and maintain the stability of their vSphere environment. \\n\\n## Let’s Try It Out!\\n\\nThe new [vSphere integration](https://www.elastic.co/docs/current/integrations/vsphere) in Elastic Cloud is more than just a monitoring tool; it’s a comprehensive solution that empowers you to manage and optimize your virtual environments effectively. With deeper insights and enhanced data granularity, you can ensure high availability, improved load balancing, and smarter resource allocation. 
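Before you spin anything up, it is worth noting that these triggered alarms can also be consumed programmatically. Here is a minimal Python sketch of how alarms like the one above could be pulled out of Elasticsearch for downstream automation; the index pattern and field paths are assumptions for illustration, so verify them against the datastream names and mappings in your own deployment:

```python
from elasticsearch import Elasticsearch

es = Elasticsearch("https://localhost:9200", api_key="<api-key>")

# Hypothetical index pattern and field paths -- check your own mappings.
resp = es.search(
    index="metrics-vsphere*",
    query={"term": {"vsphere.host.triggered_alarms.status": "red"}},
    source=["vsphere.host.triggered_alarms"],
    size=10,
)

for hit in resp["hits"]["hits"]:
    for alarm in hit["_source"]["vsphere"]["host"]["triggered_alarms"]:
        if alarm["status"] == "red":
            print(alarm["entity_name"], alarm["name"], alarm["triggered_time"])
```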
Spin up an Elastic Cloud, and start monitoring your vSphere infrastructure.","code":"var Component=(()=>{var m=Object.create;var n=Object.defineProperty;var u=Object.getOwnPropertyDescriptor;var p=Object.getOwnPropertyNames;var g=Object.getPrototypeOf,v=Object.prototype.hasOwnProperty;var f=(a,e)=>()=>(e||a((e={exports:{}}).exports,e),e.exports),w=(a,e)=>{for(var i in e)n(a,i,{get:e[i],enumerable:!0})},o=(a,e,i,s)=>{if(e&&typeof e==\\"object\\"||typeof e==\\"function\\")for(let r of p(e))!v.call(a,r)&&r!==i&&n(a,r,{get:()=>e[r],enumerable:!(s=u(e,r))||s.enumerable});return a};var y=(a,e,i)=>(i=a!=null?m(g(a)):{},o(e||!a||!a.__esModule?n(i,\\"default\\",{value:a,enumerable:!0}):i,a)),b=a=>o(n({},\\"__esModule\\",{value:!0}),a);var h=f((D,l)=>{l.exports=_jsx_runtime});var k={};w(k,{default:()=>c,frontmatter:()=>S});var t=y(h()),S={title:\\"Supercharge Your vSphere Monitoring with Enhanced vSphere Integration\\",slug:\\"supercharge-your-vsphere-monitoring-with-enhanced-vsphere-integration\\",date:\\"2024-12-11\\",description:\\"Supercharge Your vSphere Monitoring with Enhanced vSphere Integration\\",author:[{slug:\\"ishleen-kaur\\"},{slug:\\"lalit-satapathy\\"}],image:\\"title.jpeg\\",featured:!0,tags:[{slug:\\"metrics\\"},{slug:\\"observability\\"}]};function d(a){let e={a:\\"a\\",code:\\"code\\",h2:\\"h2\\",h4:\\"h4\\",img:\\"img\\",li:\\"li\\",p:\\"p\\",pre:\\"pre\\",strong:\\"strong\\",ul:\\"ul\\",...a.components};return(0,t.jsxs)(t.Fragment,{children:[(0,t.jsxs)(e.p,{children:[(0,t.jsx)(e.a,{href:\\"https://www.vmware.com/products/cloud-infrastructure/vsphere\\",rel:\\"nofollow\\",children:\\"vSphere\\"}),\\" is VMware\'s cloud computing virtualization platform that provides a powerful suite for managing virtualized resources. It allows organizations to create, manage, and optimize virtual environments, providing advanced capabilities such as high availability, load balancing, and simplified resource allocation. vSphere enables efficient utilization of hardware resources, reducing costs while increasing the flexibility and scalability of IT infrastructure.\\"]}),`\\n`,(0,t.jsxs)(e.p,{children:[\\"With the release of an upgraded \\",(0,t.jsx)(e.a,{href:\\"https://www.elastic.co/docs/current/integrations/vsphere\\",rel:\\"nofollow\\",children:\\"vSphere integration\\"}),\\" we now support an enhanced set of metrics and datastreams. Package version 1.15.0 onwards introduces new datastreams that significantly improve the collection of performance metrics, providing deeper insights into your vSphere environment.\\"]}),`\\n`,(0,t.jsx)(e.p,{children:\\"This enhanced version includes a total of seven datastreams, featuring critical new metrics such as disk performance, memory utilization, and network status. Additionally, these datastreams now offer detailed visibility into associated resources like hosts, clusters, and resource pools. To make the most of these insights, we\\\\u2019ve also introduced prebuilt dashboards, helping teams monitor and troubleshoot their vSphere environments with ease and precision.\\"}),`\\n`,(0,t.jsx)(e.p,{children:\\"We have expanded the performance metrics to encompass a broader range of insights across all datastreams, while also introducing new datastreams for clusters, resource pools, and networks. This enhanced integration version now includes a total of seven datastreams, featuring critical new metrics such as disk performance, memory utilization, and network status. 
Additionally, these datastreams now offer detailed visibility into associated resources like hosts, clusters, and resource pools.\\"}),`\\n`,(0,t.jsx)(e.p,{children:\\"Each datastream also includes detailed alarm information, such as the alarm name, description, status (e.g. critical or warning), and the affected entity\'s name. To make the most of these insights, we\\\\u2019ve also introduced prebuilt dashboards, helping teams monitor and troubleshoot their vSphere environments with ease and precision.\\"}),`\\n`,(0,t.jsx)(e.h2,{id:\\"overview-of-the-datastreams\\",children:\\"Overview of the Datastreams\\"}),`\\n`,(0,t.jsxs)(e.ul,{children:[`\\n`,(0,t.jsxs)(e.li,{children:[(0,t.jsx)(e.strong,{children:\\"Host Datastream:\\"}),\\" This datastream monitors the disk performance of the host, including metrics such as disk latency, average read/write bytes, uptime, and status. It also captures network metrics, such as packet information, network bandwidth, and utilization, as well as CPU and memory usage of the host. Additionally, it lists associated datastores, virtual machines, and networks within vSphere.\\"]}),`\\n`]}),`\\n`,(0,t.jsx)(e.p,{children:(0,t.jsx)(e.img,{src:\\"/assets/images/supercharge-your-vsphere-monitoring-with-enhanced-vsphere-integration/hosts.png\\",alt:\\"Host Datastream\\",width:\\"1202\\",height:\\"1222\\"})}),`\\n`,(0,t.jsxs)(e.ul,{children:[`\\n`,(0,t.jsxs)(e.li,{children:[(0,t.jsx)(e.strong,{children:\\"Virtual Machine Datastream:\\"}),\\" This datastream tracks the used and available CPU and memory resources of virtual machines, along with the uptime and status of each VM. It includes information about the host on which the VM is running, as well as detailed snapshot metrics like the number of snapshots, creation dates, and descriptions. Additionally, it provides insights into associated hosts and datastores.\\"]}),`\\n`]}),`\\n`,(0,t.jsx)(e.p,{children:(0,t.jsx)(e.img,{src:\\"/assets/images/supercharge-your-vsphere-monitoring-with-enhanced-vsphere-integration/virtualmachine.png\\",alt:\\"Virtual Machine Datastream\\",width:\\"822\\",height:\\"1632\\"})}),`\\n`,(0,t.jsxs)(e.ul,{children:[`\\n`,(0,t.jsxs)(e.li,{children:[`\\n`,(0,t.jsxs)(e.p,{children:[(0,t.jsx)(e.strong,{children:\\"Datastore Datastream:\\"}),\\" This datastream provides information on the total, used, and available capacity of datastores, along with their overall status. It also captures metrics such as the average read/write rate and lists the hosts and virtual machines connected to each datastore.\\"]}),`\\n`]}),`\\n`,(0,t.jsxs)(e.li,{children:[`\\n`,(0,t.jsxs)(e.p,{children:[(0,t.jsx)(e.strong,{children:\\"Datastore Cluster:\\"}),\\" A datastore cluster in vSphere is a collection of datastores grouped together for efficient storage management. This datastream provides details on the total capacity and free space in the storage pod, along with the list of datastores within the cluster.\\"]}),`\\n`]}),`\\n`]}),`\\n`,(0,t.jsx)(e.p,{children:(0,t.jsx)(e.img,{src:\\"/assets/images/supercharge-your-vsphere-monitoring-with-enhanced-vsphere-integration/datastore.png\\",alt:\\"Datastore Datastream\\",width:\\"1196\\",height:\\"1686\\"})}),`\\n`,(0,t.jsxs)(e.ul,{children:[`\\n`,(0,t.jsxs)(e.li,{children:[`\\n`,(0,t.jsxs)(e.p,{children:[(0,t.jsx)(e.strong,{children:\\"Resource Pool:\\"}),\\" Resource pools in vSphere serve as logical abstractions that allow flexible allocation of CPU and memory resources. 
This datastream captures memory metrics, including swapped, ballooned, and shared memory, as well as CPU metrics like distributed and static CPU entitlement. It also lists the virtual machines associated with each resource pool.\\"]}),`\\n`]}),`\\n`,(0,t.jsxs)(e.li,{children:[`\\n`,(0,t.jsxs)(e.p,{children:[(0,t.jsx)(e.strong,{children:\\"Network Datastream:\\"}),\\" This datastream captures the overall configuration and status of the network, including network types (e.g., vSS, vDS). It also lists the hosts and virtual machines connected to each network.\\"]}),`\\n`]}),`\\n`,(0,t.jsxs)(e.li,{children:[`\\n`,(0,t.jsxs)(e.p,{children:[(0,t.jsx)(e.strong,{children:\\"Cluster Datastream:\\"}),\\" A Cluster in vSphere is a collection of ESXi hosts and their associated virtual machines that function as a unified resource pool. Clustering in vSphere allows administrators to manage multiple hosts and resources centrally, providing high availability, load balancing, and scalability to the virtual environment. This datastream includes metrics indicating whether HA or admission control is enabled and lists the hosts, networks, and datastores associated with the cluster.\\"]}),`\\n`]}),`\\n`]}),`\\n`,(0,t.jsx)(e.h2,{id:\\"alarms-support-in-vsphere-integration\\",children:\\"Alarms support in vSphere Integration\\"}),`\\n`,(0,t.jsx)(e.p,{children:\\"Alarms are a vital part of the vSphere integration, providing real-time insights into critical events across your virtual environment. In the updated Elastic\\\\u2019s vSphere integration, alarms are now reported for all the entities. They include detailed information such as the alarm name, description, severity (e.g., critical or warning), affected entity, and triggered time. These alarms are seamlessly integrated into datastreams, helping administrators and SREs quickly identify and resolve issues like resource shortages or performance bottlenecks.\\"}),`\\n`,(0,t.jsx)(e.h4,{id:\\"example-alarm\\",children:\\"Example Alarm\\"}),`\\n`,(0,t.jsx)(e.pre,{children:(0,t.jsx)(e.code,{className:\\"language-yaml\\",children:`\\"triggered_alarms\\": [\\n {\\n \\"description\\": \\"Default alarm to monitor host memory usage\\",\\n \\"entity_name\\": \\"host_us\\",\\n \\"id\\": \\"alarm-4.host-12\\",\\n \\"name\\": \\"Host memory usage\\",\\n \\"status\\": \\"red\\",\\n \\"triggered_time\\": \\"2024-08-28T10:31:26.621Z\\"\\n }\\n]\\n`})}),`\\n`,(0,t.jsx)(e.p,{children:\'This example highlights a triggered alarm for monitoring host memory usage, indicating a critical status (red) for the host \\"host_us.\\" Such alarms empower teams to act swiftly and maintain the stability of their vSphere environment.\'}),`\\n`,(0,t.jsx)(e.h2,{id:\\"lets-try-it-out\\",children:\\"Lets Try It Out!\\"}),`\\n`,(0,t.jsxs)(e.p,{children:[\\"The new \\",(0,t.jsx)(e.a,{href:\\"https://www.elastic.co/docs/current/integrations/vsphere\\",rel:\\"nofollow\\",children:\\"vSphere integration\\"}),\\" in Elastic Cloud is more than just a monitoring tool; it\\\\u2019s a comprehensive solution that empowers you to manage and optimize your virtual environments effectively. With deeper insights and enhanced data granularity, you can ensure high availability, improved load balancing, and smarter resource allocation. 
Spin up an Elastic Cloud, and start monitoring your vSphere infrastructure.\\"]})]})}function c(a={}){let{wrapper:e}=a.components||{};return e?(0,t.jsx)(e,{...a,children:(0,t.jsx)(d,{...a})}):d(a)}return b(k);})();\\n;return Component;"},"_id":"articles/supercharge-your-vsphere-monitoring-with-enhanced-vsphere-integration.mdx","_raw":{"sourceFilePath":"articles/supercharge-your-vsphere-monitoring-with-enhanced-vsphere-integration.mdx","sourceFileName":"supercharge-your-vsphere-monitoring-with-enhanced-vsphere-integration.mdx","sourceFileDir":"articles","contentType":"mdx","flattenedPath":"articles/supercharge-your-vsphere-monitoring-with-enhanced-vsphere-integration"},"type":"Article","imageUrl":"/assets/images/supercharge-your-vsphere-monitoring-with-enhanced-vsphere-integration/title.jpeg","readingTime":"5 min read","url":"/supercharge-your-vsphere-monitoring-with-enhanced-vsphere-integration","headings":[{"level":2,"title":"Overview of the Datastreams","href":"#overview-of-the-datastreams"},{"level":2,"title":"Alarms support in vSphere Integration ","href":"#alarms-support-in-vsphere-integration-"},{"level":4,"title":"Example Alarm","href":"#example-alarm"},{"level":2,"title":"Lets Try It Out!","href":"#lets-try-it-out"}]},{"title":"Tailoring span names and enriching spans without changing code with OpenTelemetry - Part 1","slug":"tailoring-span-names-and-enriching-spans-without-changing-code-with-opentelemetry","date":"2024-08-26","description":"The OpenTelemetry Collector offers powerful capabilities to enrich and refine telemetry data before it reaches your observability tools. In this blog post, we\'ll explore how to leverage the Collector to create more meaningful transaction names in Elastic Observability, significantly enhancing the value of your monitoring data.","image":"tailor.jpg","author":[{"slug":"david-hope","type":"Author","_raw":{}}],"tags":[{"slug":"opentelemetry","type":"Tag","_raw":{}},{"slug":"apm","type":"Tag","_raw":{}},{"slug":"instrumentation","type":"Tag","_raw":{}}],"body":{"raw":"\\nThe OpenTelemetry Collector offers powerful capabilities to enrich and refine telemetry data before it reaches your observability tools. In this blog post, we\'ll explore how to leverage the Collector to create more meaningful transaction names in Elastic Observability, significantly enhancing the value of your monitoring data.\\n\\nConsider this scenario: You have a transaction labeled simply as \\"HTTP GET\\" with an average response time of 5ms. However, this generic label masks a variety of distinct operations – payment processing, user logins, and adding items to a cart. Does that 5ms average truly represent the performance of these diverse actions? Clearly not. \\n\\nThe other problem that happens is that span traces become all mixed up so that login spans and image serving spans all become part of the same bucket, this makes things like latency correlation analysis hard in Elastic. \\n\\nWe\'ll focus on a specific technique using the collector\'s attributes, and transform processors to extract meaningful information from HTTP URLs and use it to create more descriptive span names. 
This approach not only improves the accuracy of your metrics but also enhances your ability to quickly identify and troubleshoot performance issues across your microservices architecture.\\n\\nBy using these processors in combination, we can quickly address the issue of overly generic transaction names, creating more granular and informative identifiers that provide accurate visibility into your services\' performance.\\n\\nHowever, it\'s crucial to approach this technique with caution. While more detailed transaction names can significantly improve observability, they can also lead to an unexpected challenge: cardinality explosion. As we dive into the implementation details, we\'ll also discuss how to strike the right balance between granularity and manageability, ensuring that our solution enhances rather than overwhelms our observability stack.\\n\\nIn the following sections, we\'ll walk through the configuration step-by-step, explaining how each processor contributes to our goal, and highlighting best practices to avoid potential pitfalls like cardinality issues. Whether you\'re new to OpenTelemetry or looking to optimize your existing setup, this guide will help you unlock more meaningful insights from your telemetry data.\\n\\n## Prerequisites and configuration\\n\\nIf you plan on following this blog, here are some of the components and details we used to set up the configuration:\\n\\n- Ensure you have an account on Elastic Cloud and a deployed stack (see instructions [here](https://www.elastic.co/cloud/)).\\n- I am also using the OpenTelemetry demo in my environment, this is important to follow along with as this demo has the specific issue I want to address. You should clone the repository and follow the instructions [here](https://github.com/elastic/opentelemetry-demo) to get this up and running. I recommend using Kubernetes and I will be doing this in my AWS EKS (Elastic Kubernetes Service) environment. \\n\\n### The OpenTelemetry Demo\\n\\nThe OpenTelemetry Demo is a comprehensive, microservices-based application designed to showcase the capabilities and best practices of OpenTelemetry instrumentation. It simulates an e-commerce platform, incorporating various services such as frontend, cart, checkout, and payment processing. This demo serves as an excellent learning tool and reference implementation for developers and organizations looking to adopt OpenTelemetry.\\n\\nThe demo application generates traces, metrics, and logs across its interconnected services, demonstrating how OpenTelemetry can provide deep visibility into complex, distributed systems. 
It\'s particularly useful for experimenting with different collection, processing, and visualization techniques, making it an ideal playground for exploring observability concepts and tools like the OpenTelemetry Collector.\\n\\nBy using real-world scenarios and common architectural patterns, the OpenTelemetry Demo helps users understand how to effectively implement observability in their own applications and how to leverage the data for performance optimization and troubleshooting.\\n\\nOnce you have an Elastic Cloud instance and you fire up the OpenTelemetry demo, you should see something like this on the Elastic Service Map page:\\n\\n![](/assets/images/tailoring-span-names-and-enriching-spans-without-changing-code-with-opentelemetry/image3.png)\\n\\nNavigating to the traces page will give you the following setup.\\n\\n![](/assets/images/tailoring-span-names-and-enriching-spans-without-changing-code-with-opentelemetry/image1.png)\\n\\nAs you can see, there are some very broad transaction names here, like HTTP GET, and the averages will not be very accurate for specific business functions within your services, as shown. \\n\\n![](/assets/images/tailoring-span-names-and-enriching-spans-without-changing-code-with-opentelemetry/image6.png)\\n\\nSo let\'s fix that with the OpenTelemetry Collector. \\n\\n## The OpenTelemetry Collector\\n\\nThe OpenTelemetry Collector is a vital component in the OpenTelemetry ecosystem, serving as a vendor-agnostic way to receive, process, and export telemetry data. It acts as a centralized observability pipeline that can collect traces, metrics, and logs from various sources, then transform and route this data to multiple backend systems. \\n\\nThe collector\'s flexible architecture allows for easy configuration and extension through a wide range of receivers, processors, and exporters which you can explore over [here](https://github.com/open-telemetry/opentelemetry-collector-contrib). I have personally found navigating the \'contrib\' archive incredibly useful for finding techniques that I didn\'t know existed. This makes the OpenTelemetry Collector an invaluable tool for organizations looking to standardize their observability data pipeline, reduce overhead, and seamlessly integrate with different monitoring and analysis platforms.\\n\\nLet\'s go back to our problem: how do we change the transaction names that Elastic is using to something more useful, so that our HTTP GET translates to something like payment-service/login? The first thing we do is take the full http url and consider which parts of it relate to our transaction. Looking at the span details, we see a URL:\\n\\n```\\nmy-otel-demo-frontendproxy:8080/api/recommendations?productIds=&sessionId=45a9f3a4-39d8-47ed-bf16-01e6e81c80bc&currencyCode=\\n```\\n\\n![](/assets/images/tailoring-span-names-and-enriching-spans-without-changing-code-with-opentelemetry/image4.png)\\n\\n\\nNow, obviously we wouldn\'t want to create transaction names that map to every single session id; that would lead to the cardinality explosion we talked about earlier. However, something like the first two parts of the url, \'api/recommendations\', looks like exactly the kind of thing we need.\\n\\n### The attributes processor\\n\\nThe OpenTelemetry collector gives us a useful tool [here](https://github.com/open-telemetry/opentelemetry-collector-contrib/blob/main/processor/attributesprocessor): the attributes processor can help us extract parts of the url to use later in our observability pipeline. 
Doing this is very simple: we build a regex like the one below. I should mention that I did not write this regex myself; I used an LLM to generate it. Never fear regex again!\\n\\n```yaml\\nattributes:\\n actions:\\n - key: http.url\\n action: extract\\n pattern: \'^(?P<short_url>https?://[^/]+(?:/[^/]+)*)(?:/(?P<url_truncated_path>[^/?]+/[^/?]+))(?:\\\\?|/?$)\'\\n```\\n\\nThis configuration is doing some heavy lifting for us, so let\'s break it down:\\n\\n- We\'re using the attributes processor, which is perfect for manipulating span attributes.\\n- We\'re targeting the http.url attribute of incoming spans.\\n- The extract action tells the processor to pull out specific parts of the URL using our regex pattern.\\n\\nNow, about that regex - it\'s designed to extract two key pieces of information:\\n\\n1. `short_url`: This captures the protocol, domain, and optionally the first path segment. For example, in \\"https://example.com/api/users/profile\\", it would grab \\"https://example.com/api\\".\\n2. `url_truncated_path`: This snags the next two path segments (if they exist). In our example, it would extract \\"users/profile\\".\\n\\nWhy is this useful? Well, it allows us to create more specific transaction names based on the URL structure, without including overly specific details that could lead to cardinality explosion. For instance, we avoid capturing unique IDs or query parameters that would create a new transaction name for every single request.\\n\\nSo, if we have a URL like \\"https://example.com/api/users/profile?id=123\\", our extracted `url_truncated_path` would be \\"users/profile\\". This gives us a nice balance - it\'s more specific than just \\"HTTP GET\\", but not so specific that we end up with thousands of unique transaction names.\\n\\nIt\'s worth mentioning here that if you don\'t have an attribute suitable for naming your transactions, look at the options for your SDK or agent. As an example, the Java automatic instrumentation Otel agent has the [following options](https://opentelemetry.io/docs/zero-code/java/agent/instrumentation/http/#capturing-http-request-and-response-headers) for capturing request and response headers. You can then use this data to name your transactions if the url is insufficient! \\n\\nIn the next steps, we\'ll see how to use this extracted information to create more meaningful span names, providing better granularity in our observability data without overwhelming our system. Remember, the goal is to enhance our visibility, not to drown in a sea of overly specific metrics!\\n\\n### The transform processor\\n\\nNow that we\'ve extracted the relevant parts of our URLs, it\'s time to put that information to good use. Enter the transform processor - our next powerful tool in the OpenTelemetry Collector pipeline.\\n\\nThe [transform processor](https://github.com/open-telemetry/opentelemetry-collector-contrib/tree/main/processor/transformprocessor) allows us to modify various aspects of our telemetry data, including span names. 
Here\'s the configuration we\'ll use:\\n\\n```yaml\\ntransform:\\n trace_statements:\\n - context: span\\n statements:\\n - set(name, attributes[\\"url_truncated_path\\"])\\n```\\n\\nLet\'s break this down:\\n\\n- We\'re using the transform processor, which gives us fine-grained control over our spans.\\n- We\'re focusing on `trace_statements`, as we want to modify our trace spans.\\n- The `context: span` tells the processor to apply these changes to each individual span.\\n- Our statement is where the magic happens: we\'re setting the span\'s name to the value of the `url_truncated_path` attribute we extracted earlier.\\n\\nWhat does this mean in practice? Remember our previous example URL \\"https://example.com/api/users/profile?id=123\\"? Instead of a generic span name like \\"HTTP GET\\", we\'ll now have a much more informative name: \\"users/profile\\".\\n\\nThis transformation brings several benefits:\\n\\n1. Improved Readability: At a glance, you can now see what part of your application is being accessed.\\n2. Better Aggregation: You can easily group and analyze similar requests, like all operations on user profiles.\\n3. Balanced Cardinality: We\'re specific enough to be useful, but not so specific that we create a new span name for every unique URL.\\n\\nBy combining the attribute extraction we did earlier with this transformation, we\'ve created a powerful system for generating meaningful span names. This approach gives us deep insight into our application\'s behavior without the risk of cardinality explosion. \\n\\n## Putting it All Together\\n\\nThe resulting config for the OpenTelemetry collector is below. Remember, this goes into the opentelemetry-demo/kubernetes/elastic-helm/configmap-deployment.yaml file and is applied with kubectl apply -f configmap-deployment.yaml.\\n\\n```yaml\\n---\\napiVersion: v1\\nkind: ConfigMap\\nmetadata:\\n name: elastic-otelcol-agent\\n namespace: default\\n labels:\\n app.kubernetes.io/name: otelcol\\n\\ndata:\\n relay: |\\n connectors:\\n spanmetrics: {}\\n exporters:\\n debug: {}\\n otlp/elastic:\\n endpoint: ${env:ELASTIC_APM_ENDPOINT}\\n compression: none\\n headers:\\n Authorization: Bearer ${ELASTIC_APM_SECRET_TOKEN}\\n extensions:\\n processors:\\n batch: {}\\n resource:\\n attributes:\\n - key: deployment.environment\\n value: \\"opentelemetry-demo\\"\\n action: upsert\\n attributes:\\n actions:\\n - key: http.url\\n action: extract\\n pattern: \'^(?P<short_url>https?://[^/]+(?:/[^/]+)*)(?:/(?P<url_truncated_path>[^/?]+/[^/?]+))(?:\\\\?|/?$)\'\\n transform:\\n trace_statements:\\n - context: span\\n statements:\\n - set(name, attributes[\\"url_truncated_path\\"])\\n receivers:\\n httpcheck/frontendproxy:\\n targets:\\n - endpoint: https://example-frontendproxy:8080\\n otlp:\\n protocols:\\n grpc:\\n endpoint: ${env:MY_POD_IP}:4317\\n http:\\n cors:\\n allowed_origins:\\n - http://*\\n - https://*\\n endpoint: ${env:MY_POD_IP}:4318\\n service:\\n extensions:\\n pipelines:\\n logs:\\n exporters:\\n - debug\\n - otlp/elastic\\n processors:\\n - batch\\n - resource\\n - attributes\\n - transform\\n receivers:\\n - otlp\\n metrics:\\n exporters:\\n - otlp/elastic\\n - debug\\n processors:\\n - batch\\n - resource\\n receivers:\\n - httpcheck/frontendproxy\\n - otlp\\n - spanmetrics\\n traces:\\n exporters:\\n - otlp/elastic\\n - debug\\n - spanmetrics\\n processors:\\n - batch\\n - resource\\n - attributes\\n - transform\\n receivers:\\n - otlp\\n telemetry:\\n metrics:\\n address: ${env:MY_POD_IP}:8888\\n```\\n\\n\\nYou\'ll notice that we tie everything together by adding our 
enrichment and transformations to the traces section in pipelines at the bottom of the collector config. This is the definition of our observability pipeline, bringing together all the pieces we\'ve discussed to create more meaningful and actionable telemetry data.\\n\\nBy implementing this configuration, you\'re taking a significant step towards more insightful observability. You\'re not just collecting data; you\'re refining it to provide clear, actionable insights into your application\'s performance, check out the final result below!\\n\\n![](/assets/images/tailoring-span-names-and-enriching-spans-without-changing-code-with-opentelemetry/image2.png)\\n\\n\\n## Ready to Take Your Observability to the Next Level?\\n\\nImplementing OpenTelemetry with Elastic Observability opens up a world of possibilities for understanding and optimizing your applications. But this is just the beginning! To further enhance your observability journey, check out these valuable resources:\\n\\n1. [Infrastructure Monitoring with OpenTelemetry in Elastic Observability](https://www.elastic.co/observability-labs/blog/infrastructure-monitoring-with-opentelemetry-in-elastic-observability)\\n2. [Explore More OpenTelemetry Content](https://www.elastic.co/observability-labs/blog/tag/opentelemetry)\\n3. [Using the OTel Operator for Injecting Java Agents](https://www.elastic.co/observability-labs/blog/using-the-otel-operator-for-injecting-java-agents)\\n4. [What is OpenTelemetry?](https://www.elastic.co/what-is/opentelemetry)\\n\\nWe encourage you to dive deeper, experiment with these configurations, and see how they can transform your observability data. Remember, the key is to find the right balance between detail and manageability.\\n\\nHave you implemented similar strategies in your observability pipeline? We\'d love to hear about your experiences and insights. Share your thoughts in the comments below or reach out to us on our community forums.\\n\\nStay tuned for Part 2 of this series, where we will look at an advanced technique for collecting more data that can help you get even more granular by collecting Span names, baggage and data for metrics using a Java plugin all without code.\\n","code":"var Component=(()=>{var d=Object.create;var r=Object.defineProperty;var u=Object.getOwnPropertyDescriptor;var m=Object.getOwnPropertyNames;var g=Object.getPrototypeOf,f=Object.prototype.hasOwnProperty;var y=(n,e)=>()=>(e||n((e={exports:{}}).exports,e),e.exports),b=(n,e)=>{for(var i in e)r(n,i,{get:e[i],enumerable:!0})},s=(n,e,i,a)=>{if(e&&typeof e==\\"object\\"||typeof e==\\"function\\")for(let o of m(e))!f.call(n,o)&&o!==i&&r(n,o,{get:()=>e[o],enumerable:!(a=u(e,o))||a.enumerable});return n};var w=(n,e,i)=>(i=n!=null?d(g(n)):{},s(e||!n||!n.__esModule?r(i,\\"default\\",{value:n,enumerable:!0}):i,n)),v=n=>s(r({},\\"__esModule\\",{value:!0}),n);var c=y((O,l)=>{l.exports=_jsx_runtime});var T={};b(T,{default:()=>p,frontmatter:()=>x});var t=w(c()),x={title:\\"Tailoring span names and enriching spans without changing code with OpenTelemetry - Part 1\\",slug:\\"tailoring-span-names-and-enriching-spans-without-changing-code-with-opentelemetry\\",date:\\"2024-08-26\\",description:\\"The OpenTelemetry Collector offers powerful capabilities to enrich and refine telemetry data before it reaches your observability tools. 
In this blog post, we\'ll explore how to leverage the Collector to create more meaningful transaction names in Elastic Observability, significantly enhancing the value of your monitoring data.\\",author:[{slug:\\"david-hope\\"}],image:\\"tailor.jpg\\",tags:[{slug:\\"opentelemetry\\"},{slug:\\"apm\\"},{slug:\\"instrumentation\\"}]};function h(n){let e={a:\\"a\\",code:\\"code\\",h2:\\"h2\\",h3:\\"h3\\",img:\\"img\\",li:\\"li\\",ol:\\"ol\\",p:\\"p\\",pre:\\"pre\\",ul:\\"ul\\",...n.components};return(0,t.jsxs)(t.Fragment,{children:[(0,t.jsx)(e.p,{children:\\"The OpenTelemetry Collector offers powerful capabilities to enrich and refine telemetry data before it reaches your observability tools. In this blog post, we\'ll explore how to leverage the Collector to create more meaningful transaction names in Elastic Observability, significantly enhancing the value of your monitoring data.\\"}),`\\n`,(0,t.jsx)(e.p,{children:\'Consider this scenario: You have a transaction labeled simply as \\"HTTP GET\\" with an average response time of 5ms. However, this generic label masks a variety of distinct operations \\\\u2013 payment processing, user logins, and adding items to a cart. Does that 5ms average truly represent the performance of these diverse actions? Clearly not.\'}),`\\n`,(0,t.jsx)(e.p,{children:\\"The other problem that happens is that span traces become all mixed up so that login spans and image serving spans all become part of the same bucket, this makes things like latency correlation analysis hard in Elastic.\\"}),`\\n`,(0,t.jsx)(e.p,{children:\\"We\'ll focus on a specific technique using the collector\'s attributes, and transform processors to extract meaningful information from HTTP URLs and use it to create more descriptive span names. This approach not only improves the accuracy of your metrics but also enhances your ability to quickly identify and troubleshoot performance issues across your microservices architecture.\\"}),`\\n`,(0,t.jsx)(e.p,{children:\\"By using these processors in combination, we can quickly address the issue of overly generic transaction names, creating more granular and informative identifiers that provide accurate visibility into your services\' performance.\\"}),`\\n`,(0,t.jsx)(e.p,{children:\\"However, it\'s crucial to approach this technique with caution. While more detailed transaction names can significantly improve observability, they can also lead to an unexpected challenge: cardinality explosion. As we dive into the implementation details, we\'ll also discuss how to strike the right balance between granularity and manageability, ensuring that our solution enhances rather than overwhelms our observability stack.\\"}),`\\n`,(0,t.jsx)(e.p,{children:\\"In the following sections, we\'ll walk through the configuration step-by-step, explaining how each processor contributes to our goal, and highlighting best practices to avoid potential pitfalls like cardinality issues. 
Whether you\'re new to OpenTelemetry or looking to optimize your existing setup, this guide will help you unlock more meaningful insights from your telemetry data.\\"}),`\\n`,(0,t.jsx)(e.h2,{id:\\"prerequisites-and-configuration\\",children:\\"Prerequisites and configuration\\"}),`\\n`,(0,t.jsx)(e.p,{children:\\"If you plan on following this blog, here are some of the components and details we used to set up the configuration:\\"}),`\\n`,(0,t.jsxs)(e.ul,{children:[`\\n`,(0,t.jsxs)(e.li,{children:[\\"Ensure you have an account on Elastic Cloud and a deployed stack (see instructions \\",(0,t.jsx)(e.a,{href:\\"https://www.elastic.co/cloud/\\",rel:\\"nofollow\\",children:\\"here\\"}),\\").\\"]}),`\\n`,(0,t.jsxs)(e.li,{children:[\\"I am also using the OpenTelemetry demo in my environment, this is important to follow along with as this demo has the specific issue I want to address. You should clone the repository and follow the instructions \\",(0,t.jsx)(e.a,{href:\\"https://github.com/elastic/opentelemetry-demo\\",rel:\\"nofollow\\",children:\\"here\\"}),\\" to get this up and running. I recommend using Kubernetes and I will be doing this in my AWS EKS (Elastic Kubernetes Service) environment.\\"]}),`\\n`]}),`\\n`,(0,t.jsx)(e.h3,{id:\\"the-opentelemetry-demo\\",children:\\"The OpenTelemetry Demo\\"}),`\\n`,(0,t.jsx)(e.p,{children:\\"The OpenTelemetry Demo is a comprehensive, microservices-based application designed to showcase the capabilities and best practices of OpenTelemetry instrumentation. It simulates an e-commerce platform, incorporating various services such as frontend, cart, checkout, and payment processing. This demo serves as an excellent learning tool and reference implementation for developers and organizations looking to adopt OpenTelemetry.\\"}),`\\n`,(0,t.jsx)(e.p,{children:\\"The demo application generates traces, metrics, and logs across its interconnected services, demonstrating how OpenTelemetry can provide deep visibility into complex, distributed systems. 
It\'s particularly useful for experimenting with different collection, processing, and visualization techniques, making it an ideal playground for exploring observability concepts and tools like the OpenTelemetry Collector.\\"}),`\\n`,(0,t.jsx)(e.p,{children:\\"By using real-world scenarios and common architectural patterns, the OpenTelemetry Demo helps users understand how to effectively implement observability in their own applications and how to leverage the data for performance optimization and troubleshooting.\\"}),`\\n`,(0,t.jsx)(e.p,{children:\\"Once you have an Elastic Cloud instance and you fire up the OpenTelemetry demo, you should see something like this on the Elastic Service Map page:\\"}),`\\n`,(0,t.jsx)(e.p,{children:(0,t.jsx)(e.img,{src:\\"/assets/images/tailoring-span-names-and-enriching-spans-without-changing-code-with-opentelemetry/image3.png\\",alt:\\"\\",width:\\"1918\\",height:\\"985\\"})}),`\\n`,(0,t.jsx)(e.p,{children:\\"Navigating to the traces page will give you the following set up.\\"}),`\\n`,(0,t.jsx)(e.p,{children:(0,t.jsx)(e.img,{src:\\"/assets/images/tailoring-span-names-and-enriching-spans-without-changing-code-with-opentelemetry/image1.png\\",alt:\\"\\",width:\\"1911\\",height:\\"988\\"})}),`\\n`,(0,t.jsx)(e.p,{children:\\"As you can see there are some very broad transaction names here like HTTP GET and the averages will not be very accurate for specific business functions within your services as shown.\\"}),`\\n`,(0,t.jsx)(e.p,{children:(0,t.jsx)(e.img,{src:\\"/assets/images/tailoring-span-names-and-enriching-spans-without-changing-code-with-opentelemetry/image6.png\\",alt:\\"\\",width:\\"1916\\",height:\\"990\\"})}),`\\n`,(0,t.jsx)(e.p,{children:\\"So let\'s fix that with the OpenTelemetry Collector.\\"}),`\\n`,(0,t.jsx)(e.h2,{id:\\"the-opentelemetry-collector\\",children:\\"The OpenTelemetry Collector\\"}),`\\n`,(0,t.jsx)(e.p,{children:\\"The OpenTelemetry Collector is a vital component in the OpenTelemetry ecosystem, serving as a vendor-agnostic way to receive, process, and export telemetry data. It acts as a centralized observability pipeline that can collect traces, metrics, and logs from various sources, then transform and route this data to multiple backend systems.\\"}),`\\n`,(0,t.jsxs)(e.p,{children:[\\"The collector\'s flexible architecture allows for easy configuration and extension through a wide range of receivers, processors, and exporters which you can explore over \\",(0,t.jsx)(e.a,{href:\\"https://github.com/open-telemetry/opentelemetry-collector-contrib\\",rel:\\"nofollow\\",children:\\"here\\"}),\\". I have personally found navigating the \'contrib\' archive incredibly useful for finding techniques that I didn\'t know existed. This makes the OpenTelemetry Collector an invaluable tool for organizations looking to standardize their observability data pipeline, reduce overhead, and seamlessly integrate with different monitoring and analysis platforms.\\"]}),`\\n`,(0,t.jsx)(e.p,{children:\\"Let\'s go back to our problem, how do we change the transaction names that Elastic is using to something more useful so that our HTTP GET translates to something like payment-service/login? The first thing we do is we take the full http url and consider which parts of it relate to our transaction. 
Looking at the span details, we see a URL:\\"}),`\\n`,(0,t.jsx)(e.pre,{children:(0,t.jsx)(e.code,{children:`my-otel-demo-frontendproxy:8080/api/recommendations?productIds=&sessionId=45a9f3a4-39d8-47ed-bf16-01e6e81c80bc&currencyCode=\\n`})}),`\\n`,(0,t.jsx)(e.p,{children:(0,t.jsx)(e.img,{src:\\"/assets/images/tailoring-span-names-and-enriching-spans-without-changing-code-with-opentelemetry/image4.png\\",alt:\\"\\",width:\\"1919\\",height:\\"985\\"})}),`\\n`,(0,t.jsx)(e.p,{children:\\"Now, obviously we wouldn\'t want to create transaction names that map to every single session id; that would lead to the cardinality explosion we talked about earlier. However, something like the first two parts of the url, \'api/recommendations\', looks like exactly the kind of thing we need.\\"}),`\\n`,(0,t.jsx)(e.h3,{id:\\"the-attributes-processor\\",children:\\"The attributes processor\\"}),`\\n`,(0,t.jsxs)(e.p,{children:[\\"The OpenTelemetry collector gives us a useful tool \\",(0,t.jsx)(e.a,{href:\\"https://github.com/open-telemetry/opentelemetry-collector-contrib/blob/main/processor/attributesprocessor\\",rel:\\"nofollow\\",children:\\"here\\"}),\\": the attributes processor can help us extract parts of the url to use later in our observability pipeline. Doing this is very simple: we build a regex like the one below. I should mention that I did not write this regex myself; I used an LLM to generate it. Never fear regex again!\\"]}),`\\n`,(0,t.jsx)(e.pre,{children:(0,t.jsx)(e.code,{className:\\"language-yaml\\",children:`attributes:\\n actions:\\n - key: http.url\\n action: extract\\n pattern: \'^(?P<short_url>https?://[^/]+(?:/[^/]+)*)(?:/(?P<url_truncated_path>[^/?]+/[^/?]+))(?:\\\\\\\\?|/?$)\'\\n`})}),`\\n`,(0,t.jsx)(e.p,{children:\\"This configuration is doing some heavy lifting for us, so let\'s break it down:\\"}),`\\n`,(0,t.jsxs)(e.ul,{children:[`\\n`,(0,t.jsx)(e.li,{children:\\"We\'re using the attributes processor, which is perfect for manipulating span attributes.\\"}),`\\n`,(0,t.jsx)(e.li,{children:\\"We\'re targeting the http.url attribute of incoming spans.\\"}),`\\n`,(0,t.jsx)(e.li,{children:\\"The extract action tells the processor to pull out specific parts of the URL using our regex pattern.\\"}),`\\n`]}),`\\n`,(0,t.jsx)(e.p,{children:\\"Now, about that regex - it\'s designed to extract two key pieces of information:\\"}),`\\n`,(0,t.jsxs)(e.ol,{children:[`\\n`,(0,t.jsxs)(e.li,{children:[(0,t.jsx)(e.code,{children:\\"short_url\\"}),\': This captures the protocol, domain, and optionally the first path segment. For example, in \\"\',(0,t.jsx)(e.a,{href:\\"https://example.com/api/users/profile\\",rel:\\"nofollow\\",children:\\"https://example.com/api/users/profile\\"}),\'\\", it would grab \\"\',(0,t.jsx)(e.a,{href:\\"https://example.com/api\\",rel:\\"nofollow\\",children:\\"https://example.com/api\\"}),\'\\".\']}),`\\n`,(0,t.jsxs)(e.li,{children:[(0,t.jsx)(e.code,{children:\\"url_truncated_path\\"}),\': This snags the next two path segments (if they exist). In our example, it would extract \\"users/profile\\".\']}),`\\n`]}),`\\n`,(0,t.jsx)(e.p,{children:\\"Why is this useful? Well, it allows us to create more specific transaction names based on the URL structure, without including overly specific details that could lead to cardinality explosion. 
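If you want to sanity-check the pattern before shipping it, the same named groups can be exercised locally, since Python's `re` module shares the `(?P<name>...)` syntax. This is a quick verification sketch, not part of the collector config:

```python
import re

# Same pattern as in the attributes processor config above.
PATTERN = re.compile(
    r"^(?P<short_url>https?://[^/]+(?:/[^/]+)*)"
    r"(?:/(?P<url_truncated_path>[^/?]+/[^/?]+))(?:\?|/?$)"
)

m = PATTERN.match("https://example.com/api/users/profile?id=123")
if m:
    print(m.group("short_url"))           # https://example.com/api
    print(m.group("url_truncated_path"))  # users/profile
```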
For instance, we avoid capturing unique IDs or query parameters that would create a new transaction name for every single request.\\"}),`\\n`,(0,t.jsxs)(e.p,{children:[\'So, if we have a URL like \\"\',(0,t.jsx)(e.a,{href:\\"https://example.com/api/users/profile?id=123\\",rel:\\"nofollow\\",children:\\"https://example.com/api/users/profile?id=123\\"}),\'\\", our extracted \',(0,t.jsx)(e.code,{children:\\"url_truncated_path\\"}),` would be \\"users/profile\\". This gives us a nice balance - it\'s more specific than just \\"HTTP GET\\", but not so specific that we end up with thousands of unique transaction names.`]}),`\\n`,(0,t.jsxs)(e.p,{children:[\\"Now it\'s worth mentioning here that if you don\'t have an attribute you want to use for naming your transactions it is worth looking at the options for your SDK or agent, as an example the Java automatic instrumentation Otel agent has the \\",(0,t.jsx)(e.a,{href:\\"https://opentelemetry.io/docs/zero-code/java/agent/instrumentation/http/#capturing-http-request-and-response-headers\\",rel:\\"nofollow\\",children:\\"following options\\"}),\\" for capturing request and response headers. You can then subsequently use this data to name your transactions if the url is insufficient!\\"]}),`\\n`,(0,t.jsx)(e.p,{children:\\"In the next steps, we\'ll see how to use this extracted information to create more meaningful span names, providing better granularity in our observability data without overwhelming our system. Remember, the goal is to enhance our visibility, not to drown in a sea of overly specific metrics!\\"}),`\\n`,(0,t.jsx)(e.h3,{id:\\"the-transform-processor\\",children:\\"The transform processor\\"}),`\\n`,(0,t.jsx)(e.p,{children:\\"Now that we\'ve extracted the relevant parts of our URLs, it\'s time to put that information to good use. Enter the transform processor - our next powerful tool in the OpenTelemetry Collector pipeline.\\"}),`\\n`,(0,t.jsxs)(e.p,{children:[\\"The \\",(0,t.jsx)(e.a,{href:\\"https://github.com/open-telemetry/opentelemetry-collector-contrib/tree/main/processor/transformprocessor\\",rel:\\"nofollow\\",children:\\"transform processor\\"}),\\" allows us to modify various aspects of our telemetry data, including span names. Here\'s the configuration we\'ll use:\\"]}),`\\n`,(0,t.jsx)(e.pre,{children:(0,t.jsx)(e.code,{className:\\"language-yaml\\",children:`transform:\\n trace_statements:\\n - context: span\\n statements:\\n - set(name, attributes[\\"url_truncated_path\\"])\\n`})}),`\\n`,(0,t.jsx)(e.p,{children:\\"Let\'s break this down:\\"}),`\\n`,(0,t.jsxs)(e.ul,{children:[`\\n`,(0,t.jsx)(e.li,{children:\\"We\'re using the transform processor, which gives us fine-grained control over our spans.\\"}),`\\n`,(0,t.jsxs)(e.li,{children:[\\"We\'re focusing on \\",(0,t.jsx)(e.code,{children:\\"trace_statements\\"}),\\", as we want to modify our trace spans.\\"]}),`\\n`,(0,t.jsxs)(e.li,{children:[\\"The \\",(0,t.jsx)(e.code,{children:\\"context: span\\"}),\\" tells the processor to apply these changes to each individual span.\\"]}),`\\n`,(0,t.jsxs)(e.li,{children:[\\"Our statement is where the magic happens: we\'re setting the span\'s name to the value of the \\",(0,t.jsx)(e.code,{children:\\"url_truncated_path\\"}),\\" attribute we extracted earlier.\\"]}),`\\n`]}),`\\n`,(0,t.jsxs)(e.p,{children:[\'What does this mean in practice? Remember our previous example URL \\"\',(0,t.jsx)(e.a,{href:\\"https://example.com/api/users/profile?id=123\\",rel:\\"nofollow\\",children:\\"https://example.com/api/users/profile?id=123\\"}),`\\"? 
Instead of a generic span name like \\"HTTP GET\\", we\'ll now have a much more informative name: \\"users/profile\\".`]}),`\\n`,(0,t.jsx)(e.p,{children:\\"This transformation brings several benefits:\\"}),`\\n`,(0,t.jsxs)(e.ol,{children:[`\\n`,(0,t.jsx)(e.li,{children:\\"Improved Readability: At a glance, you can now see what part of your application is being accessed.\\"}),`\\n`,(0,t.jsx)(e.li,{children:\\"Better Aggregation: You can easily group and analyze similar requests, like all operations on user profiles.\\"}),`\\n`,(0,t.jsx)(e.li,{children:\\"Balanced Cardinality: We\'re specific enough to be useful, but not so specific that we create a new span name for every unique URL.\\"}),`\\n`]}),`\\n`,(0,t.jsx)(e.p,{children:\\"By combining the attribute extraction we did earlier with this transformation, we\'ve created a powerful system for generating meaningful span names. This approach gives us deep insight into our application\'s behavior without the risk of cardinality explosion.\\"}),`\\n`,(0,t.jsx)(e.h2,{id:\\"putting-it-all-together\\",children:\\"Putting it All Together\\"}),`\\n`,(0,t.jsx)(e.p,{children:\\"The resulting config for the OpenTelemetry collector is below. Remember, this goes into the opentelemetry-demo/kubernetes/elastic-helm/configmap-deployment.yaml file and is applied with kubectl apply -f configmap-deployment.yaml.\\"}),`\\n`,(0,t.jsx)(e.pre,{children:(0,t.jsx)(e.code,{className:\\"language-yaml\\",children:`---\\napiVersion: v1\\nkind: ConfigMap\\nmetadata:\\n name: elastic-otelcol-agent\\n namespace: default\\n labels:\\n app.kubernetes.io/name: otelcol\\n\\ndata:\\n relay: |\\n connectors:\\n spanmetrics: {}\\n exporters:\\n debug: {}\\n otlp/elastic:\\n endpoint: \\\\${env:ELASTIC_APM_ENDPOINT}\\n compression: none\\n headers:\\n Authorization: Bearer \\\\${ELASTIC_APM_SECRET_TOKEN}\\n extensions:\\n processors:\\n batch: {}\\n resource:\\n attributes:\\n - key: deployment.environment\\n value: \\"opentelemetry-demo\\"\\n action: upsert\\n attributes:\\n actions:\\n - key: http.url\\n action: extract\\n pattern: \'^(?P<short_url>https?://[^/]+(?:/[^/]+)*)(?:/(?P<url_truncated_path>[^/?]+/[^/?]+))(?:\\\\\\\\?|/?$)\'\\n transform:\\n trace_statements:\\n - context: span\\n statements:\\n - set(name, attributes[\\"url_truncated_path\\"])\\n receivers:\\n httpcheck/frontendproxy:\\n targets:\\n - endpoint: https://example-frontendproxy:8080\\n otlp:\\n protocols:\\n grpc:\\n endpoint: \\\\${env:MY_POD_IP}:4317\\n http:\\n cors:\\n allowed_origins:\\n - http://*\\n - https://*\\n endpoint: \\\\${env:MY_POD_IP}:4318\\n service:\\n extensions:\\n pipelines:\\n logs:\\n exporters:\\n - debug\\n - otlp/elastic\\n processors:\\n - batch\\n - resource\\n - attributes\\n - transform\\n receivers:\\n - otlp\\n metrics:\\n exporters:\\n - otlp/elastic\\n - debug\\n processors:\\n - batch\\n - resource\\n receivers:\\n - httpcheck/frontendproxy\\n - otlp\\n - spanmetrics\\n traces:\\n exporters:\\n - otlp/elastic\\n - debug\\n - spanmetrics\\n processors:\\n - batch\\n - resource\\n - attributes\\n - transform\\n receivers:\\n - otlp\\n telemetry:\\n metrics:\\n address: \\\\${env:MY_POD_IP}:8888\\n`})}),`\\n`,(0,t.jsx)(e.p,{children:\\"You\'ll notice that we tie everything together by adding our enrichment and transformations to the traces section in pipelines at the bottom of the collector config. 
This is the definition of our observability pipeline, bringing together all the pieces we\'ve discussed to create more meaningful and actionable telemetry data.\\"}),`\\n`,(0,t.jsx)(e.p,{children:\\"By implementing this configuration, you\'re taking a significant step towards more insightful observability. You\'re not just collecting data; you\'re refining it to provide clear, actionable insights into your application\'s performance, check out the final result below!\\"}),`\\n`,(0,t.jsx)(e.p,{children:(0,t.jsx)(e.img,{src:\\"/assets/images/tailoring-span-names-and-enriching-spans-without-changing-code-with-opentelemetry/image2.png\\",alt:\\"\\",width:\\"1918\\",height:\\"986\\"})}),`\\n`,(0,t.jsx)(e.h2,{id:\\"ready-to-take-your-observability-to-the-next-level\\",children:\\"Ready to Take Your Observability to the Next Level?\\"}),`\\n`,(0,t.jsx)(e.p,{children:\\"Implementing OpenTelemetry with Elastic Observability opens up a world of possibilities for understanding and optimizing your applications. But this is just the beginning! To further enhance your observability journey, check out these valuable resources:\\"}),`\\n`,(0,t.jsxs)(e.ol,{children:[`\\n`,(0,t.jsx)(e.li,{children:(0,t.jsx)(e.a,{href:\\"https://www.elastic.co/observability-labs/blog/infrastructure-monitoring-with-opentelemetry-in-elastic-observability\\",rel:\\"nofollow\\",children:\\"Infrastructure Monitoring with OpenTelemetry in Elastic Observability\\"})}),`\\n`,(0,t.jsx)(e.li,{children:(0,t.jsx)(e.a,{href:\\"https://www.elastic.co/observability-labs/blog/tag/opentelemetry\\",rel:\\"nofollow\\",children:\\"Explore More OpenTelemetry Content\\"})}),`\\n`,(0,t.jsx)(e.li,{children:(0,t.jsx)(e.a,{href:\\"https://www.elastic.co/observability-labs/blog/using-the-otel-operator-for-injecting-java-agents\\",rel:\\"nofollow\\",children:\\"Using the OTel Operator for Injecting Java Agents\\"})}),`\\n`,(0,t.jsx)(e.li,{children:(0,t.jsx)(e.a,{href:\\"https://www.elastic.co/what-is/opentelemetry\\",rel:\\"nofollow\\",children:\\"What is OpenTelemetry?\\"})}),`\\n`]}),`\\n`,(0,t.jsx)(e.p,{children:\\"We encourage you to dive deeper, experiment with these configurations, and see how they can transform your observability data. Remember, the key is to find the right balance between detail and manageability.\\"}),`\\n`,(0,t.jsx)(e.p,{children:\\"Have you implemented similar strategies in your observability pipeline? We\'d love to hear about your experiences and insights. 
Share your thoughts in the comments below or reach out to us on our community forums.\\"}),`\\n`,(0,t.jsx)(e.p,{children:\\"Stay tuned for Part 2 of this series, where we will look at an advanced technique for collecting more data that can help you get even more granular by collecting Span names, baggage and data for metrics using a Java plugin all without code.\\"})]})}function p(n={}){let{wrapper:e}=n.components||{};return e?(0,t.jsx)(e,{...n,children:(0,t.jsx)(h,{...n})}):h(n)}return v(T);})();\\n;return Component;"},"_id":"articles/tailoring-span-names-and-enriching-spans-without-changing-code-with-opentelemetry.mdx","_raw":{"sourceFilePath":"articles/tailoring-span-names-and-enriching-spans-without-changing-code-with-opentelemetry.mdx","sourceFileName":"tailoring-span-names-and-enriching-spans-without-changing-code-with-opentelemetry.mdx","sourceFileDir":"articles","contentType":"mdx","flattenedPath":"articles/tailoring-span-names-and-enriching-spans-without-changing-code-with-opentelemetry"},"type":"Article","imageUrl":"/assets/images/tailoring-span-names-and-enriching-spans-without-changing-code-with-opentelemetry/tailor.jpg","readingTime":"15 min read","url":"/tailoring-span-names-and-enriching-spans-without-changing-code-with-opentelemetry","headings":[{"level":2,"title":"Prerequisites and configuration","href":"#prerequisites-and-configuration"},{"level":3,"title":"The OpenTelemetry Demo","href":"#the-opentelemetry-demo"},{"level":2,"title":"The OpenTelemetry Collector","href":"#the-opentelemetry-collector"},{"level":3,"title":"The attributes processor","href":"#the-attributes-processor"},{"level":3,"title":"The transform processor","href":"#the-transform-processor"},{"level":2,"title":"Putting it All Together","href":"#putting-it-all-together"},{"level":2,"title":"Ready to Take Your Observability to the Next Level?","href":"#ready-to-take-your-observability-to-the-next-level"}]},{"title":"Trace your Azure Function application with Elastic Observability","slug":"trace-azure-function-application-observability","date":"2023-05-16","description":"Serverless applications deployed on Azure Functions are growing in usage. This blog shows how to deploy a serverless application on Azure functions with Elastic Agent and use Elastic\'s APM capability to manage and troubleshoot issues.","image":"09-road.jpeg","author":[{"slug":"trent-mick","type":"Author","_raw":{}},{"slug":"bahubali-shetti","type":"Author","_raw":{}},{"slug":"hemant-malik","type":"Author","_raw":{}}],"tags":[{"slug":"cloud-monitoring","type":"Tag","_raw":{}},{"slug":"serverless","type":"Tag","_raw":{}},{"slug":"azure","type":"Tag","_raw":{}},{"slug":"distributed-tracing","type":"Tag","_raw":{}},{"slug":"apm","type":"Tag","_raw":{}}],"body":{"raw":"\\nAdoption of Azure Functions in cloud-native applications on Microsoft Azure has been increasing exponentially over the last few years. Serverless functions, such as the Azure Functions, provide a high level of abstraction from the underlying infrastructure and orchestration, given these tasks are managed by the cloud provider. Software development teams can then focus on the implementation of business and application logic. 
Some additional benefits include billing for serverless functions based on the actual compute and memory resources consumed, along with automatic on-demand scaling.\\n\\nWhile the benefits of using serverless functions are manifold, it is also necessary to make them observable in the wider end-to-end microservices architecture context.\\n\\n## Elastic Observability (APM) for Azure Functions: The architecture\\n\\n[Elastic Observability 8.7](https://www.elastic.co/blog/whats-new-elastic-observability-8-7-0) introduced distributed tracing for Microsoft Azure Functions — available for the Elastic APM Agents for .NET, Node.js, and Python. Auto-instrumentation of HTTP requests is supported out-of-the-box, enabling the detection of performance bottlenecks and sources of errors.\\n\\nThe key components of the solution for observing Azure Functions are:\\n\\n1. The Elastic APM Agent for the relevant language\\n2. Elastic Observability\\n\\n![azure function](/assets/images/trace-azure-function-application-observability/blog-elastic-azure-function.png)\\n\\nThe APM server validates and processes incoming events from individual APM Agents and transforms them into Elasticsearch documents. The APM Agent provides auto-instrumentation capabilities for the application being observed. The Node.js APM Agent can trace function invocations in an Azure Functions app.\\n\\n## Setting up Elastic APM for Azure Functions\\n\\nTo demonstrate the setup and usage of Elastic APM, we will use a [sample Node.js application](https://github.com/elastic/azure-functions-apm-nodejs-sample-app).\\n\\n### Application overview\\n\\nThe Node.js application has two [HTTP-triggered](https://learn.microsoft.com/en-us/azure/azure-functions/functions-bindings-http-webhook) functions named \\"[Hello](https://github.com/elastic/azure-functions-apm-nodejs-sample-app/blob/main/Hello/index.js)\\" and \\"[Goodbye](https://github.com/elastic/azure-functions-apm-nodejs-sample-app/blob/main/Goodbye/index.js).\\" Once deployed, they can be called as follows (replace APP_NAME with your Function App name), and tracing data will be sent to the configured Elastic Observability deployment.\\n\\n```bash\\ncurl -i https://<APP_NAME>.azurewebsites.net/api/hello\\ncurl -i https://<APP_NAME>.azurewebsites.net/api/goodbye\\n```\\n\\n### Setup\\n\\n**Step 0. Prerequisites**\\n\\nTo run the sample application, you will need:\\n\\n- An installation of [Node.js](https://nodejs.org/) (v14 or later)\\n- Access to an Azure subscription with an appropriate role to create resources\\n- The [Azure CLI (az)](https://learn.microsoft.com/en-us/cli/azure/install-azure-cli) logged into an Azure subscription\\n\\n 1. Use az login to log in\\n 2. See the output of az account show\\n\\n- The [Azure Functions Core Tools (func)](https://learn.microsoft.com/en-us/azure/azure-functions/functions-run-local?tabs=v4%2Cwindows%2Ccsharp%2Cportal%2Cbash#install-the-azure-functions-core-tools) (func --version should show a 4.x version)\\n- An Elastic Observability deployment to which monitoring data will be sent\\n\\n 1. The simplest way to get started with Elastic APM on Microsoft Azure is through Elastic Cloud. [Get started with Elastic Cloud on Azure Marketplace](https://www.elastic.co/guide/en/elastic-stack-deploy/current/azure-marketplace-getting-started.html) or [sign up for a trial on Elastic Cloud](https://www.elastic.co/cloud/elasticsearch-service/signup).\\n\\n- The APM server URL (serverUrl) and secret token (secretToken) from your Elastic stack deployment for configuration below\\n\\n 1. 
[How to get the serverUrl and secretToken documentation](https://www.elastic.co/guide/en/apm/guide/8.7/install-and-run.html)\\n\\n**Step 1. Clone the sample application repo and install dependencies**\\n\\n```bash\\ngit clone https://github.com/elastic/azure-functions-apm-nodejs-sample-app.git\\ncd azure-functions-apm-nodejs-sample-app\\nnpm install\\n```\\n\\n**Step 2. Deploy the Azure Function App** \\nCaution: Deploying a function app to Azure can incur [costs](https://azure.microsoft.com/en-us/pricing/details/functions/). The following setup uses the free tier of Azure Functions. Step 5 covers the clean-up of resources.\\n\\n**Step 2.1** \\nTo avoid name collisions with others that have independently run this demo, we need a short unique identifier for some resource names that need to be globally unique. We\'ll call it the DEMO_ID. You can run the following to generate one and save it to DEMO_ID and the \\"demo-id\\" file.\\n\\n```bash\\nif [[ ! -f demo-id ]]; then node -e \'console.log(crypto.randomBytes(3).toString(\\"hex\\"))\' >demo-id; fi\\nexport DEMO_ID=$(cat demo-id)\\necho $DEMO_ID\\n```\\n\\n**Step 2.2** \\nBefore you can deploy to Azure, you will need to create some Azure resources: a Resource Group, Storage Account, and the Function App. For this demo, you can use the following commands. (See [this Azure docs section](https://learn.microsoft.com/en-us/azure/azure-functions/create-first-function-cli-node#create-supporting-azure-resources-for-your-function) for more details.)\\n\\n```bash\\nREGION=westus2 # Or use another region listed in \'az account list-locations\'.\\naz group create --name \\"AzureFnElasticApmNodeSample-rg\\" --location \\"$REGION\\"\\naz storage account create --name \\"eapmdemostor${DEMO_ID}\\" --location \\"$REGION\\" \\\\\\n --resource-group \\"AzureFnElasticApmNodeSample-rg\\" --sku Standard_LRS\\naz functionapp create --name \\"azure-functions-apm-nodejs-sample-app-${DEMO_ID}\\" \\\\\\n --resource-group \\"AzureFnElasticApmNodeSample-rg\\" \\\\\\n --consumption-plan-location \\"$REGION\\" --runtime node --runtime-version 18 \\\\\\n --functions-version 4 --storage-account \\"eapmdemostor${DEMO_ID}\\"\\n```\\n\\n**Step 2.3** \\nNext, configure your Function App with the APM server URL and secret token for your Elastic deployment. This can be done in the [Azure Portal](https://portal.azure.com/) or with the az CLI.\\n\\nIn the Azure portal, browse to your Function App, then its Application Settings ([Azure user guide](https://learn.microsoft.com/en-us/azure/azure-functions/functions-how-to-use-azure-function-app-settings?tabs=portal#settings)). You\'ll need to add two settings: ELASTIC_APM_SERVER_URL and ELASTIC_APM_SECRET_TOKEN.\\n\\nFirst, set your APM URL and token as environment variables (replace the placeholder values with your own):\\n\\n```bash\\nexport ELASTIC_APM_SERVER_URL=\\"<serverUrl>\\"\\nexport ELASTIC_APM_SECRET_TOKEN=\\"<secretToken>\\"\\n```\\n\\nThen you can use the az functionapp config appsettings set ... CLI command as follows:\\n\\n```bash\\naz functionapp config appsettings set \\\\\\n -g \\"AzureFnElasticApmNodeSample-rg\\" -n \\"azure-functions-apm-nodejs-sample-app-${DEMO_ID}\\" \\\\\\n --settings \\"ELASTIC_APM_SERVER_URL=${ELASTIC_APM_SERVER_URL}\\"\\naz functionapp config appsettings set \\\\\\n -g \\"AzureFnElasticApmNodeSample-rg\\" -n \\"azure-functions-apm-nodejs-sample-app-${DEMO_ID}\\" \\\\\\n --settings \\"ELASTIC_APM_SECRET_TOKEN=${ELASTIC_APM_SECRET_TOKEN}\\"\\n```\\n\\nThe ELASTIC_APM_SERVER_URL and ELASTIC_APM_SECRET_TOKEN values are set in the Function App’s application settings and used by the Elastic APM Agent. 
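The agent reads both settings from the environment when it starts. For illustration only, here is a sketch of an equivalent explicit configuration (a hypothetical variant, not the sample app\'s shipped file; the serviceName value is an assumed example):\\n\\n```javascript\\n// Sketch: explicit equivalent of the environment-variable setup.\\n// elastic-apm-node reads ELASTIC_APM_SERVER_URL and ELASTIC_APM_SECRET_TOKEN\\n// automatically, so a bare start() behaves the same way.\\n// The agent should be started before any other modules are required.\\nrequire(\\"elastic-apm-node\\").start({\\n  serverUrl: process.env.ELASTIC_APM_SERVER_URL,\\n  secretToken: process.env.ELASTIC_APM_SECRET_TOKEN,\\n  serviceName: \\"azure-functions-apm-nodejs-sample-app\\", // assumed example name\\n});\\n```\\n\\n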
In the sample app, this is initiated by the initapm.js file, which starts the Elastic APM agent with:\\n\\n```javascript\\nrequire(\\"elastic-apm-node\\").start();\\n```\\n\\nWhen you log in to Azure and look at the function’s configuration, you will see them set:\\n\\n![azure functions application settings](/assets/images/trace-azure-function-application-observability/blog-elastic-azure-functions-application-settings.png)\\n\\n**Step 2.4** \\nNow you can publish your app. (Re-run this command every time you make a code change.)\\n\\n```bash\\nfunc azure functionapp publish \\"azure-functions-apm-nodejs-sample-app-${DEMO_ID}\\"\\n```\\n\\nYou should log in to Azure to see the function running.\\n\\n![azure function app](/assets/images/trace-azure-function-application-observability/blog-elastic-azure-function-app.png)\\n\\n**Step 3. Try it out**\\n\\n```bash\\n% curl https://azure-functions-apm-nodejs-sample-app-${DEMO_ID}.azurewebsites.net/api/Hello\\n{\\"message\\":\\"Hello.\\"}\\n% curl https://azure-functions-apm-nodejs-sample-app-${DEMO_ID}.azurewebsites.net/api/Goodbye\\n{\\"message\\":\\"Goodbye.\\"}\\n```\\n\\nIn a few moments, the APM app in your Elastic deployment will show tracing data for your Azure Function app.\\n\\n**Step 4. Apply some load to your app** \\nTo get some more interesting data, you can run the following to generate some load on your deployed function app:\\n\\n```bash\\nnpm run loadgen\\n```\\n\\nThis uses the [autocannon](https://github.com/mcollina/autocannon) node package to generate some light load (2 concurrent users, each calling at 5 requests/s for 60s) on the \\"Goodbye\\" function.\\n\\n**Step 5. Clean up resources** \\nIf you deployed to Azure, you should make sure to delete any resources so you don\'t incur any costs.\\n\\n```bash\\naz group delete --name \\"AzureFnElasticApmNodeSample-rg\\"\\n```\\n\\n## Analyzing Azure Function APM data in Elastic\\n\\nOnce you have successfully set up the sample application and started generating load, you should see APM data appearing in the Elastic Observability APM Services capability.\\n\\n## Service map\\n\\nWith the default setup, you will see two services in the APM Service map.\\n\\nThe main function: azure-functions-apm-nodejs-sample-app\\n\\nAnd the endpoint where your function is accessible: azure-functions-apm-nodejs-sample-app-ec7d4c.azurewebsites.net\\n\\nYou will see that there is a connection between the two as your application is taking requests and answering through the endpoint.\\n\\n![observability services](/assets/images/trace-azure-function-application-observability/blog-elastic-observability-services.png)\\n\\nFrom the [APM Service](https://www.elastic.co/observability/application-performance-monitoring) map you can further investigate the function, analyze traces, look at logs, and more.\\n\\n### Service details\\n\\nWhen we dive into the details, we can see several items.\\n\\n![observability azure functions apm](/assets/images/trace-azure-function-application-observability/blog-elastic-observability-azure-functions-apm.png)\\n\\n- Latency for the recent load we ran against the application\\n- Transactions (Goodbye and Hello)\\n- Average throughput\\n- And more\\n\\n### Transaction details\\n\\nWe can see transaction details.\\n\\n![observability get api goodbye](/assets/images/trace-azure-function-application-observability/blog-elastic-observability-get-api-goodbye.png)\\n\\nAn individual trace shows us that the \\"Goodbye\\" function [calls the \\"Hello\\" 
function](https://github.com/elastic/azure-functions-apm-nodejs-sample-app/blob/main/Goodbye/index.js#L6-L10) in the same function app before returning:\\n\\n![latency distribution trace sample](/assets/images/trace-azure-function-application-observability/blog-elastic-latency-distribution-trace-sample.png)\\n\\n### Machine learning based latency correlation\\n\\nAs we’ve mentioned in other blogs, we can also correlate issues such as higher than normal latency. Since we see a spike at 1s, we run the embedded latency correlation, which uses machine learning to help analyze the potential impacting component by analyzing logs, metrics, and traces.\\n\\n![latency distribution correlations](/assets/images/trace-azure-function-application-observability/blog-elastic-latency-distribution-correlations.png)\\n\\nThe correlation indicated there is a potential cause (25%) due to the host sending the load (my machine).\\n\\n### Cold start detection\\n\\nAlso, we can see the impact a [cold start](https://azure.microsoft.com/en-ca/blog/understanding-serverless-cold-start/) can have on the latency of a request:\\n\\n![trace sample](/assets/images/trace-azure-function-application-observability/blog-elastic-trace-sample.png)\\n\\n## Summary\\n\\nElastic Observability provides real-time monitoring of Azure Functions in your production environment for a broad range of use cases. Curated dashboards assist DevOps teams in performing root cause analysis for performance bottlenecks and errors. SRE teams can quickly view upstream and downstream dependencies, as well as perform analyses in the context of distributed microservices architecture.\\n\\n## Learn more\\n\\nTo learn how to add the Elastic APM Agent to an existing Node.js Azure Function app, read [Monitoring Node.js Azure Functions](https://www.elastic.co/guide/en/apm/agent/nodejs/master/azure-functions.html). Additional resources include:\\n\\n- [How to deploy and manage Elastic Observability on Microsoft Azure](https://www.elastic.co/blog/getting-started-with-the-azure-integration-enhancement)\\n- [Elastic APM Quickstart](https://www.elastic.co/guide/en/apm/guide/current/apm-quick-start.html)\\n","code":"var Component=(()=>{var h=Object.create;var o=Object.defineProperty;var p=Object.getOwnPropertyDescriptor;var g=Object.getOwnPropertyNames;var m=Object.getPrototypeOf,f=Object.prototype.hasOwnProperty;var b=(t,e)=>()=>(e||t((e={exports:{}}).exports,e),e.exports),w=(t,e)=>{for(var i in e)o(t,i,{get:e[i],enumerable:!0})},s=(t,e,i,r)=>{if(e&&typeof e==\\"object\\"||typeof e==\\"function\\")for(let a of g(e))!f.call(t,a)&&a!==i&&o(t,a,{get:()=>e[a],enumerable:!(r=p(e,a))||r.enumerable});return t};var y=(t,e,i)=>(i=t!=null?h(m(t)):{},s(e||!t||!t.__esModule?o(i,\\"default\\",{value:t,enumerable:!0}):i,t)),A=t=>s(o({},\\"__esModule\\",{value:!0}),t);var c=b((M,l)=>{l.exports=_jsx_runtime});var v={};w(v,{default:()=>d,frontmatter:()=>z});var n=y(c()),z={title:\\"Trace your Azure Function application with Elastic Observability\\",slug:\\"trace-azure-function-application-observability\\",date:\\"2023-05-16\\",description:\\"Serverless applications deployed on Azure Functions are growing in usage. 
This blog shows how to deploy a serverless application on Azure functions with Elastic Agent and use Elastic\'s APM capability to manage and troubleshoot issues.\\",author:[{slug:\\"trent-mick\\"},{slug:\\"bahubali-shetti\\"},{slug:\\"hemant-malik\\"}],image:\\"09-road.jpeg\\",tags:[{slug:\\"cloud-monitoring\\"},{slug:\\"serverless\\"},{slug:\\"azure\\"},{slug:\\"distributed-tracing\\"},{slug:\\"apm\\"}]};function u(t){let e={a:\\"a\\",br:\\"br\\",code:\\"code\\",h2:\\"h2\\",h3:\\"h3\\",img:\\"img\\",li:\\"li\\",ol:\\"ol\\",p:\\"p\\",pre:\\"pre\\",strong:\\"strong\\",ul:\\"ul\\",...t.components};return(0,n.jsxs)(n.Fragment,{children:[(0,n.jsx)(e.p,{children:\\"Adoption of Azure Functions in cloud-native applications on Microsoft Azure has been increasing exponentially over the last few years. Serverless functions, such as the Azure Functions, provide a high level of abstraction from the underlying infrastructure and orchestration, given these tasks are managed by the cloud provider. Software development teams can then focus on the implementation of business and application logic. Some additional benefits include billing for serverless functions based on the actual compute and memory resources consumed, along with automatic on-demand scaling.\\"}),`\\n`,(0,n.jsx)(e.p,{children:\\"While the benefits of using serverless functions are manifold, it is also necessary to make them observable in the wider end-to-end microservices architecture context.\\"}),`\\n`,(0,n.jsx)(e.h2,{id:\\"elastic-observability-apm-for-azure-functions-the-architecture\\",children:\\"Elastic Observability (APM) for Azure Functions: The architecture\\"}),`\\n`,(0,n.jsxs)(e.p,{children:[(0,n.jsx)(e.a,{href:\\"https://www.elastic.co/blog/whats-new-elastic-observability-8-7-0\\",rel:\\"nofollow\\",children:\\"Elastic Observability 8.7\\"}),\\" introduced distributed tracing for Microsoft Azure Functions \\\\u2014 available for the Elastic APM Agents for .NET, Node.js, and Python. Auto-instrumentation of HTTP requests is supported out-of-the-box, enabling the detection of performance bottlenecks and sources of errors.\\"]}),`\\n`,(0,n.jsx)(e.p,{children:\\"The key components of the solution for observing Azure Functions are:\\"}),`\\n`,(0,n.jsxs)(e.ol,{children:[`\\n`,(0,n.jsx)(e.li,{children:\\"The Elastic APM Agent for the relevant language\\"}),`\\n`,(0,n.jsx)(e.li,{children:\\"Elastic Observability\\"}),`\\n`]}),`\\n`,(0,n.jsx)(e.p,{children:(0,n.jsx)(e.img,{src:\\"/assets/images/trace-azure-function-application-observability/blog-elastic-azure-function.png\\",alt:\\"azure function\\",width:\\"1942\\",height:\\"856\\"})}),`\\n`,(0,n.jsx)(e.p,{children:\\"The APM server validates and processes incoming events from individual APM Agents and transforms them into Elasticsearch documents. The APM Agent provides auto-instrumentation capabilities for the application being observed. 
The Node.js APM Agent can trace function invocations in an Azure Functions app.\\"}),`\\n`,(0,n.jsx)(e.h2,{id:\\"setting-up-elastic-apm-for-azure-functions\\",children:\\"Setting up Elastic APM for Azure Functions\\"}),`\\n`,(0,n.jsxs)(e.p,{children:[\\"To demonstrate the setup and usage of Elastic APM, we will use a \\",(0,n.jsx)(e.a,{href:\\"https://github.com/elastic/azure-functions-apm-nodejs-sample-app\\",rel:\\"nofollow\\",children:\\"sample Node.js application\\"}),\\".\\"]}),`\\n`,(0,n.jsx)(e.h3,{id:\\"application-overview\\",children:\\"Application overview\\"}),`\\n`,(0,n.jsxs)(e.p,{children:[\\"The Node.js application has two \\",(0,n.jsx)(e.a,{href:\\"https://learn.microsoft.com/en-us/azure/azure-functions/functions-bindings-http-webhook\\",rel:\\"nofollow\\",children:\\"HTTP-triggered\\"}),\' functions named \\"\',(0,n.jsx)(e.a,{href:\\"https://github.com/elastic/azure-functions-apm-nodejs-sample-app/blob/main/Hello/index.js\\",rel:\\"nofollow\\",children:\\"Hello\\"}),\'\\" and \\"\',(0,n.jsx)(e.a,{href:\\"https://github.com/elastic/azure-functions-apm-nodejs-sample-app/blob/main/Goodbye/index.js\\",rel:\\"nofollow\\",children:\\"Goodbye\\"}),\'.\\" Once deployed, they can be called as follows, and tracing data will be sent to the configured Elastic Observability deployment.\']}),`\\n`,(0,n.jsx)(e.pre,{children:(0,n.jsx)(e.code,{className:\\"language-bash\\",children:`curl -i https://.azurewebsites.net/api/hello\\ncurl -i https://.azurewebsites.net/api/goodbye\\n`})}),`\\n`,(0,n.jsx)(e.h3,{id:\\"setup\\",children:\\"Setup\\"}),`\\n`,(0,n.jsx)(e.p,{children:(0,n.jsx)(e.strong,{children:\\"Step 0. Prerequisites\\"})}),`\\n`,(0,n.jsx)(e.p,{children:\\"To run the sample application, you will need:\\"}),`\\n`,(0,n.jsxs)(e.ul,{children:[`\\n`,(0,n.jsxs)(e.li,{children:[`\\n`,(0,n.jsxs)(e.p,{children:[\\"An installation of \\",(0,n.jsx)(e.a,{href:\\"https://nodejs.org/\\",rel:\\"nofollow\\",children:\\"Node.js\\"}),\\" (v14 or later)\\"]}),`\\n`]}),`\\n`,(0,n.jsxs)(e.li,{children:[`\\n`,(0,n.jsx)(e.p,{children:\\"Access to an Azure subscription with an appropriate role to create resources\\"}),`\\n`]}),`\\n`,(0,n.jsxs)(e.li,{children:[`\\n`,(0,n.jsxs)(e.p,{children:[\\"The \\",(0,n.jsx)(e.a,{href:\\"https://learn.microsoft.com/en-us/cli/azure/install-azure-cli\\",rel:\\"nofollow\\",children:\\"Azure CLI (az)\\"}),\\" logged into an Azure subscription\\"]}),`\\n`,(0,n.jsxs)(e.ol,{children:[`\\n`,(0,n.jsx)(e.li,{children:\\"Use az login to login\\"}),`\\n`,(0,n.jsx)(e.li,{children:\\"See the output of az account show\\"}),`\\n`]}),`\\n`]}),`\\n`,(0,n.jsxs)(e.li,{children:[`\\n`,(0,n.jsxs)(e.p,{children:[\\"The \\",(0,n.jsx)(e.a,{href:\\"https://learn.microsoft.com/en-us/azure/azure-functions/functions-run-local?tabs=v4%2Cwindows%2Ccsharp%2Cportal%2Cbash#install-the-azure-functions-core-tools\\",rel:\\"nofollow\\",children:\\"Azure Functions Core Tools (func)\\"}),\\" (func --version should show a 4.x version)\\"]}),`\\n`]}),`\\n`,(0,n.jsxs)(e.li,{children:[`\\n`,(0,n.jsx)(e.p,{children:\\"An Elastic Observability deployment to which monitoring data will be sent\\"}),`\\n`,(0,n.jsxs)(e.ol,{children:[`\\n`,(0,n.jsxs)(e.li,{children:[\\"The simplest way to get started with Elastic APM Microsoft Azure is through Elastic Cloud. 
\\",(0,n.jsx)(e.a,{href:\\"https://www.elastic.co/guide/en/elastic-stack-deploy/current/azure-marketplace-getting-started.html\\",rel:\\"nofollow\\",children:\\"Get started with Elastic Cloud on Azure Marketplace\\"}),\\" or \\",(0,n.jsx)(e.a,{href:\\"https://www.elastic.co/cloud/elasticsearch-service/signup\\",rel:\\"nofollow\\",children:\\"sign up for a trial on Elastic Cloud\\"}),\\".\\"]}),`\\n`]}),`\\n`]}),`\\n`,(0,n.jsxs)(e.li,{children:[`\\n`,(0,n.jsx)(e.p,{children:\\"The APM server URL (serverUrl) and secret token (secretToken) from your Elastic stack deployment for configuration below\\"}),`\\n`,(0,n.jsxs)(e.ol,{children:[`\\n`,(0,n.jsx)(e.li,{children:(0,n.jsx)(e.a,{href:\\"https://www.elastic.co/guide/en/apm/guide/8.7/install-and-run.html\\",rel:\\"nofollow\\",children:\\"How to get the serverUrl and secretToken documentation\\"})}),`\\n`]}),`\\n`]}),`\\n`]}),`\\n`,(0,n.jsx)(e.p,{children:(0,n.jsx)(e.strong,{children:\\"Step 1. Clone the sample application repo and install dependencies\\"})}),`\\n`,(0,n.jsx)(e.pre,{children:(0,n.jsx)(e.code,{className:\\"language-bash\\",children:`git clone https://github.com/elastic/azure-functions-apm-nodejs-sample-app.git\\ncd azure-functions-apm-nodejs-sample-app\\nnpm install\\n`})}),`\\n`,(0,n.jsxs)(e.p,{children:[(0,n.jsx)(e.strong,{children:\\"Step 2. Deploy the Azure Function App\\"}),(0,n.jsx)(e.br,{}),`\\n`,\\"Caution icon! Deploying a function app to Azure can incur \\",(0,n.jsx)(e.a,{href:\\"https://azure.microsoft.com/en-us/pricing/details/functions/\\",rel:\\"nofollow\\",children:\\"costs\\"}),\\". The following setup uses the free tier of Azure Functions. Step 5 covers the clean-up of resources.\\"]}),`\\n`,(0,n.jsxs)(e.p,{children:[(0,n.jsx)(e.strong,{children:\\"Step 2.1\\"}),(0,n.jsx)(e.br,{}),`\\n`,`To avoid name collisions with others that have independently run this demo, we need a short unique identifier for some resource names that need to be globally unique. We\'ll call it the DEMO_ID. You can run the following to generate one and save it to DEMO_ID and the \\"demo-id\\" file.`]}),`\\n`,(0,n.jsx)(e.pre,{children:(0,n.jsx)(e.code,{className:\\"language-bash\\",children:`if [[ ! -f demo-id ]]; then node -e \'console.log(crypto.randomBytes(3).toString(\\"hex\\"))\' >demo-id; fi\\nexport DEMO_ID=$(cat demo-id)\\necho $DEMO_ID\\n`})}),`\\n`,(0,n.jsxs)(e.p,{children:[(0,n.jsx)(e.strong,{children:\\"Step 2.2\\"}),(0,n.jsx)(e.br,{}),`\\n`,\\"Before you can deploy to Azure, you will need to create some Azure resources: a Resource Group, Storage Account, and the Function App. For this demo, you can use the following commands. 
(See \\",(0,n.jsx)(e.a,{href:\\"https://learn.microsoft.com/en-us/azure/azure-functions/create-first-function-cli-node#create-supporting-azure-resources-for-your-function\\",rel:\\"nofollow\\",children:\\"this Azure docs section\\"}),\\" for more details.)\\"]}),`\\n`,(0,n.jsx)(e.pre,{children:(0,n.jsx)(e.code,{className:\\"language-bash\\",children:`REGION=westus2 # Or use another region listed in \'az account list-locations\'.\\naz group create --name \\"AzureFnElasticApmNodeSample-rg\\" --location \\"$REGION\\"\\naz storage account create --name \\"eapmdemostor\\\\${DEMO_ID}\\" --location \\"$REGION\\" \\\\\\\\\\n --resource-group \\"AzureFnElasticApmNodeSample-rg\\" --sku Standard_LRS\\naz functionapp create --name \\"azure-functions-apm-nodejs-sample-app-\\\\${DEMO_ID}\\" \\\\\\\\\\n --resource-group \\"AzureFnElasticApmNodeSample-rg\\" \\\\\\\\\\n --consumption-plan-location \\"$REGION\\" --runtime node --runtime-version 18 \\\\\\\\\\n --functions-version 4 --storage-account \\"eapmdemostor\\\\${DEMO_ID}\\"\\n`})}),`\\n`,(0,n.jsxs)(e.p,{children:[(0,n.jsx)(e.strong,{children:\\"Step 2.3\\"}),(0,n.jsx)(e.br,{}),`\\n`,\\"Next, configure your Function App with the APM server URL and secret token for your Elastic deployment. This can be done in the \\",(0,n.jsx)(e.a,{href:\\"https://portal.azure.com/\\",rel:\\"nofollow\\",children:\\"Azure Portal\\"}),\\" or with the az CLI.\\"]}),`\\n`,(0,n.jsxs)(e.p,{children:[\\"In the Azure portal, browse to your Function App, then its Application Settings (\\",(0,n.jsx)(e.a,{href:\\"https://learn.microsoft.com/en-us/azure/azure-functions/functions-how-to-use-azure-function-app-settings?tabs=portal#settings\\",rel:\\"nofollow\\",children:\\"Azure user guide\\"}),\\"). You\'ll need to add two settings:\\"]}),`\\n`,(0,n.jsx)(e.p,{children:\\"First set your APM URL and token.\\"}),`\\n`,(0,n.jsx)(e.pre,{children:(0,n.jsx)(e.code,{className:\\"language-bash\\",children:`export ELASTIC_APM_SERVER_URL=\\"\\"\\nexport ELASTIC_APM_SECRET_TOKEN=\\"\\"\\n`})}),`\\n`,(0,n.jsx)(e.p,{children:\\"Or you can use the az functionapp config appsettings set ... CLI command as follows:\\"}),`\\n`,(0,n.jsx)(e.pre,{children:(0,n.jsx)(e.code,{className:\\"language-bash\\",children:`az functionapp config appsettings set \\\\\\\\\\n -g \\"AzureFnElasticApmNodeSample-rg\\" -n \\"azure-functions-apm-nodejs-sample-app-\\\\${DEMO_ID}\\" \\\\\\\\\\n --settings \\"ELASTIC_APM_SERVER_URL=\\\\${ELASTIC_APM_SERVER_URL}\\"\\naz functionapp config appsettings set \\\\\\\\\\n -g \\"AzureFnElasticApmNodeSample-rg\\" -n \\"azure-functions-apm-nodejs-sample-app-\\\\${DEMO_ID}\\" \\\\\\\\\\n --settings \\"ELASTIC_APM_SECRET_TOKEN=\\\\${ELASTIC_APM_SECRET_TOKEN}\\"\\n`})}),`\\n`,(0,n.jsx)(e.p,{children:\\"The ELASTIC_APM_SERVER_URL and ELASTIC_APM_SECRET_TOKEN are set in Azure function\\\\u2019s settings for the app and used by the Elastic APM Agent. 
This is initiated by the initapm.js file, which starts the Elastic APM agent with:\\"}),`\\n`,(0,n.jsx)(e.pre,{children:(0,n.jsx)(e.code,{className:\\"language-javascript\\",children:`require(\\"elastic-apm-node\\").start();\\n`})}),`\\n`,(0,n.jsx)(e.p,{children:\\"When you log in to Azure and look at the function\\\\u2019s configuration, you will see them set:\\"}),`\\n`,(0,n.jsx)(e.p,{children:(0,n.jsx)(e.img,{src:\\"/assets/images/trace-azure-function-application-observability/blog-elastic-azure-functions-application-settings.png\\",alt:\\"azure functions application settings\\",width:\\"1389\\",height:\\"518\\"})}),`\\n`,(0,n.jsxs)(e.p,{children:[(0,n.jsx)(e.strong,{children:\\"Step 2.4\\"}),(0,n.jsx)(e.br,{}),`\\n`,\\"Now you can publish your app. (Re-run this command every time you make a code change.)\\"]}),`\\n`,(0,n.jsx)(e.pre,{children:(0,n.jsx)(e.code,{className:\\"language-bash\\",children:\'func azure functionapp publish \\"azure-functions-apm-nodejs-sample-app-${DEMO_ID}\\"\\\\n\'})}),`\\n`,(0,n.jsx)(e.p,{children:\\"You should log in to Azure to see the function running.\\"}),`\\n`,(0,n.jsx)(e.p,{children:(0,n.jsx)(e.img,{src:\\"/assets/images/trace-azure-function-application-observability/blog-elastic-azure-function-app.png\\",alt:\\"azure function app\\",width:\\"1709\\",height:\\"890\\"})}),`\\n`,(0,n.jsx)(e.p,{children:(0,n.jsx)(e.strong,{children:\\"Step 3. Try it out\\"})}),`\\n`,(0,n.jsx)(e.pre,{children:(0,n.jsx)(e.code,{className:\\"language-bash\\",children:`% curl https://azure-functions-apm-nodejs-sample-app-\\\\${DEMO_ID}.azurewebsites.net/api/Hello\\n{\\"message\\":\\"Hello.\\"}\\n% curl https://azure-functions-apm-nodejs-sample-app-\\\\${DEMO_ID}.azurewebsites.net/api/Goodbye\\n{\\"message\\":\\"Goodbye.\\"}\\n`})}),`\\n`,(0,n.jsx)(e.p,{children:\\"In a few moments, the APM app in your Elastic deployment will show tracing data for your Azure Function app.\\"}),`\\n`,(0,n.jsxs)(e.p,{children:[(0,n.jsx)(e.strong,{children:\\"Step 4. Apply some load to your app\\"}),(0,n.jsx)(e.br,{}),`\\n`,\\"To get some more interesting data, you can run the following to generate some load on your deployed function app:\\"]}),`\\n`,(0,n.jsx)(e.pre,{children:(0,n.jsx)(e.code,{className:\\"language-bash\\",children:`npm run loadgen\\n`})}),`\\n`,(0,n.jsxs)(e.p,{children:[\\"This uses the \\",(0,n.jsx)(e.a,{href:\\"https://github.com/mcollina/autocannon\\",rel:\\"nofollow\\",children:\\"autocannon\\"}),\' node package to generate some light load (2 concurrent users, each calling at 5 requests/s for 60s) on the \\"Goodbye\\" function.\']}),`\\n`,(0,n.jsxs)(e.p,{children:[(0,n.jsx)(e.strong,{children:\\"Step 5. 
Clean up resources\\"}),(0,n.jsx)(e.br,{}),`\\n`,\\"If you deployed to Azure, you should make sure to delete any resources so you don\'t incur any costs.\\"]}),`\\n`,(0,n.jsx)(e.pre,{children:(0,n.jsx)(e.code,{className:\\"language-bash\\",children:`az group delete --name \\"AzureFnElasticApmNodeSample-rg\\"\\n`})}),`\\n`,(0,n.jsx)(e.h2,{id:\\"analyzing-azure-function-apm-data-in-elastic\\",children:\\"Analyzing Azure Function APM data in Elastic\\"}),`\\n`,(0,n.jsx)(e.p,{children:\\"Once you have successfully set up the sample application and started generating load, you should see APM data appearing in the Elastic Observability APM Services capability.\\"}),`\\n`,(0,n.jsx)(e.h2,{id:\\"service-map\\",children:\\"Service map\\"}),`\\n`,(0,n.jsx)(e.p,{children:\\"With the default setup, you will see two services in the APM Service map.\\"}),`\\n`,(0,n.jsx)(e.p,{children:\\"The main function: azure-functions-apm-nodejs-sample-app\\"}),`\\n`,(0,n.jsx)(e.p,{children:\\"And the end point where your function is accessible: azure-functions-apm-nodejs-sample-app-ec7d4c.azurewebsites.net\\"}),`\\n`,(0,n.jsx)(e.p,{children:\\"You will see that there is a connection between the two as your application is taking requests and answering through the endpoint.\\"}),`\\n`,(0,n.jsx)(e.p,{children:(0,n.jsx)(e.img,{src:\\"/assets/images/trace-azure-function-application-observability/blog-elastic-observability-services.png\\",alt:\\"observability services\\",width:\\"1690\\",height:\\"963\\"})}),`\\n`,(0,n.jsxs)(e.p,{children:[\\"From the \\",(0,n.jsx)(e.a,{href:\\"https://www.elastic.co/observability/application-performance-monitoring\\",rel:\\"nofollow\\",children:\\"APM Service\\"}),\\" map you can further investigate the function, analyze traces, look at logs, and more.\\"]}),`\\n`,(0,n.jsx)(e.h3,{id:\\"service-details\\",children:\\"Service details\\"}),`\\n`,(0,n.jsx)(e.p,{children:\\"When we dive into the details, we can see several items.\\"}),`\\n`,(0,n.jsx)(e.p,{children:(0,n.jsx)(e.img,{src:\\"/assets/images/trace-azure-function-application-observability/blog-elastic-observability-azure-functions-apm.png\\",alt:\\"observability azure functions apm\\",width:\\"1700\\",height:\\"993\\"})}),`\\n`,(0,n.jsxs)(e.ul,{children:[`\\n`,(0,n.jsx)(e.li,{children:\\"Latency for the recent load we ran against the application\\"}),`\\n`,(0,n.jsx)(e.li,{children:\\"Transactions (Goodbye and Hello)\\"}),`\\n`,(0,n.jsx)(e.li,{children:\\"Average throughput\\"}),`\\n`,(0,n.jsx)(e.li,{children:\\"And more\\"}),`\\n`]}),`\\n`,(0,n.jsx)(e.h3,{id:\\"transaction-details\\",children:\\"Transaction details\\"}),`\\n`,(0,n.jsx)(e.p,{children:\\"We can see transaction details.\\"}),`\\n`,(0,n.jsx)(e.p,{children:(0,n.jsx)(e.img,{src:\\"/assets/images/trace-azure-function-application-observability/blog-elastic-observability-get-api-goodbye.png\\",alt:\\"observability get api goodbye\\",width:\\"1700\\",height:\\"1086\\"})}),`\\n`,(0,n.jsxs)(e.p,{children:[\'An individual trace shows us that the \\"Goodbye\\" function \',(0,n.jsx)(e.a,{href:\\"https://github.com/elastic/azure-functions-apm-nodejs-sample-app/blob/main/Goodbye/index.js#L6-L10\\",rel:\\"nofollow\\",children:\'calls the \\"Hello\\" function\'}),\\" in the same function app before returning:\\"]}),`\\n`,(0,n.jsx)(e.p,{children:(0,n.jsx)(e.img,{src:\\"/assets/images/trace-azure-function-application-observability/blog-elastic-latency-distribution-trace-sample.png\\",alt:\\"latency distribution trace 
sample\\",width:\\"1443\\",height:\\"778\\"})}),`\\n`,(0,n.jsx)(e.h3,{id:\\"machine-learning-based-latency-correlation\\",children:\\"Machine learning based latency correlation\\"}),`\\n`,(0,n.jsx)(e.p,{children:\\"As we\\\\u2019ve mentioned in other blogs, we can also correlate issues such as higher than normal latency. Since we see a spike at 1s, we run the embedded latency correlation, which uses machine learning to help analyze the potential impacting component by analyzing logs, metrics, and traces.\\"}),`\\n`,(0,n.jsx)(e.p,{children:(0,n.jsx)(e.img,{src:\\"/assets/images/trace-azure-function-application-observability/blog-elastic-latency-distribution-correlations.png\\",alt:\\"latency distribution correlations\\",width:\\"1431\\",height:\\"624\\"})}),`\\n`,(0,n.jsx)(e.p,{children:\\"The correlation indicated there is a potential cause (25%) due to the host sending the load (my machine).\\"}),`\\n`,(0,n.jsx)(e.h3,{id:\\"cold-start-detection\\",children:\\"Cold start detection\\"}),`\\n`,(0,n.jsxs)(e.p,{children:[\\"Also, we can see the impact a \\",(0,n.jsx)(e.a,{href:\\"https://azure.microsoft.com/en-ca/blog/understanding-serverless-cold-start/\\",rel:\\"nofollow\\",children:\\"cold start\\"}),\\" can have on the latency of a request:\\"]}),`\\n`,(0,n.jsx)(e.p,{children:(0,n.jsx)(e.img,{src:\\"/assets/images/trace-azure-function-application-observability/blog-elastic-trace-sample.png\\",alt:\\"trace sample\\",width:\\"1999\\",height:\\"588\\"})}),`\\n`,(0,n.jsx)(e.h2,{id:\\"summary\\",children:\\"Summary\\"}),`\\n`,(0,n.jsx)(e.p,{children:\\"Elastic Observability provides real-time monitoring of Azure Functions in your production environment for a broad range of use cases. Curated dashboards assist DevOps teams in performing root cause analysis for performance bottlenecks and errors. SRE teams can quickly view upstream and downstream dependencies, as well as perform analyses in the context of distributed microservices architecture.\\"}),`\\n`,(0,n.jsx)(e.h2,{id:\\"learn-more\\",children:\\"Learn more\\"}),`\\n`,(0,n.jsxs)(e.p,{children:[\\"To learn how to add the Elastic APM Agent to an existing Node.js Azure Function app, read \\",(0,n.jsx)(e.a,{href:\\"https://www.elastic.co/guide/en/apm/agent/nodejs/master/azure-functions.html\\",rel:\\"nofollow\\",children:\\"Monitoring Node.js Azure Functions\\"}),\\". 
Additional resources include:\\"]}),`\\n`,(0,n.jsxs)(e.ul,{children:[`\\n`,(0,n.jsx)(e.li,{children:(0,n.jsx)(e.a,{href:\\"https://www.elastic.co/blog/getting-started-with-the-azure-integration-enhancement\\",rel:\\"nofollow\\",children:\\"How to deploy and manage Elastic Observability on Microsoft Azure\\"})}),`\\n`,(0,n.jsx)(e.li,{children:(0,n.jsx)(e.a,{href:\\"https://www.elastic.co/guide/en/apm/guide/current/apm-quick-start.html\\",rel:\\"nofollow\\",children:\\"Elastic APM Quickstart\\"})}),`\\n`]})]})}function d(t={}){let{wrapper:e}=t.components||{};return e?(0,n.jsx)(e,{...t,children:(0,n.jsx)(u,{...t})}):u(t)}return A(v);})();\\n;return Component;"},"_id":"articles/trace-azure-function-application-elastic-observability.mdx","_raw":{"sourceFilePath":"articles/trace-azure-function-application-elastic-observability.mdx","sourceFileName":"trace-azure-function-application-elastic-observability.mdx","sourceFileDir":"articles","contentType":"mdx","flattenedPath":"articles/trace-azure-function-application-elastic-observability"},"type":"Article","imageUrl":"/assets/images/trace-azure-function-application-observability/09-road.jpeg","readingTime":"8 min read","url":"/trace-azure-function-application-observability","headings":[{"level":2,"title":"Elastic Observability (APM) for Azure Functions: The architecture","href":"#elastic-observability-apm-for-azure-functions-the-architecture"},{"level":2,"title":"Setting up Elastic APM for Azure Functions","href":"#setting-up-elastic-apm-for-azure-functions"},{"level":3,"title":"Application overview","href":"#application-overview"},{"level":3,"title":"Setup","href":"#setup"},{"level":2,"title":"Analyzing Azure Function APM data in Elastic","href":"#analyzing-azure-function-apm-data-in-elastic"},{"level":2,"title":"Service map","href":"#service-map"},{"level":3,"title":"Service details","href":"#service-details"},{"level":3,"title":"Transaction details","href":"#transaction-details"},{"level":3,"title":"Machine learning based latency correlation","href":"#machine-learning-based-latency-correlation"},{"level":3,"title":"Cold start detection","href":"#cold-start-detection"},{"level":2,"title":"Summary","href":"#summary"},{"level":2,"title":"Learn more","href":"#learn-more"}]},{"title":"Trace-based testing with Elastic APM and Tracetest","slug":"trace-based-testing-apm-tracetest","date":"2023-02-15","description":"Want to run trace-based tests with Elastic APM? We\'re happy to announce that Tracetest now integrates with Elastic Observability APM. Check out this hands-on example of how Tracetest works with Elastic Observability APM and OpenTelemetry.","image":"telescope-search-1680x980.png","author":[{"slug":"michael-hyatt","type":"Author","_raw":{}}],"tags":[{"slug":"apm","type":"Tag","_raw":{}},{"slug":"distributed-tracing","type":"Tag","_raw":{}},{"slug":"tracetest","type":"Tag","_raw":{}}],"body":{"raw":"\\n_This post was originally published on the_ [_Tracetest blog_](https://tracetest.io/blog/tracetest-integration-elastic-trace-based-testing-application-performance-monitoring)_._\\n\\nWant to run trace-based tests with Elastic APM? Today is your lucky day. 
We\'re happy to announce that Tracetest now integrates with Elastic Observability APM.\\n\\nCheck out this [hands-on example](https://github.com/kubeshop/tracetest/tree/main/examples/tracetest-elasticapm-with-elastic-agent) of how Tracetest works with Elastic Observability APM and OpenTelemetry!\\n\\n[Tracetest](https://tracetest.io/) is a [CNCF](https://www.cncf.io/) project aiming to provide a solution for deep integration and system testing by leveraging the rich data in distributed system traces. In this blog, we intend to provide an introduction to Tracetest and its capabilities, including how it can be integrated with [Elastic Application Performance Monitoring](https://www.elastic.co/observability/application-performance-monitoring) and [OpenTelemetry](https://opentelemetry.io/) to enhance the testing process.\\n\\n## Your good friend distributed tracing\\n\\nDistributed tracing is a way to understand how a distributed system works by tracking the flow of requests through the system. It can be used for a variety of purposes, such as identifying and fixing performance issues, figuring out what went wrong when an error occurs, and making sure that the system is running smoothly. Here are a few examples of how distributed tracing can be used:\\n\\n- **Monitoring performance:** Distributed tracing can help you keep an eye on how your distributed system is performing by showing you what\'s happening in real time. This can help you spot and fix problems like bottlenecks or slow response times that can make the system less reliable.\\n- **Finding the source of problems:** When something goes wrong, distributed tracing can help you figure out what happened by showing you the sequence of events that led up to the problem. This can help you pinpoint the specific service or component that\'s causing the issue and fix it.\\n- **Debugging:** Distributed tracing can help you find and fix bugs by giving you detailed information about what\'s happening in the system. This can help you understand why certain requests are behaving in unexpected ways and how to fix them.\\n- **Security:** Distributed tracing can help you keep an eye on security by showing you who is making requests to the system, where they are coming from, and what services are being accessed.\\n- **Optimization:** Distributed tracing can help you optimize the performance of the system by providing insight into how requests are flowing through it, which can help you identify areas that can be made more efficient and reduce the number of requests that need to be handled.\\n\\n## Distributed tracing — Now also for testing\\n\\nObservability, previously only used in operations, is now being applied in other areas of development, such as testing. This shift has led to the emergence of [\\"Observability-driven development\\"](https://www.infoq.com/articles/observability-driven-development/) and \\"trace-based testing\\" as new methods for using distributed tracing to test distributed applications.\\n\\nInstead of just checking that certain parts of the code are working, trace-driven testing follows the path that a request takes as it goes through the system. This way, you can make sure that the entire system is working properly and that the right output is produced for a given input. 
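For a concrete flavor of what such a check looks like, here is a sketch in Tracetest\'s selector style, which is shown hands-on later in this post (the selector and attribute names below follow that later example):\\n\\n```javascript\\nspan[tracetest.span.type=\\"http\\" name=\\"GET /\\"]\\n```\\n\\nAn assertion such as attr:http.status_code = 200 is then evaluated against the span selected this way.\\n\\n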
By using distributed tracing, developers can record what happens during the test and then use that information to check that everything is working as it should.\\n\\nThis method of testing can help to find problems that may be hard to detect with other types of testing and can better validate that the new code is working as expected. Additionally, distributed tracing provides information about what is happening during the test, such as how long it takes for a request to be processed and which services are being used, which can help developers understand how the code behaves in a real-world scenario.\\n\\n## Enter Tracetest\\n\\n[Tracetest](https://tracetest.io/) is a CNCF project that runs tests by verifying new traces against assertions previously created from known-good traces captured from the real system. Here\'s how you can use Tracetest:\\n\\n- Capture a known-good baseline trace. This will be the gold standard that you will use to write your tests and assertions. Trace-driven development is a better way to test how different parts of the system work together because it allows developers to test the entire process from start to finish. It gives a more complete view of how the system is functioning, instead of relying on disjointed assertions validating the request execution.\\n- Now you can start validating your code changes against the known-good behavior captured previously.\\n- Tracetest can validate the resulting traces from the test and see if the system is working as it should. This can help you find problems that traditional testing methods might not catch.\\n- Create reports: Tracetest can also create reports that summarize the results of the test so that you can share the information with your team.\\n- Help you validate in production that the new requests follow the known path and run the predefined assertions against them.\\n\\nThe APM tool in Kibana, which is a familiar UI for many developers, can provide extra information when used with Tracetest. The APM tool can show you how the system is performing during the test and help you find issues using the familiar user interface. For example, the APM tool can show you how requests are moving through the system, how long requests take to be processed, and which parts of the system are being used. This information can help you identify and fix problems during testing.\\n\\nFurthermore, the APM tool can be set to show you all the data in real time, which allows you to monitor the system\'s behavior during the test or even in production and helps you make sense of what Tracetest is showing.\\n\\n## How Tracetest works with Elastic APM to test the application\\n\\nThe components work together to provide a complete solution for testing distributed systems. The telemetry captured by the OpenTelemetry agent is sent to the Elastic APM Server, which processes and formats the data for indexing in Elasticsearch. The data can then be queried and analyzed using the Kibana APM UI, and Tracetest can be used to conduct deep integration and system tests by utilizing the rich data contained in the distributed system trace.\\n\\nFor more details on Elastic\'s support for OpenTelemetry, check out [Independence with OpenTelemetry on Elastic](https://www.elastic.co/blog/opentelemetry-observability).\\n\\n![](/assets/images/trace-based-testing-apm-tracetest/blog-elastic-distributed-system-trace.png)\\n\\n1. 
Tracetest initiates the test by sending a request to the application under test.\\n2. The application processes the request, and the built-in OpenTelemetry agent captures the telemetry data of the request. This data includes information such as request and response payloads, request and response headers, and any errors that occurred during the request processing. The agent then sends the captured telemetry data to the Elastic APM Server.\\n3. The Elastic APM server consumes OpenTelemetry or Elastic APM spans and sends the data to be stored and indexed in Elasticsearch.\\n4. Tracetest polls Elasticsearch to retrieve the captured trace data, using an Elasticsearch query to fetch it. Tracetest compares the received trace data with the expected trace data and runs the assertions. This step is used to check whether the data received from the application matches the expected data and to check for any errors or issues that may have occurred during the request processing. Based on the results of the comparison, Tracetest will report any errors or issues found and will provide detailed information about the root cause of the problem. If the test passes, Tracetest will report that the test passed, and the test execution process will be completed.\\n5. The trace data is visible and can be analyzed in the Kibana APM UI as well.\\n\\n## Running your first Tracetest environment with Elastic APM and Docker Compose\\n\\nIn your existing observability setup, you have the [OpenTelemetry Node.js agent](https://opentelemetry.io/docs/instrumentation/js/getting-started/nodejs/) configured in your code and [sending OpenTelemetry traces to the Elastic APM server that then stores](https://www.elastic.co/blog/opentelemetry-observability) them in Elasticsearch. Adding Tracetest to the infrastructure lets you write detailed trace-based tests on top of the existing tracing infrastructure. Tracetest runs tests against endpoints and uses trace data to run assertions.\\n\\nThe example that we are going to run is from the Tracetest GitHub repository. It contains a docker-compose setup, which is a convenient way to run multiple services together in a defined environment. The example includes a sample application that has been instrumented with an OpenTelemetry agent. The example also includes the Tracetest server with its Postgres database, which is responsible for invoking the test, polling Elasticsearch to retrieve the captured trace data, comparing the received trace data with the expected trace data, and running the assertions. Finally, the example includes Elasticsearch, Kibana, and the Elastic APM server from the Elastic Stack.\\n\\nTo quickly access the example, you can run the following:\\n\\n```bash\\ngit clone https://github.com/kubeshop/tracetest.git\\ncd tracetest/examples/tracetest-elasticapm-with-otel\\ndocker-compose up -d\\n```\\n\\nOnce you have Tracetest set up, open https://localhost:11633 in your browser to check out the Web UI.\\n\\nNavigate to the Settings menu and ensure the connection to Elasticsearch is working by pressing Test Connection:\\n\\n![](/assets/images/trace-based-testing-apm-tracetest/blog-elastic-tracetest-configure-data-store.png)\\n\\nTo create a test, click the Create dropdown and choose Create New Test. 
Select the HTTP Request and give it a name and description.\\n\\n![](/assets/images/trace-based-testing-apm-tracetest/blog-elastic-create-new-test.png)\\n\\nFor this simple example, GET the Node.js app, which runs at https://app:8080.\\n\\n![](/assets/images/trace-based-testing-apm-tracetest/blog-elastic-trace-request-details.png)\\n\\nWith the test created, you can click the Trace tab to see the distributed trace. It’s simple, but you can start to see how it delivers immediate visibility into every transaction your HTTP request generates.\\n\\n![](/assets/images/trace-based-testing-apm-tracetest/blog-elastic-tracetest-trigger.png)\\n\\nFrom here, you can continue by adding assertions.\\n\\nTo make an assertion based on the GET / span of our trace, select that span in the graph view and click **Current span** in the Test Spec modal. Or, copy this span selector directly, using the [Tracetest Selector Language](https://docs.tracetest.io/concepts/selectors/):\\n\\n```javascript\\nspan[tracetest.span.type=\\"http\\" name=\\"GET /\\" http.target=\\"/\\" http.method=\\"GET\\"]\\n```\\n\\nBelow, add the attr:http.status_code attribute and the expected value, which is 200. You can add more complex assertions as well, like testing whether the span executes in less than 500ms. Add a new assertion for attr:tracetest.span.duration, choose \\\\<, and add 500ms as the expected value.\\n\\nYou can check against other properties, return statuses, timing, and much more, but we’ll keep it simple for now.\\n\\n![](/assets/images/trace-based-testing-apm-tracetest/blog-elastic-tracetest-edit-test-spec.png)\\n\\nThen click **Save Test Spec**, followed by **Publish**, and you’ve created your first assertion. If you open the APM app in Kibana at https://localhost:5601 (find the username and password in the examples/tracetest-elasticapm-with-otel/.env file), you will be able to navigate to the transaction generated by the test representing the overall application call with three underlying spans:\\n\\n![](/assets/images/trace-based-testing-apm-tracetest/blog-elastic-latency-distribution.png)\\n\\n## Summary\\n\\nElastic APM and Tracetest are tools that can help make testing distributed applications easier by providing a more comprehensive view of the system\'s behavior and allowing developers to identify and diagnose performance issues more efficiently. Tracetest allows you to test the entire process from start to finish, making sure that everything is working as it should, by following the path that a request takes.\\n\\nElastic APM provides detailed information about the performance of a system, including how requests are flowing through the system, how long requests take to be processed, and which services are being called. 
Together, these tools can help developers to identify and fix issues more quickly, improve collaboration and communication among the team, and ultimately improve the overall quality of the system.\\n\\n> - Elastic APM documentation: [https://www.elastic.co/guide/en/apm/guide/current/index.html](https://www.elastic.co/guide/en/apm/guide/current/index.html)\\n> - Tracetest documentation: [https://tracetest.io/docs/](https://tracetest.io/docs/) \\n> - Tracetest Github page: [https://github.com/kubeshop/tracetest](https://github.com/kubeshop/tracetest) \\n> - Elastic blog: [https://www.elastic.co/blog/category/technical-topics](https://www.elastic.co/blog/category/technical-topics) \\n> - Elastic APM community forum: [https://discuss.elastic.co/c/apm](https://discuss.elastic.co/c/apm) \\n> - Tracetest support: [Discord channel](https://discord.com/channels/884464549347074049/963470167327772703)\\n","code":"var Component=(()=>{var p=Object.create;var a=Object.defineProperty;var u=Object.getOwnPropertyDescriptor;var g=Object.getOwnPropertyNames;var m=Object.getPrototypeOf,w=Object.prototype.hasOwnProperty;var y=(s,e)=>()=>(e||s((e={exports:{}}).exports,e),e.exports),f=(s,e)=>{for(var i in e)a(s,i,{get:e[i],enumerable:!0})},o=(s,e,i,r)=>{if(e&&typeof e==\\"object\\"||typeof e==\\"function\\")for(let n of g(e))!w.call(s,n)&&n!==i&&a(s,n,{get:()=>e[n],enumerable:!(r=u(e,n))||r.enumerable});return s};var b=(s,e,i)=>(i=s!=null?p(m(s)):{},o(e||!s||!s.__esModule?a(i,\\"default\\",{value:s,enumerable:!0}):i,s)),v=s=>o(a({},\\"__esModule\\",{value:!0}),s);var h=y((E,c)=>{c.exports=_jsx_runtime});var k={};f(k,{default:()=>d,frontmatter:()=>T});var t=b(h()),T={title:\\"Trace-based testing with Elastic APM and Tracetest\\",slug:\\"trace-based-testing-apm-tracetest\\",date:\\"2023-02-15\\",description:\\"Want to run trace-based tests with Elastic APM? We\'re happy to announce that Tracetest now integrates with Elastic Observability APM. Check out this hands-on example of how Tracetest works with Elastic Observability APM and OpenTelemetry.\\",author:[{slug:\\"michael-hyatt\\"}],image:\\"telescope-search-1680x980.png\\",tags:[{slug:\\"apm\\"},{slug:\\"distributed-tracing\\"},{slug:\\"tracetest\\"}]};function l(s){let e={a:\\"a\\",blockquote:\\"blockquote\\",code:\\"code\\",em:\\"em\\",h2:\\"h2\\",img:\\"img\\",li:\\"li\\",ol:\\"ol\\",p:\\"p\\",pre:\\"pre\\",strong:\\"strong\\",ul:\\"ul\\",...s.components};return(0,t.jsxs)(t.Fragment,{children:[(0,t.jsxs)(e.p,{children:[(0,t.jsx)(e.em,{children:\\"This post was originally published on the\\"}),\\" \\",(0,t.jsx)(e.a,{href:\\"https://tracetest.io/blog/tracetest-integration-elastic-trace-based-testing-application-performance-monitoring\\",rel:\\"nofollow\\",children:(0,t.jsx)(e.em,{children:\\"Tracetest blog\\"})}),(0,t.jsx)(e.em,{children:\\".\\"})]}),`\\n`,(0,t.jsx)(e.p,{children:\\"Want to run trace-based tests with Elastic APM? Today is your lucky day. 
We\'re happy to announce that Tracetest now integrates with Elastic Observability APM.\\"}),`\\n`,(0,t.jsxs)(e.p,{children:[\\"Check out this \\",(0,t.jsx)(e.a,{href:\\"https://github.com/kubeshop/tracetest/tree/main/examples/tracetest-elasticapm-with-elastic-agent\\",rel:\\"nofollow\\",children:\\"hands-on example\\"}),\\" of how Tracetest works with Elastic Observability APM and OpenTelemetry!\\"]}),`\\n`,(0,t.jsxs)(e.p,{children:[(0,t.jsx)(e.a,{href:\\"https://tracetest.io/\\",rel:\\"nofollow\\",children:\\"Tracetest\\"}),\\" is a \\",(0,t.jsx)(e.a,{href:\\"https://www.cncf.io/\\",rel:\\"nofollow\\",children:\\"CNCF\\"}),\\" project aiming to provide a solution for deep integration and system testing by leveraging the rich data in distributed system traces. In this blog, we intend to provide an introduction to Tracetest and its capabilities, including how it can be integrated with \\",(0,t.jsx)(e.a,{href:\\"https://www.elastic.co/observability/application-performance-monitoring\\",rel:\\"nofollow\\",children:\\"Elastic Application Performance Monitoring\\"}),\\" and \\",(0,t.jsx)(e.a,{href:\\"https://opentelemetry.io/\\",rel:\\"nofollow\\",children:\\"OpenTelemetry\\"}),\\" to enhance the testing process.\\"]}),`\\n`,(0,t.jsx)(e.h2,{id:\\"your-good-friend-distributed-tracing\\",children:\\"Your good friend distributed tracing\\"}),`\\n`,(0,t.jsx)(e.p,{children:\\"Distributed tracing is a way to understand how a distributed system works by tracking the flow of requests through the system. It can be used for a variety of purposes, such as identifying and fixing performance issues, figuring out what went wrong when an error occurs, and making sure that the system is running smoothly. Here are a few examples of how distributed tracing can be used:\\"}),`\\n`,(0,t.jsxs)(e.ul,{children:[`\\n`,(0,t.jsxs)(e.li,{children:[(0,t.jsx)(e.strong,{children:\\"Monitoring performance:\\"}),\\" Distributed tracing can help you keep an eye on how your distributed system is performing by showing you what\'s happening in real time. This can help you spot and fix problems like bottlenecks or slow response times that can make the system less reliable.\\"]}),`\\n`,(0,t.jsxs)(e.li,{children:[(0,t.jsx)(e.strong,{children:\\"Finding the source of problems:\\"}),\\" When something goes wrong, distributed tracing can help you figure out what happened by showing you the sequence of events that led up to the problem. This can help you pinpoint the specific service or component that\'s causing the issue and fix it.\\"]}),`\\n`,(0,t.jsxs)(e.li,{children:[(0,t.jsx)(e.strong,{children:\\"Debugging:\\"}),\\" Distributed tracing can help you find and fix bugs by giving you detailed information about what\'s happening in the system. 
This can help you understand why certain requests are behaving in unexpected ways and how to fix them.\\"]}),`\\n`,(0,t.jsxs)(e.li,{children:[(0,t.jsx)(e.strong,{children:\\"Security:\\"}),\\" Distributed tracing can help you keep an eye on security by showing you who is making requests to the system, where they are coming from, and what services are being accessed.\\"]}),`\\n`,(0,t.jsxs)(e.li,{children:[(0,t.jsx)(e.strong,{children:\\"Optimization:\\"}),\\" Distributed tracing can help you optimize the performance of the system by providing insight into how requests are flowing through it, which can help you identify areas that can be made more efficient and reduce the number of requests that need to be handled.\\"]}),`\\n`]}),`\\n`,(0,t.jsx)(e.h2,{id:\\"distributed-tracing--now-also-for-testing\\",children:\\"Distributed tracing \\\\u2014 Now also for testing\\"}),`\\n`,(0,t.jsxs)(e.p,{children:[\\"Observability, previously only used in operations, is now being applied in other areas of development, such as testing. This shift has led to the emergence of \\",(0,t.jsx)(e.a,{href:\\"https://www.infoq.com/articles/observability-driven-development/\\",rel:\\"nofollow\\",children:\'\\"Observability-driven development\\"\'}),\' and \\"trace-based testing\\" as new methods for using distributed tracing to test distributed applications.\']}),`\\n`,(0,t.jsx)(e.p,{children:\\"Instead of just checking that certain parts of the code are working, trace-driven testing follows the path that a request takes as it goes through the system. This way, you can make sure that the entire system is working properly and that the right output is produced for a given input. By using distributed tracing, developers can record what happens during the test and then use that information to check that everything is working as it should.\\"}),`\\n`,(0,t.jsx)(e.p,{children:\\"This method of testing can help to find problems that may be hard to detect with other types of testing and can better validate that the new code is working as expected. Additionally, distributed tracing provides information about what is happening during the test, such as how long it takes for a request to be processed and which services are being used, which can help developers understand how the code behaves in a real-world scenario.\\"}),`\\n`,(0,t.jsx)(e.h2,{id:\\"enters-tracetest\\",children:\\"Enters Tracetest\\"}),`\\n`,(0,t.jsxs)(e.p,{children:[(0,t.jsx)(e.a,{href:\\"https://tracetest.io/\\",rel:\\"nofollow\\",children:\\"Tracetest\\"}),\\" is a CNCF project that can run tests by verifying new traces against previously created assertions against other traces captured from the real systems. Here\'s how you can use Tracetest:\\"]}),`\\n`,(0,t.jsxs)(e.ul,{children:[`\\n`,(0,t.jsx)(e.li,{children:\\"Capture the baseline good known trace. This will be the golden standard that you will use to write your tests and assertions. 
Trace-driven development is a better way to test how different parts of the system work together: it lets developers test the entire process from start to finish, giving a more complete view of how the system is functioning instead of relying on disjointed assertions that only validate the request execution.\\"}),`\\n`,(0,t.jsx)(e.li,{children:\\"Now you can start validating your code changes against the known-good behavior captured previously.\\"}),`\\n`,(0,t.jsx)(e.li,{children:\\"Tracetest can validate the resulting traces from the test and see if the system is working as it should. This can help you find problems that traditional testing methods might not catch.\\"}),`\\n`,(0,t.jsx)(e.li,{children:\\"Create reports: Tracetest can also create reports that summarize the results of the test so that you can share the information with your team.\\"}),`\\n`,(0,t.jsx)(e.li,{children:\\"Help you validate in production that new requests follow the known path and run the predefined assertions against them.\\"}),`\\n`]}),`\\n`,(0,t.jsx)(e.p,{children:\\"The APM tool in Kibana, which is a familiar UI for many developers, can provide extra information when used with Tracetest. The APM tool can show you how the system is performing during the test and help you find issues using the familiar user interface. For example, the APM tool can show you how requests are moving through the system, how long requests take to be processed, and which parts of the system are being used. This information can help you identify and fix problems during testing.\\"}),`\\n`,(0,t.jsx)(e.p,{children:\\"Furthermore, the APM tool can be set to show you all the data in real time, which allows you to monitor the system\'s behavior during the test or even in production and helps you make sense of what Tracetest is showing.\\"}),`\\n`,(0,t.jsx)(e.h2,{id:\\"how-tracetest-works-with-elastic-apm-to-test-the-application\\",children:\\"How Tracetest works with Elastic APM to test the application\\"}),`\\n`,(0,t.jsx)(e.p,{children:\\"The components work together to provide a complete solution for testing distributed systems. The telemetry captured by the OpenTelemetry agent is sent to the Elastic APM Server, which processes and formats the data for indexing in Elasticsearch. The data can then be queried and analyzed using Kibana APM UI, and Tracetest can be used to conduct deep integration and system tests by utilizing the rich data contained in the distributed system trace.\\"}),`\\n`,(0,t.jsxs)(e.p,{children:[\\"For more details on Elastic\'s support for OpenTelemetry, check out \\",(0,t.jsx)(e.a,{href:\\"https://www.elastic.co/blog/opentelemetry-observability\\",rel:\\"nofollow\\",children:\\"Independence with OpenTelemetry on Elastic\\"}),\\".\\"]}),`\\n`,(0,t.jsx)(e.p,{children:(0,t.jsx)(e.img,{src:\\"/assets/images/trace-based-testing-apm-tracetest/blog-elastic-distributed-system-trace.png\\",alt:\\"\\",width:\\"810\\",height:\\"421\\"})}),`\\n`,(0,t.jsxs)(e.ol,{children:[`\\n`,(0,t.jsx)(e.li,{children:\\"Tracetest initiates the test by sending a request to the application under test.\\"}),`\\n`,(0,t.jsx)(e.li,{children:\\"The application processes the request, and the built-in OpenTelemetry agent captures the telemetry data of the request. This data includes information such as request and response payloads, request and response headers, and any errors that occurred during the request processing. 
The agent then sends the captured telemetry data to the Elastic APM Server.\\"}),`\\n`,(0,t.jsx)(e.li,{children:\\"The Elastic APM server consumes OpenTelemetry or Elastic APM spans and sends the data to be stored and indexed in Elasticsearch.\\"}),`\\n`,(0,t.jsx)(e.li,{children:\\"Tracetest polls Elasticsearch, using an Elasticsearch query to retrieve the captured trace data. It then compares the received trace data with the expected trace data and runs the assertions. This step checks whether the data received from the application matches the expected data and whether any errors or issues occurred during the request processing. Based on the results of the comparison, Tracetest reports any errors or issues found, along with detailed information about the root cause of the problem. If the test passes, Tracetest reports success and the test run completes.\\"}),`\\n`,(0,t.jsx)(e.li,{children:\\"The trace data is visible and can be analyzed in Kibana APM UI as well.\\"}),`\\n`]}),`\\n`,(0,t.jsx)(e.h2,{id:\\"running-your-first-tracetest-environment-with-elastic-apm-and-docker-compose\\",children:\\"Running your first Tracetest environment with Elastic APM and Docker Compose\\"}),`\\n`,(0,t.jsxs)(e.p,{children:[\\"In your existing observability setup, you have the \\",(0,t.jsx)(e.a,{href:\\"https://opentelemetry.io/docs/instrumentation/js/getting-started/nodejs/\\",rel:\\"nofollow\\",children:\\"OpenTelemetry Node.js agent\\"}),\\" configured in your code and \\",(0,t.jsx)(e.a,{href:\\"https://www.elastic.co/blog/opentelemetry-observability\\",rel:\\"nofollow\\",children:\\"sending OpenTelemetry traces to the Elastic APM server that then stores\\"}),\\" them in Elasticsearch. Adding Tracetest lets you write detailed trace-based tests on top of your existing tracing infrastructure. Tracetest runs tests against endpoints and uses trace data to run assertions.\\"]}),`\\n`,(0,t.jsx)(e.p,{children:\\"The example that we are going to run is from the Tracetest GitHub repository. It contains a docker-compose setup, which is a convenient way to run multiple services together in a defined environment. The example includes a sample application that has been instrumented with an OpenTelemetry agent. The example also includes the Tracetest server with its Postgres database, which is responsible for invoking the test, polling Elasticsearch to retrieve the captured trace data, comparing the received trace data with the expected trace data, and running the assertions. 
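\\"}),`\\n`,(0,t.jsx)(e.p,{children:\\"To give a feel for the shape of that setup, here is a trimmed sketch of the kind of docker-compose file involved. The service names and images below are illustrative assumptions rather than a copy of the repository file:\\"}),`\\n`,(0,t.jsx)(e.pre,{children:(0,t.jsx)(e.code,{className:\\"language-yaml\\",children:`# Illustrative sketch only -- see the repository for the actual compose file\\nservices:\\n  app: # the instrumented Node.js demo application\\n    build: .\\n  tracetest: # runs the tests, polls Elasticsearch, evaluates assertions\\n    image: kubeshop/tracetest\\n    ports:\\n      - 11633:11633\\n    depends_on:\\n      - postgres\\n  postgres: # backing database for the Tracetest server\\n    image: postgres\\n  elasticsearch: # stores and indexes the trace data\\n    image: docker.elastic.co/elasticsearch/elasticsearch:<version>\\n  kibana: # hosts the APM UI used to analyze the traces\\n    image: docker.elastic.co/kibana/kibana:<version>\\n  apm-server: # receives the telemetry and writes it to Elasticsearch\\n    image: docker.elastic.co/apm/apm-server:<version>\\n`})}),`\\n`,(0,t.jsx)(e.p,{children:\\"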
Finally, the example includes Elasticsearch, Kibana, and the Elastic APM server from the Elastic Stack.\\"}),`\\n`,(0,t.jsx)(e.p,{children:\\"To quickly access the example, you can run the following:\\"}),`\\n`,(0,t.jsx)(e.pre,{children:(0,t.jsx)(e.code,{className:\\"language-bash\\",children:`git clone https://github.com/kubeshop/tracetest.git\\ncd tracetest/examples/tracetest-elasticapm-with-otel\\ndocker-compose up -d\\n`})}),`\\n`,(0,t.jsxs)(e.p,{children:[\\"Once you have Tracetest set up, open \\",(0,t.jsx)(e.a,{href:\\"https://localhost:11633\\",rel:\\"nofollow\\",children:\\"https://localhost:11633\\"}),\\" in your browser to check out the Web UI.\\"]}),`\\n`,(0,t.jsx)(e.p,{children:\\"Navigate to the Settings menu and ensure the connection to Elasticsearch is working by pressing Test Connection:\\"}),`\\n`,(0,t.jsx)(e.p,{children:(0,t.jsx)(e.img,{src:\\"/assets/images/trace-based-testing-apm-tracetest/blog-elastic-tracetest-configure-data-store.png\\",alt:\\"\\",width:\\"1999\\",height:\\"1268\\"})}),`\\n`,(0,t.jsx)(e.p,{children:\\"To create a test, click the Create dropdown and choose Create New Test. Select the HTTP Request option and give the test a name and description.\\"}),`\\n`,(0,t.jsx)(e.p,{children:(0,t.jsx)(e.img,{src:\\"/assets/images/trace-based-testing-apm-tracetest/blog-elastic-create-new-test.png\\",alt:\\"\\",width:\\"1328\\",height:\\"960\\"})}),`\\n`,(0,t.jsxs)(e.p,{children:[\\"For this simple example, send a GET request to the Node.js app, which runs at \\",(0,t.jsx)(e.a,{href:\\"https://app:8080\\",rel:\\"nofollow\\",children:\\"https://app:8080\\"}),\\".\\"]}),`\\n`,(0,t.jsx)(e.p,{children:(0,t.jsx)(e.img,{src:\\"/assets/images/trace-based-testing-apm-tracetest/blog-elastic-trace-request-details.png\\",alt:\\"\\",width:\\"1328\\",height:\\"960\\"})}),`\\n`,(0,t.jsx)(e.p,{children:\\"With the test created, you can click the Trace tab to see the distributed trace. It\\\\u2019s simple, but you can start to see how it delivers immediate visibility into every transaction your HTTP request generates.\\"}),`\\n`,(0,t.jsx)(e.p,{children:(0,t.jsx)(e.img,{src:\\"/assets/images/trace-based-testing-apm-tracetest/blog-elastic-tracetest-trigger.png\\",alt:\\"\\",width:\\"1328\\",height:\\"960\\"})}),`\\n`,(0,t.jsx)(e.p,{children:\\"From here, you can continue by adding assertions.\\"}),`\\n`,(0,t.jsxs)(e.p,{children:[\\"To make an assertion based on the GET / span of our trace, select that span in the graph view and click \\",(0,t.jsx)(e.strong,{children:\\"Current span\\"}),\\" in the Test Spec modal. Or, copy this span selector directly, using the \\",(0,t.jsx)(e.a,{href:\\"https://docs.tracetest.io/concepts/selectors/\\",rel:\\"nofollow\\",children:\\"Tracetest Selector Language\\"}),\\":\\"]}),`\\n`,(0,t.jsx)(e.pre,{children:(0,t.jsx)(e.code,{className:\\"language-javascript\\",children:`span[tracetest.span.type=\\"http\\" name=\\"GET /\\" http.target=\\"/\\" http.method=\\"GET\\"]\\n`})}),`\\n`,(0,t.jsx)(e.p,{children:\\"Below, add the attr:http.status_code attribute and the expected value, which is 200. You can add more complex assertions as well, like testing whether the span executes in less than 500ms. 
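\\"}),`\\n`,(0,t.jsx)(e.p,{children:\\"Behind the UI, a saved test with these assertions can also be expressed as a declarative definition. The following is a rough sketch of the YAML shape Tracetest uses; treat the exact field names as an assumption to verify against the Tracetest documentation:\\"}),`\\n`,(0,t.jsx)(e.pre,{children:(0,t.jsx)(e.code,{className:\\"language-yaml\\",children:`# Approximate shape of a Tracetest test definition -- verify against the docs\\ntype: Test\\nspec:\\n  name: Homepage GET\\n  trigger:\\n    type: http\\n    httpRequest:\\n      method: GET\\n      url: https://app:8080\\n  specs:\\n    - selector: span[tracetest.span.type=\\"http\\" name=\\"GET /\\"]\\n      assertions:\\n        - attr:http.status_code = 200\\n        - attr:tracetest.span.duration < 500ms\\n`})}),`\\n`,(0,t.jsx)(e.p,{children:\\"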
Add a new assertion for attr:tracetest.span.duration, choose <, and add 500ms as the expected value.\\"}),`\\n`,(0,t.jsx)(e.p,{children:\\"You can check against other properties, return statuses, timing, and much more, but we\\\\u2019ll keep it simple for now.\\"}),`\\n`,(0,t.jsx)(e.p,{children:(0,t.jsx)(e.img,{src:\\"/assets/images/trace-based-testing-apm-tracetest/blog-elastic-tracetest-edit-test-spec.png\\",alt:\\"\\",width:\\"1328\\",height:\\"960\\"})}),`\\n`,(0,t.jsxs)(e.p,{children:[\\"Then click \\",(0,t.jsx)(e.strong,{children:\\"Save Test Spec\\"}),\\", followed by \\",(0,t.jsx)(e.strong,{children:\\"Publish\\"}),\\", and you\\\\u2019ve created your first assertion. If you open the APM app in Kibana at \\",(0,t.jsx)(e.a,{href:\\"https://localhost:5601\\",rel:\\"nofollow\\",children:\\"https://localhost:5601\\"}),\\" (find the username and password in the examples/tracetest-elasticapm-with-otel/.env file), you will be able to navigate to the transaction generated by the test representing the overall application call with three underlying spans:\\"]}),`\\n`,(0,t.jsx)(e.p,{children:(0,t.jsx)(e.img,{src:\\"/assets/images/trace-based-testing-apm-tracetest/blog-elastic-latency-distribution.png\\",alt:\\"\\",width:\\"1999\\",height:\\"1211\\"})}),`\\n`,(0,t.jsx)(e.h2,{id:\\"summary\\",children:\\"Summary\\"}),`\\n`,(0,t.jsx)(e.p,{children:\\"Elastic APM and Tracetest are tools that can help make testing distributed applications easier by providing a more comprehensive view of the system\'s behavior and allowing developers to identify and diagnose performance issues more efficiently. Tracetest allows you to test the entire process from start to finish, making sure that everything is working as it should, by following the path that a request takes.\\"}),`\\n`,(0,t.jsx)(e.p,{children:\\"Elastic APM provides detailed information about the performance of a system, including how requests are flowing through the system, how long requests take to be processed, and which services are being called. 
Together, these tools can help developers identify and fix issues more quickly, improve collaboration and communication among the team, and ultimately improve the overall quality of the system.\\"}),`\\n`,(0,t.jsxs)(e.blockquote,{children:[`\\n`,(0,t.jsxs)(e.ul,{children:[`\\n`,(0,t.jsxs)(e.li,{children:[\\"Elastic APM documentation: \\",(0,t.jsx)(e.a,{href:\\"https://www.elastic.co/guide/en/apm/guide/current/index.html\\",rel:\\"nofollow\\",children:\\"https://www.elastic.co/guide/en/apm/guide/current/index.html\\"})]}),`\\n`,(0,t.jsxs)(e.li,{children:[\\"Tracetest documentation: \\",(0,t.jsx)(e.a,{href:\\"https://tracetest.io/docs/\\",rel:\\"nofollow\\",children:\\"https://tracetest.io/docs/\\"})]}),`\\n`,(0,t.jsxs)(e.li,{children:[\\"Tracetest GitHub page: \\",(0,t.jsx)(e.a,{href:\\"https://github.com/kubeshop/tracetest\\",rel:\\"nofollow\\",children:\\"https://github.com/kubeshop/tracetest\\"})]}),`\\n`,(0,t.jsxs)(e.li,{children:[\\"Elastic blog: \\",(0,t.jsx)(e.a,{href:\\"https://www.elastic.co/blog/category/technical-topics\\",rel:\\"nofollow\\",children:\\"https://www.elastic.co/blog/category/technical-topics\\"})]}),`\\n`,(0,t.jsxs)(e.li,{children:[\\"Elastic APM community forum: \\",(0,t.jsx)(e.a,{href:\\"https://discuss.elastic.co/c/apm\\",rel:\\"nofollow\\",children:\\"https://discuss.elastic.co/c/apm\\"})]}),`\\n`,(0,t.jsxs)(e.li,{children:[\\"Tracetest support: \\",(0,t.jsx)(e.a,{href:\\"https://discord.com/channels/884464549347074049/963470167327772703\\",rel:\\"nofollow\\",children:\\"Discord channel\\"})]}),`\\n`]}),`\\n`]})]})}function d(s={}){let{wrapper:e}=s.components||{};return e?(0,t.jsx)(e,{...s,children:(0,t.jsx)(l,{...s})}):l(s)}return v(k);})();\\n;return Component;"},"_id":"articles/trace-based-testing-elastic-apm-tracetest.mdx","_raw":{"sourceFilePath":"articles/trace-based-testing-elastic-apm-tracetest.mdx","sourceFileName":"trace-based-testing-elastic-apm-tracetest.mdx","sourceFileDir":"articles","contentType":"mdx","flattenedPath":"articles/trace-based-testing-elastic-apm-tracetest"},"type":"Article","imageUrl":"/assets/images/trace-based-testing-apm-tracetest/telescope-search-1680x980.png","readingTime":"10 min read","url":"/trace-based-testing-apm-tracetest","headings":[{"level":2,"title":"Your good friend distributed tracing","href":"#your-good-friend-distributed-tracing"},{"level":2,"title":"Distributed tracing — Now also for testing","href":"#distributed-tracing--now-also-for-testing"},{"level":2,"title":"Enter Tracetest","href":"#enters-tracetest"},{"level":2,"title":"How Tracetest works with Elastic APM to test the application","href":"#how-tracetest-works-with-elastic-apm-to-test-the-application"},{"level":2,"title":"Running your first Tracetest environment with Elastic APM and Docker Compose","href":"#running-your-first-tracetest-environment-with-elastic-apm-and-docker-compose"},{"level":2,"title":"Summary","href":"#summary"}]},{"title":"Revealing unknowns in your tracing data with inferred spans in OpenTelemetry","slug":"tracing-data-inferred-spans-opentelemetry","date":"2024-04-22","description":"Distributed tracing is essential in understanding complex systems, but it can miss latency issue details. 
By combining profiling techniques with distributed tracing, Elastic provides the inferred spans feature as an extension for the OTel Java SDK.","image":"148360-Blog-header-image--Revealing-Unknowns-in-your-Tracing-Data-with-Inferred-Spans-in-OpenTelemetry_V1.jpg","author":[{"slug":"jonas-kunz","type":"Author","_raw":{}},{"slug":"alexander-wert","type":"Author","_raw":{}}],"tags":[{"slug":"opentelemetry","type":"Tag","_raw":{}},{"slug":"apm","type":"Tag","_raw":{}}],"body":{"raw":"\\nIn the complex world of microservices and distributed systems, achieving transparency and understanding the intricacies and inefficiencies of service interactions and request flows has become a paramount challenge. Distributed tracing is essential in understanding distributed systems. But distributed tracing, whether manually applied or auto-instrumented, is usually rather coarse-grained. Hence, distributed tracing covers only a limited fraction of the system and can easily miss parts of the system that are the most useful to trace.\\n\\nAddressing this gap, Elastic developed the concept of inferred spans as a powerful enhancement to traditional instrumentation-based tracing as an extension for the OpenTelemetry Java SDK/Agent. We are in the process of contributing this back to OpenTelemetry; until then, our [extension](https://github.com/elastic/elastic-otel-java/tree/main/inferred-spans) can be used seamlessly with the existing OpenTelemetry Java SDK (as described below).\\n\\nInferred spans are designed to augment the visibility provided by instrumentation-based traces, shedding light on latency sources within the application or libraries that were previously uninstrumented. This feature significantly expands the utility of distributed tracing, allowing for a more comprehensive understanding of system behavior and facilitating a deeper dive into performance optimization.\\n\\n## What is inferred spans?\\n\\nInferred spans is an observability technique that combines distributed tracing with profiling techniques to illuminate the darker, unobserved corners of your application — areas where standard instrumentation techniques fall short. The inferred spans feature interweaves information derived from profiling stacktraces with instrumentation-based tracing data, allowing for the generation of new spans based on the insights drawn from profiling data.\\n\\nThis feature proves invaluable when dealing with custom code or third-party libraries that significantly contribute to the request latency but lack built-in or external instrumentation support. Often, identifying or crafting specific instrumentation for these segments can range from challenging to outright unfeasible. Moreover, certain scenarios exist where implementing instrumentation is impractical due to the potential for substantial performance overhead. For instance, instrumenting application locking mechanisms, despite their critical role, is not viable because of their ubiquitous nature and the significant latency overhead the instrumentation can introduce to application requests. Still, ideally, such latency issues would be visible within your distributed traces.\\n\\nInferred spans ensures deeper visibility into your application’s performance dynamics, including the above-mentioned scenarios.\\n\\n## Inferred spans in action\\n\\nTo demonstrate the inferred spans feature, we will use the Java implementation of the [Elastiflix demo application](https://github.com/elastic/observability-examples/tree/main/Elastiflix/java-favorite). 
Elastiflix has an endpoint called favorites that does some Redis calls and also includes an artificial delay. First, we use the plain OpenTelemetry Java Agent to instrument our application:\\n\\n```bash\\njava -javaagent:/path/to/otel-javaagent-<version>.jar \\\\\\n-Dotel.service.name=my-service-name \\\\\\n-Dotel.exporter.otlp.endpoint=https://<otlp-endpoint> \\\\\\n\\"-Dotel.exporter.otlp.headers=Authorization=Bearer SECRETTOKENHERE\\" \\\\\\n-jar my-service-name.jar\\n```\\n\\nWith the OpenTelemetry Java Agent we get out-of-the-box instrumentation for HTTP entry points and calls to Redis for our Elastiflix application. The resulting traces contain spans for the POST /favorites entrypoint, as well as a few short spans for the calls to Redis.\\n\\n![POST /favorites entrypoint](/assets/images/tracing-data-inferred-spans-opentelemetry/image2.png)\\n\\nAs you can see in the trace above, it’s not clear where most of the time is spent within the POST /favorites request.\\n\\nLet’s see how inferred spans can shed light on these areas. You can use the inferred spans feature either manually with your OpenTelemetry SDK (see section below), package it as a drop-in extension for the upstream OpenTelemetry Java agent, or just use [Elastic’s distribution of the OpenTelemetry Java agent](https://github.com/elastic/elastic-otel-java/tree/main) that comes with the inferred spans feature.\\n\\nFor convenience, we just download the [agent jar](https://mvnrepository.com/artifact/co.elastic.otel/elastic-otel-javaagent/0.0.1) of the Elastic distribution and extend the configuration to enable the inferred spans feature:\\n\\n```bash\\njava -javaagent:/path/to/elastic-otel-javaagent-<version>.jar \\\\\\n-Dotel.service.name=my-service-name \\\\\\n-Dotel.exporter.otlp.endpoint=https://XX.apm.europe-west3.gcp.cloud.es.io:443 \\\\\\n\\"-Dotel.exporter.otlp.headers=Authorization=Bearer SECRETTOKENHERE\\" \\\\\\n-Delastic.otel.inferred.spans.enabled=true \\\\\\n-jar my-service-name.jar\\n```\\n\\nThe only non-standard option here is elastic.otel.inferred.spans.enabled: the inferred spans feature is currently opt-in and therefore needs to be enabled explicitly. Running the same application with the inferred spans feature enabled yields more comprehensive traces:\\n\\n![more comprehensive traces](/assets/images/tracing-data-inferred-spans-opentelemetry/image1.png)\\n\\nThe inferred-spans (colored blue in the above screenshot) follow the naming pattern Class#method. With that, the inferred spans feature helps us pinpoint the exact methods that contribute the most to the overall latency of the request. Note that the parent-child relationship between the HTTP entry span, the Redis spans, and the inferred spans is reconstructed correctly, resulting in a fully functional trace structure.\\n\\nExamining the handleDelay method within the Elastiflix application reveals the use of a straightforward sleep statement. Although the sleep method is not CPU-bound, the full duration of this delay is captured as inferred spans. This stems from employing the async-profiler\'s wall clock time profiling, as opposed to solely relying on CPU profiling. The ability of the inferred spans feature to reflect actual latency, including for I/O operations and other non-CPU-bound tasks, represents a significant advancement. 
It allows for diagnosing and resolving performance issues that extend beyond CPU limitations, offering a more nuanced view of system behavior.\\n\\n## Using inferred spans with your own OpenTelemetry SDK\\n\\nOpenTelemetry is a highly extensible framework: Elastic embraces this extensibility by also publishing most extensions shipped with our OpenTelemetry Java Distro as standalone-extensions to the [OpenTelemetry Java SDK](https://github.com/open-telemetry/opentelemetry-java).\\n\\nAs a result, if you do not want to use our distro (e.g., because you don’t need or want bytecode instrumentation in your project), you can still use our extensions, such as the extension for the inferred spans feature. All you need to do is set up the [OpenTelemetry SDK in your code](https://opentelemetry.io/docs/languages/java/instrumentation/#initialize-the-sdk) and add the inferred spans extension as a dependency:\\n\\n```xml\\n<dependency>\\n  <groupId>co.elastic.otel</groupId>\\n  <artifactId>inferred-spans</artifactId>\\n  <version>{latest version}</version>\\n</dependency>\\n```\\n\\nDuring your SDK setup, you’ll have to initialize and register the extension:\\n\\n```java\\nimport java.time.Duration;\\n\\nimport io.opentelemetry.exporter.otlp.trace.OtlpGrpcSpanExporter;\\nimport io.opentelemetry.sdk.trace.SdkTracerProvider;\\nimport io.opentelemetry.sdk.trace.export.BatchSpanProcessor;\\n\\n// Package of the processor is assumed here; check the inferred-spans artifact\\nimport co.elastic.otel.InferredSpansProcessor;\\n\\nInferredSpansProcessor inferredSpans = InferredSpansProcessor.builder()\\n    .samplingInterval(Duration.ofMillis(10)) // the builder offers all config options\\n    .build();\\nSdkTracerProvider tracerProvider = SdkTracerProvider.builder()\\n    .addSpanProcessor(inferredSpans)\\n    .addSpanProcessor(BatchSpanProcessor.builder(OtlpGrpcSpanExporter.builder()\\n        .setEndpoint(\\"https://<otlp-endpoint>\\")\\n        .addHeader(\\"Authorization\\", \\"Bearer <secret-token>\\")\\n        .build()).build())\\n    .build();\\ninferredSpans.setTracerProvider(tracerProvider);\\n```\\n\\nThe inferred spans extension seamlessly integrates with the [OpenTelemetry SDK Autoconfiguration mechanism](https://opentelemetry.io/docs/languages/java/instrumentation/#automatic-configuration). By incorporating the OpenTelemetry SDK and its extensions as dependencies within your application code — rather than through an external agent — you gain the flexibility to configure them using the same environment variables or JVM properties. Once the inferred spans extension is included in your classpath, activating it for autoconfigured SDKs becomes straightforward. Simply enable it using the elastic.otel.inferred.spans.enabled property, as previously described, to leverage the full capabilities of this feature with minimal setup.\\n\\n## How does inferred spans work?\\n\\nThe inferred spans feature leverages the wall clock time profiling capabilities of the widely used [async-profiler](https://github.com/async-profiler/async-profiler), a popular, low-overhead production-time profiler in the Java ecosystem. It then transforms the profiling data into actionable spans as part of the distributed traces. But what mechanism allows for this transformation?\\n\\nEssentially, the inferred spans extension engages with the lifecycle of span events, specifically when a span is either activated or deactivated across any thread via the [OpenTelemetry context](https://opentelemetry.io/docs/specs/otel/context/). Upon the activation of the initial span within a transaction, the extension commences a session of wall-clock profiling via the async-profiler, set to a predetermined duration. Concurrently, it logs the details of all span activations and deactivations, capturing their respective timestamps and the threads on which they occurred.\\n\\nFollowing the completion of the profiling session, the extension processes the profiling data alongside the log of span events. 
By correlating the data, it reconstructs the inferred spans. It\'s important to note that, in certain complex scenarios, the correlation may assign an incorrect name to a span. To mitigate this and aid in accurate identification, the extension enriches the inferred spans with stacktrace segments under the code.stacktrace attribute, offering users clarity and insight into the precise methods implicated.\\n\\n## Inferred spans vs. correlation of traces with profiling data\\n\\nIn the wake of OpenTelemetry\'s recent [announcement of the profiling signal](https://opentelemetry.io/blog/2024/profiling/), coupled with [Elastic\'s commitment to donating the Universal Profiling Agent](https://www.elastic.co/blog/elastic-donation-proposal-to-contribute-profiling-agent-to-opentelemetry) to OpenTelemetry, you might be wondering about how the inferred spans feature differentiates from merely correlating profiling data with distributed traces using span IDs and trace IDs. Rather than viewing these as competing functionalities, it\'s more accurate to consider them complementary.\\n\\nThe inferred spans feature and the correlation of tracing with profiling data both employ similar methodologies — melding tracing information with profiling data. However, they each shine in distinct areas. Inferred spans excels at identifying long-running methods that could escape notice with traditional CPU profiling, which is more adept at pinpointing CPU bottlenecks. A unique advantage of inferred spans is its ability to account for I/O time, capturing delays caused by operations like disk access that wouldn\'t typically be visible in CPU profiling flamegraphs.\\n\\nHowever, the inferred spans feature has its limitations, notably in detecting latency issues arising from \\"death by a thousand cuts\\" — where a method, although not time-consuming per invocation, significantly impacts total latency due to being called numerous times across a request. While individual calls might not be captured as inferred spans due to their brevity, CPU-bound methods contributing to latency are unveiled through CPU profiling, as flamegraphs display the aggregate CPU time consumed by these methods.\\n\\nAn additional strength of the inferred spans feature lies in its data structure, offering a simplified tracing model that outlines typical parent-child relationships, execution order, and good latency estimates. This structure is achieved by integrating tracing data with span activation/deactivation events and profiling data, facilitating straightforward navigation and troubleshooting of latency issues within individual traces.\\n\\nCorrelating distributed tracing data with profiling data comes with a different set of advantages. Learn more about it in our related blog post, [Beyond the trace: Pinpointing performance culprits with continuous profiling and distributed tracing correlation](https://www.elastic.co/blog/continuous-profiling-distributed-tracing-correlation).\\n\\n## What about the performance overhead?\\n\\nAs mentioned before, the inferred spans functionality is based on the widely used async-profiler, known for its minimal impact on performance. However, the efficiency of profiling operations is not without its caveats, largely influenced by the specific configurations employed. 
A pivotal factor in this balancing act is the sampling interval — the longer the interval between samples, the lower the incurred overhead, albeit at the expense of potentially overlooking shorter methods that could be critical to the inferred spans feature discovery process.\\n\\nAdjusting the probability-based trace sampling presents another way for optimization, directly influencing the overhead. For instance, setting trace sampling to 50% effectively halves the profiling load, making the inferred spans feature even more resource-efficient on average per request. This nuanced approach to tuning ensures that the inferred spans feature can be leveraged in real-world, production environments with a manageable performance footprint. When properly configured, this feature offers a potent, low-overhead solution for enhancing observability and diagnostic capabilities within production applications.\\n\\n## What’s next for inferred spans and OpenTelemetry?\\n\\nThis blog post outlined and introduced the inferred spans feature available as an extension for the OpenTelemetry Java SDK and built into the newly introduced Elastic OpenTelemetry Java Distro. Inferred spans allows users to troubleshoot latency issues in areas of code that are not explicitly instrumented while utilizing traditional tracing data.\\n\\nThe feature is currently merely a port of the existing feature from the proprietary Elastic APM Agent. With Elastic embracing OpenTelemetry, we plan on contributing this extension to the upstream OpenTelemetry project. For that, we also plan on migrating the extension to the latest async-profiler 3.x release. [Try out inferred spans for yourself](https://github.com/elastic/elastic-otel-java/tree/main/inferred-spans) and see how it can help you diagnose performance problems in your applications.\\n\\n_The release and timing of any features or functionality described in this post remain at Elastic\'s sole discretion. Any features or functionality not currently available may not be delivered on time or at all._\\n","code":"var Component=(()=>{var p=Object.create;var r=Object.defineProperty;var u=Object.getOwnPropertyDescriptor;var f=Object.getOwnPropertyNames;var m=Object.getPrototypeOf,g=Object.prototype.hasOwnProperty;var y=(n,e)=>()=>(e||n((e={exports:{}}).exports,e),e.exports),b=(n,e)=>{for(var i in e)r(n,i,{get:e[i],enumerable:!0})},o=(n,e,i,s)=>{if(e&&typeof e==\\"object\\"||typeof e==\\"function\\")for(let a of f(e))!g.call(n,a)&&a!==i&&r(n,a,{get:()=>e[a],enumerable:!(s=u(e,a))||s.enumerable});return n};var v=(n,e,i)=>(i=n!=null?p(m(n)):{},o(e||!n||!n.__esModule?r(i,\\"default\\",{value:n,enumerable:!0}):i,n)),w=n=>o(r({},\\"__esModule\\",{value:!0}),n);var c=y((O,l)=>{l.exports=_jsx_runtime});var T={};b(T,{default:()=>h,frontmatter:()=>x});var t=v(c()),x={title:\\"Revealing unknowns in your tracing data with inferred spans in OpenTelemetry\\",slug:\\"tracing-data-inferred-spans-opentelemetry\\",date:\\"2024-04-22\\",description:\\"Distributed tracing is essential in understanding complex systems, but it can miss latency issue details. 
By combining profiling techniques with distributed tracing, Elastic provides the inferred spans feature as an extension for the OTel Java SDK.\\",author:[{slug:\\"jonas-kunz\\"},{slug:\\"alexander-wert\\"}],image:\\"148360-Blog-header-image--Revealing-Unknowns-in-your-Tracing-Data-with-Inferred-Spans-in-OpenTelemetry_V1.jpg\\",tags:[{slug:\\"opentelemetry\\"},{slug:\\"apm\\"}]};function d(n){let e={a:\\"a\\",code:\\"code\\",em:\\"em\\",h2:\\"h2\\",img:\\"img\\",p:\\"p\\",pre:\\"pre\\",...n.components};return(0,t.jsxs)(t.Fragment,{children:[(0,t.jsx)(e.p,{children:\\"In the complex world of microservices and distributed systems, achieving transparency and understanding the intricacies and inefficiencies of service interactions and request flows has become a paramount challenge. Distributed tracing is essential in understanding distributed systems. But distributed tracing, whether manually applied or auto-instrumented, is usually rather coarse-grained. Hence, distributed tracing covers only a limited fraction of the system and can easily miss parts of the system that are the most useful to trace.\\"}),`\\n`,(0,t.jsxs)(e.p,{children:[\\"Addressing this gap, Elastic developed the concept of inferred spans as a powerful enhancement to traditional instrumentation-based tracing as an extension for the OpenTelemetry Java SDK/Agent. We are in the process of contributing this back to OpenTelemetry; until then, our \\",(0,t.jsx)(e.a,{href:\\"https://github.com/elastic/elastic-otel-java/tree/main/inferred-spans\\",rel:\\"nofollow\\",children:\\"extension\\"}),\\" can be used seamlessly with the existing OpenTelemetry Java SDK (as described below).\\"]}),`\\n`,(0,t.jsx)(e.p,{children:\\"Inferred spans are designed to augment the visibility provided by instrumentation-based traces, shedding light on latency sources within the application or libraries that were previously uninstrumented. This feature significantly expands the utility of distributed tracing, allowing for a more comprehensive understanding of system behavior and facilitating a deeper dive into performance optimization.\\"}),`\\n`,(0,t.jsx)(e.h2,{id:\\"what-is-inferred-spans\\",children:\\"What is inferred spans?\\"}),`\\n`,(0,t.jsx)(e.p,{children:\\"Inferred spans is an observability technique that combines distributed tracing with profiling techniques to illuminate the darker, unobserved corners of your application \\\\u2014 areas where standard instrumentation techniques fall short. The inferred spans feature interweaves information derived from profiling stacktraces with instrumentation-based tracing data, allowing for the generation of new spans based on the insights drawn from profiling data.\\"}),`\\n`,(0,t.jsx)(e.p,{children:\\"This feature proves invaluable when dealing with custom code or third-party libraries that significantly contribute to the request latency but lack built-in or external instrumentation support. Often, identifying or crafting specific instrumentation for these segments can range from challenging to outright unfeasible. Moreover, certain scenarios exist where implementing instrumentation is impractical due to the potential for substantial performance overhead. For instance, instrumenting application locking mechanisms, despite their critical role, is not viable because of their ubiquitous nature and the significant latency overhead the instrumentation can introduce to application requests. 
Still, ideally, such latency issues would be visible within your distributed traces.\\"}),`\\n`,(0,t.jsx)(e.p,{children:\\"Inferred spans ensures deeper visibility into your application\\\\u2019s performance dynamics, including the above-mentioned scenarios.\\"}),`\\n`,(0,t.jsx)(e.h2,{id:\\"inferred-spans-in-action\\",children:\\"Inferred spans in action\\"}),`\\n`,(0,t.jsxs)(e.p,{children:[\\"To demonstrate the inferred spans feature, we will use the Java implementation of the \\",(0,t.jsx)(e.a,{href:\\"https://github.com/elastic/observability-examples/tree/main/Elastiflix/java-favorite\\",rel:\\"nofollow\\",children:\\"Elastiflix demo application\\"}),\\". Elastiflix has an endpoint called favorites that does some Redis calls and also includes an artificial delay. First, we use the plain OpenTelemetry Java Agent to instrument our application:\\"]}),`\\n`,(0,t.jsx)(e.pre,{children:(0,t.jsx)(e.code,{className:\\"language-bash\\",children:`java -javaagent:/path/to/otel-javaagent-<version>.jar \\\\\\\\\\n-Dotel.service.name=my-service-name \\\\\\\\\\n-Dotel.exporter.otlp.endpoint=https://<otlp-endpoint> \\\\\\\\\\n\\"-Dotel.exporter.otlp.headers=Authorization=Bearer SECRETTOKENHERE\\" \\\\\\\\\\n-jar my-service-name.jar\\n`})}),`\\n`,(0,t.jsx)(e.p,{children:\\"With the OpenTelemetry Java Agent we get out-of-the-box instrumentation for HTTP entry points and calls to Redis for our Elastiflix application. The resulting traces contain spans for the POST /favorites entrypoint, as well as a few short spans for the calls to Redis.\\"}),`\\n`,(0,t.jsx)(e.p,{children:(0,t.jsx)(e.img,{src:\\"/assets/images/tracing-data-inferred-spans-opentelemetry/image2.png\\",alt:\\"POST /favorites entrypoint\\",width:\\"1625\\",height:\\"447\\"})}),`\\n`,(0,t.jsx)(e.p,{children:\\"As you can see in the trace above, it\\\\u2019s not clear where most of the time is spent within the POST /favorites request.\\"}),`\\n`,(0,t.jsxs)(e.p,{children:[\\"Let\\\\u2019s see how inferred spans can shed light on these areas. You can use the inferred spans feature either manually with your OpenTelemetry SDK (see section below), package it as a drop-in extension for the upstream OpenTelemetry Java agent, or just use \\",(0,t.jsx)(e.a,{href:\\"https://github.com/elastic/elastic-otel-java/tree/main\\",rel:\\"nofollow\\",children:\\"Elastic\\\\u2019s distribution of the OpenTelemetry Java agent\\"}),\\" that comes with the inferred spans feature.\\"]}),`\\n`,(0,t.jsxs)(e.p,{children:[\\"For convenience, we just download the \\",(0,t.jsx)(e.a,{href:\\"https://mvnrepository.com/artifact/co.elastic.otel/elastic-otel-javaagent/0.0.1\\",rel:\\"nofollow\\",children:\\"agent jar\\"}),\\" of the Elastic distribution and extend the configuration to enable the inferred spans feature:\\"]}),`\\n`,(0,t.jsx)(e.pre,{children:(0,t.jsx)(e.code,{className:\\"language-bash\\",children:`java -javaagent:/path/to/elastic-otel-javaagent-<version>.jar \\\\\\\\\\n-Dotel.service.name=my-service-name \\\\\\\\\\n-Dotel.exporter.otlp.endpoint=https://XX.apm.europe-west3.gcp.cloud.es.io:443 \\\\\\\\\\n\\"-Dotel.exporter.otlp.headers=Authorization=Bearer SECRETTOKENHERE\\" \\\\\\\\\\n-Delastic.otel.inferred.spans.enabled=true \\\\\\\\\\n-jar my-service-name.jar\\n`})}),`\\n`,(0,t.jsx)(e.p,{children:\\"The only non-standard option here is elastic.otel.inferred.spans.enabled: the inferred spans feature is currently opt-in and therefore needs to be enabled explicitly. 
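\\"}),`\\n`,(0,t.jsx)(e.p,{children:\\"If you would rather enable this through an environment variable (for example, in a container), the usual OpenTelemetry convention of upper-casing the property name and replacing dots with underscores suggests the equivalent shown below \\\\u2014 the exact variable name is an assumption to verify against the distro documentation:\\"}),`\\n`,(0,t.jsx)(e.pre,{children:(0,t.jsx)(e.code,{className:\\"language-bash\\",children:`# Assumed env-var equivalent of -Delastic.otel.inferred.spans.enabled=true\\nexport ELASTIC_OTEL_INFERRED_SPANS_ENABLED=true\\njava -javaagent:/path/to/elastic-otel-javaagent-<version>.jar -jar my-service-name.jar\\n`})}),`\\n`,(0,t.jsx)(e.p,{children:\\"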
Running the same application with the inferred spans feature enabled yields more comprehensive traces:\\"}),`\\n`,(0,t.jsx)(e.p,{children:(0,t.jsx)(e.img,{src:\\"/assets/images/tracing-data-inferred-spans-opentelemetry/image1.png\\",alt:\\"more comprehensive traces\\",width:\\"1621\\",height:\\"596\\"})}),`\\n`,(0,t.jsx)(e.p,{children:\\"The inferred-spans (colored blue in the above screenshot) follow the naming pattern Class#method. With that, the inferred spans feature helps us pinpoint the exact methods that contribute the most to the overall latency of the request. Note that the parent-child relationship between the HTTP entry span, the Redis spans, and the inferred spans is reconstructed correctly, resulting in a fully functional trace structure.\\"}),`\\n`,(0,t.jsx)(e.p,{children:\\"Examining the handleDelay method within the Elastiflix application reveals the use of a straightforward sleep statement. Although the sleep method is not CPU-bound, the full duration of this delay is captured as inferred spans. This stems from employing the async-profiler\'s wall clock time profiling, as opposed to solely relying on CPU profiling. The ability of the inferred spans feature to reflect actual latency, including for I/O operations and other non-CPU-bound tasks, represents a significant advancement. It allows for diagnosing and resolving performance issues that extend beyond CPU limitations, offering a more nuanced view of system behavior.\\"}),`\\n`,(0,t.jsx)(e.h2,{id:\\"using-inferred-spans-with-your-own-opentelemetry-sdk\\",children:\\"Using inferred spans with your own OpenTelemetry SDK\\"}),`\\n`,(0,t.jsxs)(e.p,{children:[\\"OpenTelemetry is a highly extensible framework: Elastic embraces this extensibility by also publishing most extensions shipped with our OpenTelemetry Java Distro as standalone-extensions to the \\",(0,t.jsx)(e.a,{href:\\"https://github.com/open-telemetry/opentelemetry-java\\",rel:\\"nofollow\\",children:\\"OpenTelemetry Java SDK\\"}),\\".\\"]}),`\\n`,(0,t.jsxs)(e.p,{children:[\\"As a result, if you do not want to use our distro (e.g., because you don\\\\u2019t need or want bytecode instrumentation in your project), you can still use our extensions, such as the extension for the inferred spans feature. 
All you need to do is set up the \\",(0,t.jsx)(e.a,{href:\\"https://opentelemetry.io/docs/languages/java/instrumentation/#initialize-the-sdk\\",rel:\\"nofollow\\",children:\\"OpenTelemetry SDK in your code\\"}),\\" and add the inferred spans extension as a dependency:\\"]}),`\\n`,(0,t.jsx)(e.pre,{children:(0,t.jsx)(e.code,{className:\\"language-xml\\",children:`<dependency>\\n  <groupId>co.elastic.otel</groupId>\\n  <artifactId>inferred-spans</artifactId>\\n  <version>{latest version}</version>\\n</dependency>\\n`})}),`\\n`,(0,t.jsx)(e.p,{children:\\"During your SDK setup, you\\\\u2019ll have to initialize and register the extension:\\"}),`\\n`,(0,t.jsx)(e.pre,{children:(0,t.jsx)(e.code,{className:\\"language-java\\",children:`import java.time.Duration;\\n\\nimport io.opentelemetry.exporter.otlp.trace.OtlpGrpcSpanExporter;\\nimport io.opentelemetry.sdk.trace.SdkTracerProvider;\\nimport io.opentelemetry.sdk.trace.export.BatchSpanProcessor;\\n\\n// Package of the processor is assumed here; check the inferred-spans artifact\\nimport co.elastic.otel.InferredSpansProcessor;\\n\\nInferredSpansProcessor inferredSpans = InferredSpansProcessor.builder()\\n    .samplingInterval(Duration.ofMillis(10)) // the builder offers all config options\\n    .build();\\nSdkTracerProvider tracerProvider = SdkTracerProvider.builder()\\n    .addSpanProcessor(inferredSpans)\\n    .addSpanProcessor(BatchSpanProcessor.builder(OtlpGrpcSpanExporter.builder()\\n        .setEndpoint(\\"https://<otlp-endpoint>\\")\\n        .addHeader(\\"Authorization\\", \\"Bearer <secret-token>\\")\\n        .build()).build())\\n    .build();\\ninferredSpans.setTracerProvider(tracerProvider);\\n`})}),`\\n`,(0,t.jsxs)(e.p,{children:[\\"The inferred spans extension seamlessly integrates with the \\",(0,t.jsx)(e.a,{href:\\"https://opentelemetry.io/docs/languages/java/instrumentation/#automatic-configuration\\",rel:\\"nofollow\\",children:\\"OpenTelemetry SDK Autoconfiguration mechanism\\"}),\\". By incorporating the OpenTelemetry SDK and its extensions as dependencies within your application code \\\\u2014 rather than through an external agent \\\\u2014 you gain the flexibility to configure them using the same environment variables or JVM properties. Once the inferred spans extension is included in your classpath, activating it for autoconfigured SDKs becomes straightforward. Simply enable it using the elastic.otel.inferred.spans.enabled property, as previously described, to leverage the full capabilities of this feature with minimal setup.\\"]}),`\\n`,(0,t.jsx)(e.h2,{id:\\"how-does-inferred-spans-work\\",children:\\"How does inferred spans work?\\"}),`\\n`,(0,t.jsxs)(e.p,{children:[\\"The inferred spans feature leverages the wall clock time profiling capabilities of the widely used \\",(0,t.jsx)(e.a,{href:\\"https://github.com/async-profiler/async-profiler\\",rel:\\"nofollow\\",children:\\"async-profiler\\"}),\\", a popular, low-overhead production-time profiler in the Java ecosystem. It then transforms the profiling data into actionable spans as part of the distributed traces. But what mechanism allows for this transformation?\\"]}),`\\n`,(0,t.jsxs)(e.p,{children:[\\"Essentially, the inferred spans extension engages with the lifecycle of span events, specifically when a span is either activated or deactivated across any thread via the \\",(0,t.jsx)(e.a,{href:\\"https://opentelemetry.io/docs/specs/otel/context/\\",rel:\\"nofollow\\",children:\\"OpenTelemetry context\\"}),\\". Upon the activation of the initial span within a transaction, the extension commences a session of wall-clock profiling via the async-profiler, set to a predetermined duration. Concurrently, it logs the details of all span activations and deactivations, capturing their respective timestamps and the threads on which they occurred.\\"]}),`\\n`,(0,t.jsx)(e.p,{children:\\"Following the completion of the profiling session, the extension processes the profiling data alongside the log of span events. 
By correlating the data, it reconstructs the inferred spans. It\'s important to note that, in certain complex scenarios, the correlation may assign an incorrect name to a span. To mitigate this and aid in accurate identification, the extension enriches the inferred spans with stacktrace segments under the code.stacktrace attribute, offering users clarity and insight into the precise methods implicated.\\"}),`\\n`,(0,t.jsx)(e.h2,{id:\\"inferred-spans-vs-correlation-of-traces-with-profiling-data\\",children:\\"Inferred spans vs. correlation of traces with profiling data\\"}),`\\n`,(0,t.jsxs)(e.p,{children:[\\"In the wake of OpenTelemetry\'s recent \\",(0,t.jsx)(e.a,{href:\\"https://opentelemetry.io/blog/2024/profiling/\\",rel:\\"nofollow\\",children:\\"announcement of the profiling signal\\"}),\\", coupled with \\",(0,t.jsx)(e.a,{href:\\"https://www.elastic.co/blog/elastic-donation-proposal-to-contribute-profiling-agent-to-opentelemetry\\",rel:\\"nofollow\\",children:\\"Elastic\'s commitment to donating the Universal Profiling Agent\\"}),\\" to OpenTelemetry, you might be wondering about how the inferred spans feature differentiates from merely correlating profiling data with distributed traces using span IDs and trace IDs. Rather than viewing these as competing functionalities, it\'s more accurate to consider them complementary.\\"]}),`\\n`,(0,t.jsx)(e.p,{children:\\"The inferred spans feature and the correlation of tracing with profiling data both employ similar methodologies \\\\u2014 melding tracing information with profiling data. However, they each shine in distinct areas. Inferred spans excels at identifying long-running methods that could escape notice with traditional CPU profiling, which is more adept at pinpointing CPU bottlenecks. A unique advantage of inferred spans is its ability to account for I/O time, capturing delays caused by operations like disk access that wouldn\'t typically be visible in CPU profiling flamegraphs.\\"}),`\\n`,(0,t.jsx)(e.p,{children:\'However, the inferred spans feature has its limitations, notably in detecting latency issues arising from \\"death by a thousand cuts\\" \\\\u2014 where a method, although not time-consuming per invocation, significantly impacts total latency due to being called numerous times across a request. While individual calls might not be captured as inferred spans due to their brevity, CPU-bound methods contributing to latency are unveiled through CPU profiling, as flamegraphs display the aggregate CPU time consumed by these methods.\'}),`\\n`,(0,t.jsx)(e.p,{children:\\"An additional strength of the inferred spans feature lies in its data structure, offering a simplified tracing model that outlines typical parent-child relationships, execution order, and good latency estimates. This structure is achieved by integrating tracing data with span activation/deactivation events and profiling data, facilitating straightforward navigation and troubleshooting of latency issues within individual traces.\\"}),`\\n`,(0,t.jsxs)(e.p,{children:[\\"Correlating distributed tracing data with profiling data comes with a different set of advantages. 
Learn more about it in our related blog post, \\",(0,t.jsx)(e.a,{href:\\"https://www.elastic.co/blog/continuous-profiling-distributed-tracing-correlation\\",rel:\\"nofollow\\",children:\\"Beyond the trace: Pinpointing performance culprits with continuous profiling and distributed tracing correlation\\"}),\\".\\"]}),`\\n`,(0,t.jsx)(e.h2,{id:\\"what-about-the-performance-overhead\\",children:\\"What about the performance overhead?\\"}),`\\n`,(0,t.jsx)(e.p,{children:\\"As mentioned before, the inferred spans functionality is based on the widely used async-profiler, known for its minimal impact on performance. However, the efficiency of profiling operations is not without its caveats, largely influenced by the specific configurations employed. A pivotal factor in this balancing act is the sampling interval \\\\u2014 the longer the interval between samples, the lower the incurred overhead, albeit at the expense of potentially overlooking shorter methods that could be critical to the inferred spans feature discovery process.\\"}),`\\n`,(0,t.jsx)(e.p,{children:\\"Adjusting the probability-based trace sampling presents another way for optimization, directly influencing the overhead. For instance, setting trace sampling to 50% effectively halves the profiling load, making the inferred spans feature even more resource-efficient on average per request. This nuanced approach to tuning ensures that the inferred spans feature can be leveraged in real-world, production environments with a manageable performance footprint. When properly configured, this feature offers a potent, low-overhead solution for enhancing observability and diagnostic capabilities within production applications.\\"}),`\\n`,(0,t.jsx)(e.h2,{id:\\"whats-next-for-inferred-spans-and-opentelemetry\\",children:\\"What\\\\u2019s next for inferred spans and OpenTelemetry?\\"}),`\\n`,(0,t.jsx)(e.p,{children:\\"This blog post outlined and introduced the inferred spans feature available as an extension for the OpenTelemetry Java SDK and built into the newly introduced Elastic OpenTelemetry Java Distro. Inferred spans allows users to troubleshoot latency issues in areas of code that are not explicitly instrumented while utilizing traditional tracing data.\\"}),`\\n`,(0,t.jsxs)(e.p,{children:[\\"The feature is currently merely a port of the existing feature from the proprietary Elastic APM Agent. With Elastic embracing OpenTelemetry, we plan on contributing this extension to the upstream OpenTelemetry project. For that, we also plan on migrating the extension to the latest async-profiler 3.x release. \\",(0,t.jsx)(e.a,{href:\\"https://github.com/elastic/elastic-otel-java/tree/main/inferred-spans\\",rel:\\"nofollow\\",children:\\"Try out inferred spans for yourself\\"}),\\" and see how it can help you diagnose performance problems in your applications.\\"]}),`\\n`,(0,t.jsx)(e.p,{children:(0,t.jsx)(e.em,{children:\\"The release and timing of any features or functionality described in this post remain at Elastic\'s sole discretion. 
Any features or functionality not currently available may not be delivered on time or at all.\\"})})]})}function h(n={}){let{wrapper:e}=n.components||{};return e?(0,t.jsx)(e,{...n,children:(0,t.jsx)(d,{...n})}):d(n)}return w(T);})();\\n;return Component;"},"_id":"articles/tracing-data-inferred-spans-opentelemetry.mdx","_raw":{"sourceFilePath":"articles/tracing-data-inferred-spans-opentelemetry.mdx","sourceFileName":"tracing-data-inferred-spans-opentelemetry.mdx","sourceFileDir":"articles","contentType":"mdx","flattenedPath":"articles/tracing-data-inferred-spans-opentelemetry"},"type":"Article","imageUrl":"/assets/images/tracing-data-inferred-spans-opentelemetry/148360-Blog-header-image--Revealing-Unknowns-in-your-Tracing-Data-with-Inferred-Spans-in-OpenTelemetry_V1.jpg","readingTime":"10 min read","url":"/tracing-data-inferred-spans-opentelemetry","headings":[{"level":2,"title":"What is inferred spans?","href":"#what-is-inferred-spans"},{"level":2,"title":"Inferred spans in action","href":"#inferred-spans-in-action"},{"level":2,"title":"Using inferred spans with your own OpenTelemetry SDK","href":"#using-inferred-spans-with-your-own-opentelemetry-sdk"},{"level":2,"title":"How does inferred spans work?","href":"#how-does-inferred-spans-work"},{"level":2,"title":"Inferred spans vs. correlation of traces with profiling data","href":"#inferred-spans-vs-correlation-of-traces-with-profiling-data"},{"level":2,"title":"What about the performance overhead?","href":"#what-about-the-performance-overhead"},{"level":2,"title":"What’s next for inferred spans and OpenTelemetry?","href":"#whats-next-for-inferred-spans-and-opentelemetry"}]},{"title":"Two sides of the same coin: Uniting testing and monitoring with Synthetic Monitoring","slug":"testing-monitoring-synthetic-monitoring","date":"2023-02-06","description":"DevOps aims to establish complementary practices across development and operations. See how Playwright, @elastic/synthetics, GitHub Actions, and Elastic Synthetics can unite development and SRE teams in validating and monitoring the user experience.","image":"digital-experience-monitoring.jpg","author":[{"slug":"carly-richmond","type":"Author","_raw":{}}],"tags":[{"slug":"gitops","type":"Tag","_raw":{}},{"slug":"synthetics","type":"Tag","_raw":{}},{"slug":"javascript","type":"Tag","_raw":{}}],"body":{"raw":"\\nHistorically, software development and SRE have worked in silos with different cultural perspectives and priorities. The goal of DevOps is to establish common and complementary practices across software development and operations. However, for some organizations true collaboration is rare, and we still have a way to go to build effective DevOps partnerships.\\n\\nOutside of cultural challenges, one of the most common reasons for this disconnect is using different tools to achieve similar goals — case in point, end-to-end (e2e) testing versus [synthetic monitoring](https://www.elastic.co/observability/synthetic-monitoring).\\n\\nThis blog shares an overview of these techniques. 
Using the example repository [carlyrichmond/synthetics-replicator](https://github.com/carlyrichmond/synthetics-replicator), we’ll also show how Playwright, @elastic/synthetics, and GitHub Actions can combine forces with Elastic Synthetics and the recorder to unite development and SRE teams in validating and monitoring the user experience for a simple web application hosted on a provider such as [Netlify](https://www.netlify.com/).\\n\\nElastic recently [introduced synthetics monitoring](https://www.elastic.co/blog/new-synthetic-monitoring-observability), and [as highlighted in our prior blog](https://www.elastic.co/blog/why-and-how-replace-end-to-end-tests-synthetic-monitors), it can replace e2e tests altogether. Uniting around a single tool to validate the user workflow early provides a common language for recreating user issues and validating fixes.\\n\\n## Synthetics Monitoring versus e2e tests\\n\\nIf development and operations tools are at war, it’s difficult to unify their different cultures. Considering the definitions of these approaches shows that they in fact aim to achieve the same objective.\\n\\ne2e tests are a suite of tests that recreate the user path, including clicks, user text entry, and navigations. Although many argue it’s about testing the integration of the layers of a software application, it’s the user workflow that e2e tests emulate. Meanwhile, Synthetic Monitoring, specifically a subset known as browser monitoring, is an application performance monitoring practice that emulates the user path through an application.\\n\\nBoth these techniques emulate the user path. If we use tooling that crosses the developer and operational divide, we can work together to build tests that can also provide production monitoring in our web applications.\\n\\n## Creating user journeys\\n\\nWhen a new user workflow, or set of features that accomplish a key goal, is under development in our application, developers can use @elastic/synthetics to create user journeys. The initial project scaffolding can be generated using the init utility once installed, as in the below example. Note that Node.js must be installed prior to using this utility.\\n\\n```bash\\nnpm install -g @elastic/synthetics\\nnpx @elastic/synthetics init synthetics-replicator-tests\\n```\\n\\nBefore commencing the wizard, make sure you have your Elastic cluster information and the Elastic Synthetics integration set up on your cluster. You will need:\\n\\n1. Monitor Management enabled within the Elastic Synthetics app, as per the prerequisites in the [getting started documentation](https://www.elastic.co/guide/en/observability/8.8/synthetics-get-started-project.html#_prerequisites).\\n2. The Cloud ID of your Elastic Cloud cluster, if using Elastic Cloud. Alternatively, if you are using on-prem hosting, you need to enter your Kibana endpoint.\\n3. An API key generated from your cluster. 
There is a shortcut in the Synthetics application Settings to generate this key under the Project API Keys tab, as shown [in the documentation](https://www.elastic.co/guide/en/observability/current/synthetics-get-started-project.html#synthetics-get-started-project-init).\\n\\nThis wizard walks you through generating a sample project containing configuration and example monitor journeys, with a structure similar to the below:\\n\\n![synthetics replicator tests](/assets/images/testing-monitoring-synthetic-monitoring/blog-elastic-synthetics-replicator-tests.png)\\n\\nFor web developers, most of the elements, such as the README, package.json, and lock files, will be familiar. The main configuration for your monitors is available in synthetics.config.ts, as shown below. This configuration can be amended to include production- and development-specific settings. This is essential for combining forces: it allows the same journeys to be used both as e2e tests and as production monitors. Although not in this example, details of [private locations](https://www.elastic.co/guide/en/observability/current/synthetics-private-location.html) can be included if you would prefer to monitor from your own dedicated infrastructure rather than from Elastic infrastructure.\\n\\n```javascript\\nimport type { SyntheticsConfig } from \\"@elastic/synthetics\\";\\n\\nexport default (env) => {\\n  const config: SyntheticsConfig = {\\n    params: {\\n      url: \\"https://localhost:5173\\",\\n    },\\n    playwrightOptions: {\\n      ignoreHTTPSErrors: false,\\n    },\\n    /**\\n     * Configure global monitor settings\\n     */\\n    monitor: {\\n      schedule: 10,\\n      locations: [\\"united_kingdom\\"],\\n      privateLocations: [],\\n    },\\n    /**\\n     * Project monitors settings\\n     */\\n    project: {\\n      id: \\"synthetics-replicator-tests\\",\\n      url: \\"https://elastic-deployment:port\\",\\n      space: \\"default\\",\\n    },\\n  };\\n  if (env === \\"production\\") {\\n    config.params = { url: \\"https://synthetics-replicator.netlify.app/\\" };\\n  }\\n  return config;\\n};\\n```\\n\\n## Writing your first journey\\n\\nAlthough the above configuration applies to all monitors in the project, it can be overridden for a given test.\\n\\n```javascript\\nimport { journey, step, monitor, expect, before } from \\"@elastic/synthetics\\";\\n\\njourney(\\"Replicator Order Journey\\", ({ page, params }) => {\\n  // Only relevant for the push command to create\\n  // monitors in Kibana\\n  monitor.use({\\n    id: \\"synthetics-replicator-monitor\\",\\n    schedule: 10,\\n  });\\n\\n  // journey steps go here\\n});\\n```\\n\\nThe @elastic/synthetics wrapper exposes many [standard test methods](https://www.elastic.co/guide/en/observability/current/synthetics-create-test.html#synthetics-syntax) such as the before and after constructs that allow for setup and tear down of typical properties in the tests, as well as support for many common assertion helper methods. A full list of supported expect methods is listed in the [documentation](https://www.elastic.co/guide/en/observability/current/synthetics-create-test.html#synthetics-assertions-methods). 
## Writing your first journey\\n\\nAlthough the above configuration applies to all monitors in the project, it can be overridden for a given test.\\n\\n```javascript\\nimport { journey, step, monitor, expect, before } from \\"@elastic/synthetics\\";\\n\\njourney(\\"Replicator Order Journey\\", ({ page, params }) => {\\n  // Only relevant for the push command to create\\n  // monitors in Kibana\\n  monitor.use({\\n    id: \\"synthetics-replicator-monitor\\",\\n    schedule: 10,\\n  });\\n\\n  // journey steps go here\\n});\\n```\\n\\nThe @elastic/synthetics wrapper exposes many [standard test methods](https://www.elastic.co/guide/en/observability/current/synthetics-create-test.html#synthetics-syntax), such as the before and after constructs that allow for setup and teardown of typical properties in the tests, as well as many common assertion helpers. A full list of supported expect methods is available in the [documentation](https://www.elastic.co/guide/en/observability/current/synthetics-create-test.html#synthetics-assertions-methods). The Playwright page object is also exposed, which enables us to perform [all the expected activities provided in the API](https://playwright.dev/docs/api/class-page), such as locating page elements and simulating user events like clicks, as depicted in the below example.\\n\\n```javascript\\nimport { journey, step, monitor, expect, before } from \\"@elastic/synthetics\\";\\n\\njourney(\\"Replicator Order Journey\\", ({ page, params }) => {\\n  // monitor configuration goes here\\n\\n  before(async () => {\\n    await page.goto(params.url);\\n  });\\n\\n  step(\\"assert home page loads\\", async () => {\\n    const header = await page.locator(\\"h1\\");\\n    expect(await header.textContent()).toBe(\\"Replicatr\\");\\n  });\\n\\n  step(\\"assert move to order page\\", async () => {\\n    const orderButton = await page.locator(\\"data-testid=order-button\\");\\n    await orderButton.click();\\n\\n    const url = page.url();\\n    expect(url).toContain(\\"/order\\");\\n\\n    const menuTiles = await page.locator(\\"data-testid=menu-item-card\\");\\n    expect(await menuTiles.count()).toBeGreaterThan(2);\\n  });\\n\\n  // other steps go here\\n});\\n```\\n\\nAs you can see in the above example, it also exposes the journey and step constructs. These constructs mirror the behavior-driven development (BDD) practice of showing the user journey through the application in tests.\\n\\nDevelopers can execute the tests against a locally running application as part of their feature development to see successful and failed steps in the user workflow. In the below example, the local server startup command is outlined in blue at the top. The monitor execution command is presented in red further down.\\n\\n![](/assets/images/testing-monitoring-synthetic-monitoring/blog-elastic-synthetics-replicator-npm-start.png)\\n\\nAs you can see from the green ticks next to each journey step, each of our tests passes. Woo!
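To reproduce this locally, the two commands in the screenshot come down to something like the following (a sketch based on the repository layout used by the workflow in the next section; your package scripts may differ):\\n\\n```bash\\n# start the web app locally (outlined in blue in the screenshot)\\nnpm start\\n\\n# in a second terminal, run the journeys against it (outlined in red)\\ncd apps/synthetics-replicator-tests/journeys\\nnpx @elastic/synthetics .\\n```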
## Gating your CI pipelines\\n\\nIt’s important to use the execution of the monitors within your CI pipeline as a gate for merging code changes and uploading the new version of your monitors. Each of the jobs in our [GitHub Actions workflow](https://github.com/carlyrichmond/synthetics-replicator/blob/main/.github/workflows/push-build-test-synthetics-replicator.yml) will be discussed in this and the subsequent section.\\n\\nThe test job spins up a test instance and runs our user journeys to validate our changes, as illustrated below. This job should run for pull requests to validate developer changes, as well as on push.\\n\\n```yaml\\njobs:\\n  test:\\n    env:\\n      NODE_ENV: development\\n    runs-on: ubuntu-latest\\n    steps:\\n      - uses: actions/checkout@v3\\n      - uses: actions/setup-node@v3\\n        with:\\n          node-version: 18\\n      - run: npm install\\n      - run: npm start &\\n      - run: \\"npm install @elastic/synthetics && SYNTHETICS_JUNIT_FILE=\'junit-synthetics.xml\' npx @elastic/synthetics . --reporter=junit\\"\\n        working-directory: ./apps/synthetics-replicator-tests/journeys\\n      - name: Publish Unit Test Results\\n        uses: EnricoMi/publish-unit-test-result-action@v2\\n        if: always()\\n        with:\\n          junit_files: \\"**/junit-*.xml\\"\\n          check_name: Elastic Synthetics Tests\\n```\\n\\nNote that, unlike the journey execution on our local machine, we make use of the --reporter=junit option when executing npx @elastic/synthetics to provide visibility of our passing, or sadly sometimes failing, journeys to the CI job.\\n\\n![](/assets/images/testing-monitoring-synthetic-monitoring/blog-elastic-synthetics-tests.png)\\n\\n## Automatically upload monitors\\n\\nTo ensure the latest monitors are available in Elastic Uptime, it’s advisable to push the monitors programmatically as part of the CI workflow, as the example job below does. Our workflow has a second job, push, shown below, which uploads your monitors to your cluster and depends on the successful execution of our test job. Note that this job is configured in our workflow to run on push to ensure changes have been validated rather than just raised within a pull request.\\n\\n```yaml\\njobs:\\n  test: …\\n  push:\\n    env:\\n      NODE_ENV: production\\n      SYNTHETICS_API_KEY: ${{ secrets.SYNTHETICS_API_KEY }}\\n    needs: test\\n    defaults:\\n      run:\\n        working-directory: ./apps/synthetics-replicator-tests\\n    runs-on: ubuntu-latest\\n    steps:\\n      - uses: actions/checkout@v3\\n      - uses: actions/setup-node@v3\\n        with:\\n          node-version: 18\\n      - run: npm install\\n      - run: npm run push\\n```\\n\\nThe @elastic/synthetics init wizard generates a push command for you when you create your project that can be triggered from the project folder. This is shown below through the steps and working-directory configuration. The push command requires the API key from your Elastic cluster, which should be stored as a secret within a trusted vault and referenced via a workflow environment variable. It is also vital that monitors pass ahead of pushing the updated monitor configuration to your Elastic Synthetics instance to prevent breaking your production monitoring. Unlike e2e tests running against a testing environment, broken monitors impact SRE activities, and therefore any changes need to be validated. For that reason, applying a dependency on your test job via the needs option is recommended.
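Under the hood, npm run push invokes the push subcommand of @elastic/synthetics, with the project id, Kibana URL, and space resolved from the project section of synthetics.config.ts. Run outside of CI, it looks roughly like this (an illustrative sketch; the placeholders are not real values):\\n\\n```bash\\nexport SYNTHETICS_API_KEY=<your-project-api-key>\\nnpx @elastic/synthetics push --auth \\"$SYNTHETICS_API_KEY\\" --url \\"https://<your-kibana-endpoint>\\"\\n```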
![](/assets/images/testing-monitoring-synthetic-monitoring/blog-elastic-push-build-test-synthetics-replicator.png)\\n\\n## Monitoring using Elastic Synthetics\\n\\nOnce monitors have been uploaded, they give SRE teams a regular checkpoint on whether the user workflow is functioning as intended — not just because they will run on a regular schedule as configured for the project and individual tests as shown previously, but also due to the ability to check the state of all monitor runs and execute them on demand.\\n\\nThe Monitors Overview tab gives us an immediate view of the status of all configured monitors, as well as the ability to run the monitor manually via the card ellipsis menu.\\n\\n![elastic observability monitors](/assets/images/testing-monitoring-synthetic-monitoring/blog-elastic-monitors.png)\\n\\nFrom the Monitor screen, we can also navigate to an overview of an individual monitor execution to investigate failures.\\n\\n![test run details](/assets/images/testing-monitoring-synthetic-monitoring/blog-elastic-test-run-details.png)\\n\\nThe other monitoring superpower SREs now have is the integration of these monitors with the familiar tools SREs already use to scrutinize the performance and availability of applications, such as APM, metrics, and logs. The aptly named **Investigate** menu allows easy navigation while SREs are performing investigations into potential failures or bottlenecks.\\n\\nThere is also a balance between finding issues and being notified of potential problems automatically. SREs already familiar with setting rules and thresholds for notification of issues will be happy to know that this is also possible for browser monitors. The editing of an example rule is shown below.\\n\\n![elastic observability rules](/assets/images/testing-monitoring-synthetic-monitoring/blog-elastic-rules.png)\\n\\nThe status of browser monitors can be configured not only to consider whether any individual or collective monitors have been down several times, as in the status check above, but also to gauge overall availability by looking at the percentage of passed checks within a given time period. SREs are not only interested in reacting to issues in a traditional production management way — they want to improve the availability of applications, too.\\n\\n## Recording user workflows\\n\\nThe limitation of generating e2e tests through the development lifecycle is that sometimes teams miss things, and the prior toolset is geared toward development teams. Despite the best intentions to design an intuitive product using multi-discipline teams, users may use applications in unintended ways. Furthermore, the monitors written by developers will only cover the expected workflows and raise the alarm either when these monitors fail in production or, if anomaly detection is applied to them, when they start to behave differently.\\n\\nWhen user issues arise, it’s useful to recreate the problem in the same format as our monitors. It’s also important to leverage the experience of SREs in generating user journeys, as they will intuitively consider failure cases where developers may struggle and focus on happy paths. However, not all SREs will have the experience or confidence to write these journeys using Playwright and @elastic/synthetics.\\n\\nEnter the Elastic Synthetics Recorder!
The above video gives a walkthrough of how it can be used to record the steps in a user journey and export them to a JavaScript file for inclusion in your monitor project. This is useful for feeding back into the development phase and testing developed fixes to solve the problem. This approach cannot be made unless we all combine forces to use these monitors together.\\n\\n## Try it out!\\n\\nAs of 8.8, @elastic/synthetics and the Elastic Synthetics app are generally available, and the trusty recorder is in beta. Share your experiences of bridging the developer and operations divide with Synthetic Monitoring via the [Uptime category](https://discuss.elastic.co/c/observability/uptime/75) in the Community Discuss forums or via [Slack](https://ela.st/slack).\\n\\nHappy monitoring!\\n\\n_Originally published February 6, 2023; updated May 23, 2023._\\n\\n> 1. [Why and how to replace end-to-end tests with synthetic monitors](/blog/why-and-how-replace-end-to-end-tests-synthetic-monitors)\\n> 2. [Uptime and Synthetic Monitoring](https://www.elastic.co/guide/en/observability/current/monitor-uptime-synthetics.html#monitor-uptime-synthetics)\\n> 3. [Scripting browser monitors](https://www.elastic.co/guide/en/observability/current/synthetics-journeys.html)\\n> 4. [Use the Synthetics Recorder](https://www.elastic.co/guide/en/observability/current/synthetics-recorder.html)\\n> 5. [Playwright](https://playwright.dev/)\\n> 6. [GitHub Actions](https://docs.github.com/en/actions)\\n","code":"var Component=(()=>{var u=Object.create;var s=Object.defineProperty;var p=Object.getOwnPropertyDescriptor;var m=Object.getOwnPropertyNames;var g=Object.getPrototypeOf,y=Object.prototype.hasOwnProperty;var f=(n,e)=>()=>(e||n((e={exports:{}}).exports,e),e.exports),w=(n,e)=>{for(var i in e)s(n,i,{get:e[i],enumerable:!0})},a=(n,e,i,r)=>{if(e&&typeof e==\\"object\\"||typeof e==\\"function\\")for(let o of m(e))!y.call(n,o)&&o!==i&&s(n,o,{get:()=>e[o],enumerable:!(r=p(e,o))||r.enumerable});return n};var b=(n,e,i)=>(i=n!=null?u(g(n)):{},a(e||!n||!n.__esModule?s(i,\\"default\\",{value:n,enumerable:!0}):i,n)),v=n=>a(s({},\\"__esModule\\",{value:!0}),n);var c=f((T,l)=>{l.exports=_jsx_runtime});var k={};w(k,{default:()=>d,frontmatter:()=>j});var t=b(c()),j={title:\\"Two sides of the same coin: Uniting testing and monitoring with Synthetic Monitoring\\",slug:\\"testing-monitoring-synthetic-monitoring\\",date:\\"2023-02-06\\",description:\\"DevOps aims to establish complementary practices across development and operations. See how Playwright, @elastic/synthetics, GitHub Actions, and Elastic Synthetics can unite development and SRE teams in validating and monitoring the user experience.\\",author:[{slug:\\"carly-richmond\\"}],image:\\"digital-experience-monitoring.jpg\\",tags:[{slug:\\"gitops\\"},{slug:\\"synthetics\\"},{slug:\\"javascript\\"}]};function h(n){let e={a:\\"a\\",blockquote:\\"blockquote\\",code:\\"code\\",em:\\"em\\",h2:\\"h2\\",img:\\"img\\",li:\\"li\\",ol:\\"ol\\",p:\\"p\\",pre:\\"pre\\",strong:\\"strong\\",...n.components},{Video:i}=e;return i||x(\\"Video\\",!0),(0,t.jsxs)(t.Fragment,{children:[(0,t.jsx)(e.p,{children:\\"Historically, software development and SRE have worked in silos with different cultural perspectives and priorities. The goal of DevOps is to establish common and complementary practices across software development and operations. 
However, for some organizations true collaboration is rare, and we still have a way to go to build effective DevOps partnerships.\\"}),`\\n`,(0,t.jsxs)(e.p,{children:[\\"Outside of cultural challenges, one of the most common reasons for this disconnect is using different tools to achieve similar goals \\\\u2014 case in point, end-to-end (e2e) testing versus \\",(0,t.jsx)(e.a,{href:\\"https://www.elastic.co/observability/synthetic-monitoring\\",rel:\\"nofollow\\",children:\\"synthetic monitoring\\"}),\\".\\"]}),`\\n`,(0,t.jsxs)(e.p,{children:[\\"This blog shares an overview of these techniques. Using the example repository \\",(0,t.jsx)(e.a,{href:\\"https://github.com/carlyrichmond/synthetics-replicator\\",rel:\\"nofollow\\",children:\\"carlyrichmond/synthetics-replicator\\"}),\\", we\\\\u2019ll also show how Playwright, @elastic/synthetics, and GitHub Actions can combine forces with Elastic Synthetics and the recorder to unite development and SRE teams in validating and monitoring the user experience for a simple web application hosted on a provider such as \\",(0,t.jsx)(e.a,{href:\\"https://www.netlify.com/\\",rel:\\"nofollow\\",children:\\"Netlify\\"}),\\".\\"]}),`\\n`,(0,t.jsxs)(e.p,{children:[\\"Elastic recently \\",(0,t.jsx)(e.a,{href:\\"https://www.elastic.co/blog/new-synthetic-monitoring-observability\\",rel:\\"nofollow\\",children:\\"introduced synthetics monitoring\\"}),\\", and \\",(0,t.jsx)(e.a,{href:\\"https://www.elastic.co/blog/why-and-how-replace-end-to-end-tests-synthetic-monitors\\",rel:\\"nofollow\\",children:\\"as highlighted in our prior blog\\"}),\\", it can replace e2e tests altogether. Uniting around a single tool to validate the user workflow early provides a common language to recreate user issues to validate fixes against.\\"]}),`\\n`,(0,t.jsx)(e.h2,{id:\\"synthetics-monitoring-versus-e2e-tests\\",children:\\"Synthetics Monitoring versus e2e tests\\"}),`\\n`,(0,t.jsx)(e.p,{children:\\"If development and operations tools are at war, it\\\\u2019s difficult to unify their different cultures together. Considering the definitions of these approaches shows that they in fact aim to achieve the same objective.\\"}),`\\n`,(0,t.jsx)(e.p,{children:\\"e2e tests are a suite of tests that recreate the user path, including clicks, user text entry, and navigations. Although many argue it\\\\u2019s about testing the integration of the layers of a software application, it\\\\u2019s the user workflow that e2e tests emulate. Meanwhile, Synthetic Monitoring, specifically a subset known as browser monitoring, is an application performance monitoring practice that emulates the user path through an application.\\"}),`\\n`,(0,t.jsx)(e.p,{children:\\"Both these techniques emulate the user path. If we use tooling that crosses the developer and operational divide, we can work together to build tests that can also provide production monitoring in our web applications.\\"}),`\\n`,(0,t.jsx)(e.h2,{id:\\"creating-user-journeys\\",children:\\"Creating user journeys\\"}),`\\n`,(0,t.jsx)(e.p,{children:\\"When a new user workflow, or set of features that accomplish a key goal, is under development in our application, developers can use @elastic/synthetics to create user journeys. The initial project scaffolding can be generated using the init utility once installed, as in the below example. 
Note that Node.js must be installed prior to using this utility.\\"}),`\\n`,(0,t.jsx)(e.pre,{children:(0,t.jsx)(e.code,{className:\\"language-bash\\",children:`npm install -g @elastic/synthetics\\nnpx @elastic/synthetics init synthetics-replicator-tests\\n`})}),`\\n`,(0,t.jsx)(e.p,{children:\\"Before commencing the wizard, make sure you have your Elastic cluster information and the Elastic Synthetics integration set on your cluster. You will need:\\"}),`\\n`,(0,t.jsxs)(e.ol,{children:[`\\n`,(0,t.jsxs)(e.li,{children:[\\"Monitor Management must be enabled within the Elastic Synthetics app as per the prerequisites in the \\",(0,t.jsx)(e.a,{href:\\"https://www.elastic.co/guide/en/observability/8.8/synthetics-get-started-project.html#_prerequisites\\",rel:\\"nofollow\\",children:\\"documentation getting started\\"}),\\".\\"]}),`\\n`,(0,t.jsx)(e.li,{children:\\"The Elastic Cloud cluster Cloud ID if using Elastic Cloud. Alternatively, if you are using on-prem hosting you need to enter your Kibana endpoint.\\"}),`\\n`,(0,t.jsxs)(e.li,{children:[\\"An API key generated from your cluster. There is a shortcut in the Synthetics application Settings to generate this key under the Project API Keys tab, as shown \\",(0,t.jsx)(e.a,{href:\\"https://www.elastic.co/guide/en/observability/current/synthetics-get-started-project.html#synthetics-get-started-project-init\\",rel:\\"nofollow\\",children:\\"in the documentation\\"}),\\".\\"]}),`\\n`]}),`\\n`,(0,t.jsx)(e.p,{children:\\"This wizard will take you through and generate a sample project containing configuration and example monitor journeys, with a structure similar to the below:\\"}),`\\n`,(0,t.jsx)(e.p,{children:(0,t.jsx)(e.img,{src:\\"/assets/images/testing-monitoring-synthetic-monitoring/blog-elastic-synthetics-replicator-tests.png\\",alt:\\"synthetics replicator tests\\",width:\\"1999\\",height:\\"1187\\"})}),`\\n`,(0,t.jsxs)(e.p,{children:[\\"For web developers, most of the elements such as the README and package.json and lock files will be familiar. The main configuration for your monitors is available in synthetics.config.tsas shown below. This configuration can be amended to include production and development-specific configuration. This is essential for combining forces and reusing the same monitors for e2e tests and allowing for any journeys to be used as e2e tests and production monitors. 
Although not in this example, details of \\",(0,t.jsx)(e.a,{href:\\"https://www.elastic.co/guide/en/observability/current/synthetics-private-location.html\\",rel:\\"nofollow\\",children:\\"private locations\\"}),\\" can be included if you would prefer to monitor from your own dedicated Elastic instance rather than from Elastic infrastructure.\\"]}),`\\n`,(0,t.jsx)(e.pre,{children:(0,t.jsx)(e.code,{className:\\"language-javascript\\",children:`import type { SyntheticsConfig } from \\"@elastic/synthetics\\";\\n\\nexport default (env) => {\\n const config: SyntheticsConfig = {\\n params: {\\n url: \\"https://localhost:5173\\",\\n },\\n playwrightOptions: {\\n ignoreHTTPSErrors: false,\\n },\\n /**\\n * Configure global monitor settings\\n */\\n monitor: {\\n schedule: 10,\\n locations: [\\"united_kingdom\\"],\\n privateLocations: [],\\n },\\n /**\\n * Project monitors settings\\n */\\n project: {\\n id: \\"synthetics-replicator-tests\\",\\n url: \\"https://elastic-deployment:port\\",\\n space: \\"default\\",\\n },\\n };\\n if (env === \\"production\\") {\\n config.params = { url: \\"https://synthetics-replicator.netlify.app/\\" };\\n }\\n return config;\\n};\\n`})}),`\\n`,(0,t.jsx)(e.h2,{id:\\"writing-your-first-journey\\",children:\\"Writing your first journey\\"}),`\\n`,(0,t.jsx)(e.p,{children:\\"Although the above configuration applies to all monitors in the project, it can be overridden for a given test.\\"}),`\\n`,(0,t.jsx)(e.pre,{children:(0,t.jsx)(e.code,{className:\\"language-javascript\\",children:`import { journey, step, monitor, expect, before } from \\"@elastic/synthetics\\";\\n\\njourney(\\"Replicator Order Journey\\", ({ page, params }) => {\\n // Only relevant for the push command to create\\n // monitors in Kibana\\n monitor.use({\\n id: \\"synthetics-replicator-monitor\\",\\n schedule: 10,\\n });\\n\\n // journey steps go here\\n});\\n`})}),`\\n`,(0,t.jsxs)(e.p,{children:[\\"The @elastic/synthetics wrapper exposes many \\",(0,t.jsx)(e.a,{href:\\"https://www.elastic.co/guide/en/observability/current/synthetics-create-test.html#synthetics-syntax\\",rel:\\"nofollow\\",children:\\"standard test methods\\"}),\\" such as the before and after constructs that allow for setup and tear down of typical properties in the tests, as well as support for many common assertion helper methods. A full list of supported expect methods are listed in the \\",(0,t.jsx)(e.a,{href:\\"https://www.elastic.co/guide/en/observability/current/synthetics-create-test.html#synthetics-assertions-methods\\",rel:\\"nofollow\\",children:\\"documentation\\"}),\\". 
The Playwright page object is also exposed, which enables us to perform \\",(0,t.jsx)(e.a,{href:\\"https://playwright.dev/docs/api/class-page\\",rel:\\"nofollow\\",children:\\"all the expected activities provided in the API\\"}),\\" such as locating page elements and simulating user events such as clicks that are depicted in the below example.\\"]}),`\\n`,(0,t.jsx)(e.pre,{children:(0,t.jsx)(e.code,{className:\\"language-javascript\\",children:`import { journey, step, monitor, expect, before } from \\"@elastic/synthetics\\";\\n\\njourney(\\"Replicator Order Journey\\", ({ page, params }) => {\\n // monitor configuration goes here\\n\\n before(async () => {\\n await page.goto(params.url);\\n });\\n\\n step(\\"assert home page loads\\", async () => {\\n const header = await page.locator(\\"h1\\");\\n expect(await header.textContent()).toBe(\\"Replicatr\\");\\n });\\n\\n step(\\"assert move to order page\\", async () => {\\n const orderButton = await page.locator(\\"data-testid=order-button\\");\\n await orderButton.click();\\n\\n const url = page.url();\\n expect(url).toContain(\\"/order\\");\\n\\n const menuTiles = await page.locator(\\"data-testid=menu-item-card\\");\\n expect(await menuTiles.count()).toBeGreaterThan(2);\\n });\\n\\n // other steps go here\\n});\\n`})}),`\\n`,(0,t.jsx)(e.p,{children:\\"As you can see in the above example, it also exposes the journey and step constructs. This construct mirrors the behavior-driven development (BDD) practice of showing the user journey through the application in tests.\\"}),`\\n`,(0,t.jsx)(e.p,{children:\\"Developers are able to execute the tests against a locally running application as part of their feature development to see successful and failed steps in the user workflow. In the below example, the local server startup command is outlined in blue at the top. The monitor execution command is presented in red further down.\\"}),`\\n`,(0,t.jsx)(e.p,{children:(0,t.jsx)(e.img,{src:\\"/assets/images/testing-monitoring-synthetic-monitoring/blog-elastic-synthetics-replicator-npm-start.png\\",alt:\\"\\",width:\\"1896\\",height:\\"1330\\"})}),`\\n`,(0,t.jsx)(e.p,{children:\\"As you can see from the green ticks next to each journey step, each of our tests pass. Woo!\\"}),`\\n`,(0,t.jsx)(e.h2,{id:\\"gating-your-ci-pipelines\\",children:\\"Gating your CI pipelines\\"}),`\\n`,(0,t.jsxs)(e.p,{children:[\\"It\\\\u2019s important to use the execution of the monitors within your CI pipeline as a gate for merging code changes and uploading the new version of your monitors. Each of the jobs in our \\",(0,t.jsx)(e.a,{href:\\"https://github.com/carlyrichmond/synthetics-replicator/blob/main/.github/workflows/push-build-test-synthetics-replicator.yml\\",rel:\\"nofollow\\",children:\\"GitHub Actions workflow\\"}),\\" will be discussed in this and the subsequent section.\\"]}),`\\n`,(0,t.jsx)(e.p,{children:\\"The test job spins up a test instance and runs our user journeys to validate our changes, as illustrated below. This step should run for pull requests to validate developer changes, as well as on push.\\"}),`\\n`,(0,t.jsx)(e.pre,{children:(0,t.jsx)(e.code,{className:\\"language-yaml\\",children:`jobs:\\n test:\\n env:\\n NODE_ENV: development\\n runs-on: ubuntu-latest\\n steps:\\n - uses: actions/checkout@v3\\n - uses: actions/setup-node@v3\\n with:\\n node-version: 18\\n - run: npm install\\n - run: npm start &\\n - run: \\"npm install @elastic/synthetics && SYNTHETICS_JUNIT_FILE=\'junit-synthetics.xml\' npx @elastic/synthetics . 
--reporter=junit\\"\\n working-directory: ./apps/synthetics-replicator-tests/journeys\\n - name: Publish Unit Test Results\\n uses: EnricoMi/publish-unit-test-result-action@v2\\n if: always()\\n with:\\n junit_files: \\"**/junit-*.xml\\"\\n check_name: Elastic Synthetics Tests\\n`})}),`\\n`,(0,t.jsx)(e.p,{children:\\"Note that, unlike the journey execution on our local machine, we make use of the --reporter=junit option when executing npx @elastic/synthetics to provide visibility of our passing, or sadly sometimes failing, journeys to the CI job.\\"}),`\\n`,(0,t.jsx)(e.p,{children:(0,t.jsx)(e.img,{src:\\"/assets/images/testing-monitoring-synthetic-monitoring/blog-elastic-synthetics-tests.png\\",alt:\\"\\",width:\\"1932\\",height:\\"1999\\"})}),`\\n`,(0,t.jsx)(e.h2,{id:\\"automatically-upload-monitors\\",children:\\"Automatically upload monitors\\"}),`\\n`,(0,t.jsx)(e.p,{children:\\"To ensure the latest monitors are available in Elastic Uptime, it\\\\u2019s advisable to push the monitors programmatically as part of the CI workflow such as the example task below does. Our workflow has a second job push, shown below, which is dependent on the successful execution of our test job that uploads your monitors to your cluster. Note that this job is configured in our workflow to run on push to ensure changes have been validated rather than just raised within a pull request.\\"}),`\\n`,(0,t.jsx)(e.pre,{children:(0,t.jsx)(e.code,{className:\\"language-yaml\\",children:`jobs:\\n test: \\\\u2026\\n push:\\n env:\\n NODE_ENV: production\\n SYNTHETICS_API_KEY: \\\\${{ secrets.SYNTHETICS_API_KEY }}\\n needs: test\\n defaults:\\n run:\\n working-directory: ./apps/synthetics-replicator-tests\\n runs-on: ubuntu-latest\\n steps:\\n - uses: actions/checkout@v3\\n - uses: actions/setup-node@v3\\n with:\\n node-version: 18\\n - run: npm install\\n - run: npm run push\\n`})}),`\\n`,(0,t.jsx)(e.p,{children:\\"The @elastic/synthetics init wizard generates a push command for you when you create your project that can be triggered from the project folder. This is shown below through the steps and working_directory configuration. The push command requires the API key from your Elastic cluster, which should be stored as a secret within a trusted vault and referenced via a workflow environment variable. It is also vital that monitors pass ahead of pushing the updated monitor configuration to your Elastic Synthetics instance to prevent breaking your production monitoring. Unlike e2e tests running against a testing environment, broken monitors impact SRE activities and therefore any changes need to be validated. 
For that reason, applying a dependency to your test step via the needs option is recommended.\\"}),`\\n`,(0,t.jsx)(e.p,{children:(0,t.jsx)(e.img,{src:\\"/assets/images/testing-monitoring-synthetic-monitoring/blog-elastic-push-build-test-synthetics-replicator.png\\",alt:\\"\\",width:\\"1812\\",height:\\"502\\"})}),`\\n`,(0,t.jsx)(e.h2,{id:\\"monitoring-using-elastic-synthetics\\",children:\\"Monitoring using Elastic Synthetics\\"}),`\\n`,(0,t.jsx)(e.p,{children:\\"Once monitors have been uploaded, they give a regular checkpoint to SRE teams as to whether the user workflow is functioning as intended \\\\u2014 not just because they will run on a regular schedule as configured for the project and individual tests as shown previously, but also due to the ability to check the state of all monitor runs and execute them on demand.\\"}),`\\n`,(0,t.jsx)(e.p,{children:\\"The Monitors Overview tab gives us an immediate view of the status of all configured monitors, as well as the ability to run the monitor manually via the card ellipsis menu.\\"}),`\\n`,(0,t.jsx)(e.p,{children:(0,t.jsx)(e.img,{src:\\"/assets/images/testing-monitoring-synthetic-monitoring/blog-elastic-monitors.png\\",alt:\\"elastic observability monitors\\",width:\\"1999\\",height:\\"1679\\"})}),`\\n`,(0,t.jsx)(e.p,{children:\\"From the Monitor screen, we can also navigate to an overview of an individual monitor execution to investigate failures.\\"}),`\\n`,(0,t.jsx)(e.p,{children:(0,t.jsx)(e.img,{src:\\"/assets/images/testing-monitoring-synthetic-monitoring/blog-elastic-test-run-details.png\\",alt:\\"test run details\\",width:\\"1999\\",height:\\"1756\\"})}),`\\n`,(0,t.jsxs)(e.p,{children:[\\"The other monitoring superpower SREs now have is the integration between these monitors to familiar tools SREs already use in scrutinizing the performance and availability of applications such as APM, metrics, and logs. The aptly named \\",(0,t.jsx)(e.strong,{children:\\"Investigate\\"}),\\" menu allows easy navigation while SREs are performing investigations into potential failures or bottlenecks.\\"]}),`\\n`,(0,t.jsx)(e.p,{children:\\"There is also a balance between finding issues and being notified of potential problems automatically. SREs already familiar with setting rules and thresholds for notification of issues will be happy to know that this is also possible for browser monitors. The editing of an example rule is shown below.\\"}),`\\n`,(0,t.jsx)(e.p,{children:(0,t.jsx)(e.img,{src:\\"/assets/images/testing-monitoring-synthetic-monitoring/blog-elastic-rules.png\\",alt:\\"elastic observability rules\\",width:\\"1999\\",height:\\"1409\\"})}),`\\n`,(0,t.jsx)(e.p,{children:\\"The status of browser monitors can be configured not only to consider if any individual or collective monitors have been down several times, such as in the status check above, but also to gauge the overall availability by looking at the percentage of passed checks within a given time period. SREs are not only interested in reacting to issues in a traditional production management way \\\\u2014 they want to improve the availability of applications, too.\\"}),`\\n`,(0,t.jsx)(e.h2,{id:\\"recording-user-workflows\\",children:\\"Recording user workflows\\"}),`\\n`,(0,t.jsx)(e.p,{children:\\"The limitation of generating e2e tests through the development lifecycle is that sometimes teams miss things, and the prior toolset is geared toward development teams. 
Despite the best intentions to design an intuitive product using multi-discipline teams, users may use applications in unintended ways. Furthermore, the monitors written by developers will only cover those expected workflows and raise the alarm either when these monitors fail in production or when they start to behave differently if anomaly detection is applied to them.\\"}),`\\n`,(0,t.jsx)(e.p,{children:\\"When user issues arise, it\\\\u2019s useful to recreate that problem in the same format as our monitors. It\\\\u2019s also important to leverage the experience of SREs in generating user journeys, as they will consider failure cases intuitively where developers may struggle and focus on happy cases. However, not all SREs will have the experience or confidence to write these journeys using Playwright and @elastic/synthetics.\\"}),`\\n`,(0,t.jsx)(i,{vidyardUuid:\\"NnJFuY5mpCdUNfLJSMAma3\\"}),`\\n`,(0,t.jsx)(e.p,{children:\\"Enter the Elastic Synthetics Recorder! The above video gives a walkthrough of how it can be used to record the steps in a user journey and export them to a JavaScript file for inclusion in your monitor project. This is useful for feeding back into the development phase and testing developed fixes to solve the problem. This approach cannot be made unless we all combine forces to use these monitors together.\\"}),`\\n`,(0,t.jsx)(e.h2,{id:\\"try-it-out\\",children:\\"Try it out!\\"}),`\\n`,(0,t.jsxs)(e.p,{children:[\\"As of 8.8, @elastic/synthetics and the Elastic Synthetics app are generally available, and the trusty recorder is in beta. Share your experiences of bridging the developer and operations divide with Synthetic Monitoring via the \\",(0,t.jsx)(e.a,{href:\\"https://discuss.elastic.co/c/observability/uptime/75\\",rel:\\"nofollow\\",children:\\"Uptime category\\"}),\\" in the Community Discuss forums or via \\",(0,t.jsx)(e.a,{href:\\"https://ela.st/slack\\",rel:\\"nofollow\\",children:\\"Slack\\"}),\\".\\"]}),`\\n`,(0,t.jsx)(e.p,{children:\\"Happy monitoring!\\"}),`\\n`,(0,t.jsx)(e.p,{children:(0,t.jsx)(e.em,{children:\\"Originally published February 6, 2023; updated May 23, 2023.\\"})}),`\\n`,(0,t.jsxs)(e.blockquote,{children:[`\\n`,(0,t.jsxs)(e.ol,{children:[`\\n`,(0,t.jsx)(e.li,{children:(0,t.jsx)(e.a,{href:\\"/blog/why-and-how-replace-end-to-end-tests-synthetic-monitors\\",children:\\"Why and how to replace end-to-end tests with synthetic monitors\\"})}),`\\n`,(0,t.jsx)(e.li,{children:(0,t.jsx)(e.a,{href:\\"https://www.elastic.co/guide/en/observability/current/monitor-uptime-synthetics.html#monitor-uptime-synthetics\\",rel:\\"nofollow\\",children:\\"Uptime and Synthetic Monitoring\\"})}),`\\n`,(0,t.jsx)(e.li,{children:(0,t.jsx)(e.a,{href:\\"https://www.elastic.co/guide/en/observability/current/synthetics-journeys.html\\",rel:\\"nofollow\\",children:\\"Scripting browser monitors\\"})}),`\\n`,(0,t.jsx)(e.li,{children:(0,t.jsx)(e.a,{href:\\"https://www.elastic.co/guide/en/observability/current/synthetics-recorder.html\\",rel:\\"nofollow\\",children:\\"Use the Synthetics Recorder\\"})}),`\\n`,(0,t.jsx)(e.li,{children:(0,t.jsx)(e.a,{href:\\"https://playwright.dev/\\",rel:\\"nofollow\\",children:\\"Playwright\\"})}),`\\n`,(0,t.jsx)(e.li,{children:(0,t.jsx)(e.a,{href:\\"https://docs.github.com/en/actions\\",rel:\\"nofollow\\",children:\\"GitHub Actions\\"})}),`\\n`]}),`\\n`]})]})}function d(n={}){let{wrapper:e}=n.components||{};return e?(0,t.jsx)(e,{...n,children:(0,t.jsx)(h,{...n})}):h(n)}function x(n,e){throw new Error(\\"Expected 
\\"+(e?\\"component\\":\\"object\\")+\\" `\\"+n+\\"` to be defined: you likely forgot to import, pass, or provide it.\\")}return v(k);})();\\n;return Component;"},"_id":"articles/uniting-testing-and-monitoring-with-synthetic-monitoring.mdx","_raw":{"sourceFilePath":"articles/uniting-testing-and-monitoring-with-synthetic-monitoring.mdx","sourceFileName":"uniting-testing-and-monitoring-with-synthetic-monitoring.mdx","sourceFileDir":"articles","contentType":"mdx","flattenedPath":"articles/uniting-testing-and-monitoring-with-synthetic-monitoring"},"type":"Article","imageUrl":"/assets/images/testing-monitoring-synthetic-monitoring/digital-experience-monitoring.jpg","readingTime":"13 min read","url":"/testing-monitoring-synthetic-monitoring","headings":[{"level":2,"title":"Synthetics Monitoring versus e2e tests","href":"#synthetics-monitoring-versus-e2e-tests"},{"level":2,"title":"Creating user journeys","href":"#creating-user-journeys"},{"level":2,"title":"Writing your first journey","href":"#writing-your-first-journey"},{"level":2,"title":"Gating your CI pipelines","href":"#gating-your-ci-pipelines"},{"level":2,"title":"Automatically upload monitors","href":"#automatically-upload-monitors"},{"level":2,"title":"Monitoring using Elastic Synthetics","href":"#monitoring-using-elastic-synthetics"},{"level":2,"title":"Recording user workflows","href":"#recording-user-workflows"},{"level":2,"title":"Try it out!","href":"#try-it-out"}]},{"title":"Universal Profiling: Detecting CO2 and energy efficiency","slug":"universal-profiling-detecting-co2-energy-efficiency","date":"2024-02-05","description":"Universal Profiling introduces the possibility to capture environmental impact. In this post, we compare Python and Go implementations and showcase the substantial CO2 savings achieved through code optimization.","image":"141935_-_Blog_header_image-_Op1_V1.jpg","author":[{"slug":"philipp-kahr","type":"Author","_raw":{}},{"slug":"francesco-gualazzi","type":"Author","_raw":{}}],"tags":[{"slug":"universal-profiling","type":"Tag","_raw":{}},{"slug":"cloud-monitoring","type":"Tag","_raw":{}}],"body":{"raw":"\\nA while ago, we posted a [blog](https://www.elastic.co/blog/importing-chess-games-elasticsearch-universal-profiling) that detailed how we imported over 4 billion chess games with speed using Python and optimized the code leveraging our Universal ProfilingTM. This was based on Elastic Stack running on version 8.9. 
We are now on [8.12](https://www.elastic.co/blog/whats-new-elastic-8-12-0), and it is time to do a second part that shows how easy it is to observe compiled languages and how Elastic\xae’s Universal Profiling can help you determine the benefit of a rewrite, both from a cost and an environmental friendliness angle.\\n\\n## Why efficiency matters — for you and the environment\\n\\nData centers are estimated to account for ~3% of global electricity consumption, and their usage is expected to double by 2030.\\* The cost of a digital service is a close proxy for its computing efficiency, and thus, being more efficient is a win-win: less energy consumed, smaller bill.\\n\\nIn the same vein, companies want the ability to scale to more users while spending less per user, and are actively looking into methods of reducing their energy consumption.\\n\\nIn this spirit, [Universal Profiling](https://www.elastic.co/observability/universal-profiling) comes equipped with data and visualizations to help determine where efficiency improvement efforts are worth the most.\\n\\n[Energy efficiency](https://www.elastic.co/blog/continuous-profiling-efficient-cost-effective-applications) measures how much a digital service consumes to produce an output given an input. It can be measured in multiple ways, and we at Elastic Observability chose CO2 emissions and annualized CO2 emissions (more details on them later).\\n\\nLet’s take the example of an e-commerce website: the energy efficiency of the “search inventory” process could be calculated as the average CPU time needed to serve a user request. Once the baseline for this value is determined, changes to the software delivering the search process may result in more or less CPU time consumed for the same feature, resulting in less or more efficient code.\\n\\n## How to set up and configure wattage and CO2\\n\\nYou can find a “Settings” button in the top-right corner of the Universal Profiling views. From there, you can customize the coefficients used to calculate the CO2 emissions tied to profiling data.\\n\\nThe values set here will be used only when the profiles gathered from host agents are not already associated with publicly known data certified by cloud providers. For example, suppose you have a hybrid cloud deployment with a portion of your workload running on-premise and a portion running in GCP. In that case, the values set here will only be used to calculate the CO2 emissions for the on-premise machines; we already use all the coefficients as declared by GCP to calculate the emissions of those machines.
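To make these coefficients concrete, the chain from CPU time to a CO2 figure looks roughly like this (an illustrative sketch, not Elastic’s exact formula): the core-seconds attributed to your code are converted into energy via a per-core wattage, scaled up by the data center’s power usage effectiveness (PUE), and multiplied by a regional carbon intensity factor.\\n\\n```\\nenergy [kWh] = core-seconds × watts-per-core ÷ 3,600,000 (joules per kWh)\\nCO2 [kg] = energy [kWh] × PUE × carbon-intensity [kg CO2 per kWh]\\n```\\n\\nAnnualized figures, like the ones discussed below, are this rate extrapolated to a full year.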
## Python vs. Go\\n\\nOur first [blog post](https://www.elastic.co/blog/importing-chess-games-elasticsearch-universal-profiling) implemented a solution in Python to read PGN chess games, a text-based representation of games. It showed how Universal Profiler can be leveraged to identify slow functions and help you rewrite your code to be faster and more efficient. At the end of it, we were happy with the Python version. It is still used today to grab the monthly updates from the [Lichess database](https://database.lichess.org/) and ingest them into Elasticsearch\xae. I always wanted a reason to work more with Go, so we rewrote the Python implementation in Go, leveraging goroutines and channels to send data through message passing. You can see more about it in our [GitHub repository](https://github.com/philippkahr/blogs/tree/main/universal-profiling).\\n\\nRewriting in Go also means switching from an interpreted language to a compiled one. As with everything in IT, this has benefits as well as disadvantages. One disadvantage is that we must ship debug symbols for the compiled binary, which we can do with the symbtool program once the binary is built. Without debug symbols, the flame graph is uninterpretable: frames will be labeled with hexadecimal addresses rather than source code annotations.\\n\\nFirst, make sure that your executable includes debug symbols. Go builds with debug symbols by default. You can check this by running file on your binary. The important part is that it is not stripped.\\n\\n```bash\\nfile lichess\\nlichess: ELF 64-bit LSB executable, x86-64, version 1 (SYSV), dynamically linked, interpreter /lib64/ld-linux-x86-64.so.2, Go BuildID=gufIkqA61WnCh8haeW-2/lfn3ne3U_y8MGoFD4AvT/QJEykzbacbYEmEQpXH6U/MqVbk-402n1k3B8yPB6I, with debug_info, not stripped\\n```\\n\\nNow we need to push the symbols using symbtool. You must create an Elasticsearch API key as the authentication method. In the Universal Profiler UI in Kibana\xae, an **Add Data** button in the top right corner will tell you exactly what to do. The command looks like the following; the -e flag is where you pass the path of your executable file. In our case, this is lichess, as above.\\n\\n```bash\\nsymbtool push-symbols executable -t \\"ApiKey\\" -u \\"elasticsearch-url\\" -e \\"lichess\\"\\n```\\n\\nNow that debug symbols are available inside the cluster, we can run both implementations with the same file simultaneously and see what Universal Profiler can tell us about it.\\n\\n## Identifying CO2 and energy efficiency savings\\n\\nPython is more frequently scheduled on the CPU. Thus, it runs more often on the hardware and contributes more to the machines’ resource usage.\\n\\nWe use the differential flame graph to identify and automatically calculate the difference in the following comparison. You need to filter on process.thread.name: “python3.11” in the baseline, and for the comparison, filter for lichess.\\n\\n![1 - universal profiling](/assets/images/universal-profiling-detecting-co2-energy-efficiency/1-elastic-blog-uni-profiling.png)\\n\\nLooking at the impact of annualized CO2 emissions, we see a decrease from 65.32kg of CO2 for the Python solution to 16.78kg for the Go one. That is a saving of 48.54kg of CO2 over a year.\\n\\nIf we take a step back, we’ll want to figure out why Python produces so many more emissions. In the flamegraph view, we filter down to just showing Python, and we can click on the first frame, called python3.11. A little popup tells us that it caused 32.95kg of emissions. That is nearly 50% of all emissions, attributable to the runtime itself. Our program caused the other ~32kg of CO2. We immediately reduced 32kg of annual emissions by cutting out the Python interpreter with Go.\\n\\nWe can lock that frame using a right click and then click **Show more information**.\\n\\n![2 - universal profiling graphs blue-orange](/assets/images/universal-profiling-detecting-co2-energy-efficiency/2-elastic-blog-uni-profiling.png)\\n\\nThe **Show more information** link displays detailed information about the frame, like sample count, total CPU, core seconds, and dollar costs. We won’t go into more detail in this blog.\\n\\n![3 impact estimates](/assets/images/universal-profiling-detecting-co2-energy-efficiency/3-elastic-blog-uni-profiling.png)\\n\\n## Reduce your carbon footprint today with Universal Profiling\\n\\nThis blog post demonstrates that rewriting your code base can reduce your carbon footprint immensely.
Using Universal Profiler, you could do a quick PoC to showcase how much carbon resources can be spared.\\n\\nLearn how you can [get started](https://www.elastic.co/guide/en/observability/current/profiling-get-started.html) with Elastic Universal Profiling today.\\n\\n> - Cluster for storing the data where three nodes, each 64GB RAM and 32 CPU cores, are running GCP on Elastic Cloud.\\n> - The machine for sending the data is a GCP e2-standard-32, thus 128GB RAM and 32 CPU cores with a 500GB balanced disk to read the games from.\\n> - The file used for the games is this [Lichess database](https://database.lichess.org/standard/lichess_db_standard_rated_2023-12.pgn.zst) containing 96,909,211 games. The extracted file size is 211GB.\\n\\n**Source:**\\n\\n\\\\*[https://media.ccc.de/v/camp2023-57070-energy_consumption_of_data_centers](https://media.ccc.de/v/camp2023-57070-energy_consumption_of_data_centers)\\n\\n_The release and timing of any features or functionality described in this post remain at Elastic\'s sole discretion. Any features or functionality not currently available may not be delivered on time or at all._\\n","code":"var Component=(()=>{var u=Object.create;var a=Object.defineProperty;var g=Object.getOwnPropertyDescriptor;var f=Object.getOwnPropertyNames;var m=Object.getPrototypeOf,p=Object.prototype.hasOwnProperty;var y=(n,e)=>()=>(e||n((e={exports:{}}).exports,e),e.exports),w=(n,e)=>{for(var t in e)a(n,t,{get:e[t],enumerable:!0})},r=(n,e,t,s)=>{if(e&&typeof e==\\"object\\"||typeof e==\\"function\\")for(let o of f(e))!p.call(n,o)&&o!==t&&a(n,o,{get:()=>e[o],enumerable:!(s=g(e,o))||s.enumerable});return n};var b=(n,e,t)=>(t=n!=null?u(m(n)):{},r(e||!n||!n.__esModule?a(t,\\"default\\",{value:n,enumerable:!0}):t,n)),v=n=>r(a({},\\"__esModule\\",{value:!0}),n);var c=y((_,l)=>{l.exports=_jsx_runtime});var P={};w(P,{default:()=>d,frontmatter:()=>k});var i=b(c()),k={title:\\"Universal Profiling: Detecting CO2 and energy efficiency\\",slug:\\"universal-profiling-detecting-co2-energy-efficiency\\",date:\\"2024-02-05\\",description:\\"Universal Profiling introduces the possibility to capture environmental impact. In this post, we compare Python and Go implementations and showcase the substantial CO2 savings achieved through code optimization.\\",author:[{slug:\\"philipp-kahr\\"},{slug:\\"francesco-gualazzi\\"}],image:\\"141935_-_Blog_header_image-_Op1_V1.jpg\\",tags:[{slug:\\"universal-profiling\\"},{slug:\\"cloud-monitoring\\"}]};function h(n){let e={a:\\"a\\",blockquote:\\"blockquote\\",code:\\"code\\",em:\\"em\\",h2:\\"h2\\",img:\\"img\\",li:\\"li\\",p:\\"p\\",pre:\\"pre\\",strong:\\"strong\\",ul:\\"ul\\",...n.components};return(0,i.jsxs)(i.Fragment,{children:[(0,i.jsxs)(e.p,{children:[\\"A while ago, we posted a \\",(0,i.jsx)(e.a,{href:\\"https://www.elastic.co/blog/importing-chess-games-elasticsearch-universal-profiling\\",rel:\\"nofollow\\",children:\\"blog\\"}),\\" that detailed how we imported over 4 billion chess games with speed using Python and optimized the code leveraging our Universal Profiling\\",(0,i.jsx)(\\"sup\\",{children:\\"TM\\"}),\\". This was based on Elastic Stack running on version 8.9. 
We are now on \\",(0,i.jsx)(e.a,{href:\\"https://www.elastic.co/blog/whats-new-elastic-8-12-0\\",rel:\\"nofollow\\",children:\\"8.12\\"}),\\", and it is time to do a second part that shows how easy it is to observe compiled languages and how Elastic\\\\xAE\\\\u2019s Universal Profiling can help you determine the benefit of a rewrite, both from a cost and environmental friendliness angle.\\"]}),`\\n`,(0,i.jsx)(e.h2,{id:\\"why-efficiency-matters--for-you-and-the-environment\\",children:\\"Why efficiency matters \\\\u2014 for you and the environment\\"}),`\\n`,(0,i.jsx)(e.p,{children:\\"Data centers are estimated to consume ~3% of global electricity consumption, and their usage is expected to double by 2030.* The cost of a digital service is a close proxy to its computing efficiency, and thus, being more efficient is a win-win: less energy consumed, smaller bill.\\"}),`\\n`,(0,i.jsx)(e.p,{children:\\"In the same scenario, companies want the ability to scale to more users while spending less for each user and are effectively looking into methods of reducing their energy consumption.\\"}),`\\n`,(0,i.jsxs)(e.p,{children:[\\"In this spirit, \\",(0,i.jsx)(e.a,{href:\\"https://www.elastic.co/observability/universal-profiling\\",rel:\\"nofollow\\",children:\\"Universal Profiling\\"}),\\" comes equipped with data and visualizations to help determine where efficiency improvement efforts are worth the most.\\"]}),`\\n`,(0,i.jsxs)(e.p,{children:[(0,i.jsx)(e.a,{href:\\"https://www.elastic.co/blog/continuous-profiling-efficient-cost-effective-applications\\",rel:\\"nofollow\\",children:\\"Energy efficiency\\"}),\\" measures how much a digital service consumes to produce an output given an input. It can be measured in multiple ways, and we at Elastic Observability chose CO\\",(0,i.jsx)(\\"sub\\",{children:\\"2\\"}),\\" emissions and annualized CO\\",(0,i.jsx)(\\"sub\\",{children:\\"2\\"}),\\" emissions (more details on them later).\\"]}),`\\n`,(0,i.jsx)(e.p,{children:\\"Let\\\\u2019s take the example of an e-commerce website: the energy efficiency of the \\\\u201Csearch inventory\\\\u201D process could be calculated as the average CPU time needed to serve a user request. Once the baseline for this value is determined, changes to the software delivering the search process may result in more or less CPU time consumed for the same feature, resulting in less or more efficient code.\\"}),`\\n`,(0,i.jsx)(e.h2,{id:\\"how-to-set-up-and-configure-wattage-and-co2\\",children:\\"How to set up and configure wattage and CO2\\"}),`\\n`,(0,i.jsxs)(e.p,{children:[\\"You can find a \\\\u201CSettings\\\\u201D button in the top-right corner of the Universal Profiling views. From there, you can customize the coefficient used to calculate CO\\",(0,i.jsx)(\\"sub\\",{children:\\"2\\"}),\\" emissions tied to profiling data.\\"]}),`\\n`,(0,i.jsxs)(e.p,{children:[\\"The values set here will be used only when the profiles gathered from host agents are not already associated with publicly known data certified by cloud providers. For example, suppose you have a hybrid cloud deployment with a portion of your workload running on-premise and a portion running in GCP. In that case, the values set here will only be used to calculate the CO\\",(0,i.jsx)(\\"sub\\",{children:\\"2\\"}),\\" emissions for the on-premise machines; we already use all the coefficients as declared by GCP to calculate the emissions of those machines.\\"]}),`\\n`,(0,i.jsx)(e.h2,{id:\\"python-vs-go\\",children:\\"Python vs. 
Go\\"}),`\\n`,(0,i.jsxs)(e.p,{children:[\\"Our first \\",(0,i.jsx)(e.a,{href:\\"https://www.elastic.co/blog/importing-chess-games-elasticsearch-universal-profiling\\",rel:\\"nofollow\\",children:\\"blog post\\"}),\\" implemented a solution to read PGN chess games, a text representation in Python. It showed how Universal Profiler can be leveraged to identify slow functions and help you rewrite your code faster and more efficiently. At the end of it, we were happy with the Python version. It is still used today to grab the monthly updates from the \\",(0,i.jsx)(e.a,{href:\\"https://database.lichess.org/\\",rel:\\"nofollow\\",children:\\"Lichess database\\"}),\\" and ingest them into Elasticsearch\\\\xAE. I always wanted a reason to work more with Go, and we rewrote Python to Go. We leveraged goroutines and channels to send data through message passing. You can see more about it in our \\",(0,i.jsx)(e.a,{href:\\"https://github.com/philippkahr/blogs/tree/main/universal-profiling\\",rel:\\"nofollow\\",children:\\"GitHub repository\\"}),\\".\\"]}),`\\n`,(0,i.jsx)(e.p,{children:\\"Rewriting in Go also means switching from an interpreted language to a compiled one. As with everything in IT, this has benefits as well as disadvantages. One disadvantage is that we must ship debug symbols for the compiled binary. When we build the binary, we can use the symbtool program to ship the debug symbols. Without debug symbols, we see uninterpretable information as frames will be labeled with hexadecimal addresses in the flame graph rather than source code annotations.\\"}),`\\n`,(0,i.jsx)(e.p,{children:\\"First, make sure that your executable includes debug symbols. Go per default builds with debug symbols. You can check this by using file yourbinary. The important part is that it is not stripped.\\"}),`\\n`,(0,i.jsx)(e.pre,{children:(0,i.jsx)(e.code,{className:\\"language-bash\\",children:`file lichess\\nlichess: ELF 64-bit LSB executable, x86-64, version 1 (SYSV), dynamically linked, interpreter /lib64/ld-linux-x86-64.so.2, Go BuildID=gufIkqA61WnCh8haeW-2/lfn3ne3U_y8MGoFD4AvT/QJEykzbacbYEmEQpXH6U/MqVbk-402n1k3B8yPB6I, with debug_info, not stripped\\n`})}),`\\n`,(0,i.jsxs)(e.p,{children:[\\"Now we need to push the symbols using symbtool. You must create an Elasticsearch API key as the authentication method. In the Universal Profiler UI in Kibana\\\\xAE, an \\",(0,i.jsx)(e.strong,{children:\\"Add Data\\"}),\\" button in the top right corner will tell you exactly what to do. The command is like this. The -e is the part where you pass through the path of your executable file. In our case, this is lichess as above.\\"]}),`\\n`,(0,i.jsx)(e.pre,{children:(0,i.jsx)(e.code,{className:\\"language-bash\\",children:`symbtool push-symbols executable -t \\"ApiKey\\" -u \\"elasticsearch-url\\" -e \\"lichess\\"\\n`})}),`\\n`,(0,i.jsx)(e.p,{children:\\"Now that debug symbols are available inside the cluster, we can run both implementations with the same file simultaneously and see what Universal Profiler can tell us about it.\\"}),`\\n`,(0,i.jsx)(e.h2,{id:\\"identifying-co2-and-energy-efficiency-savings\\",children:\\"Identifying CO2 and energy efficiency savings\\"}),`\\n`,(0,i.jsx)(e.p,{children:\\"Python is more frequently scheduled on the CPU. Thus, it runs more often on the hardware and contributes more to the machines\\\\u2019 resource usage.\\"}),`\\n`,(0,i.jsx)(e.p,{children:\\"We use the differential flame graph to identify and automatically calculate the difference in the following comparison. 
You need to filter on process.thread.name: \\\\u201Cpython3.11\\\\u201D in the baseline, and for the comparison, filter for lichess.\\"}),`\\n`,(0,i.jsx)(e.p,{children:(0,i.jsx)(e.img,{src:\\"/assets/images/universal-profiling-detecting-co2-energy-efficiency/1-elastic-blog-uni-profiling.png\\",alt:\\"1 - universal profiling\\",width:\\"1999\\",height:\\"1402\\"})}),`\\n`,(0,i.jsxs)(e.p,{children:[\\"Looking at the impact of annualized CO\\",(0,i.jsx)(\\"sub\\",{children:\\"2\\"}),\\" emissions, we see a decrease from 65.32kg of CO\\",(0,i.jsx)(\\"sub\\",{children:\\"2\\"}),\\" from the Python solution to 16.78kg. That is a difference of 48.54kg CO\\",(0,i.jsx)(\\"sub\\",{children:\\"2\\"}),\\" savings over a year.\\"]}),`\\n`,(0,i.jsxs)(e.p,{children:[\\"If we take a step back, we\\\\u2019ll want to figure out why Python produces many more emissions. In the flamegraph view, we filter down to just showing Python, and we can click on the first frame called python3.11. A little popup tells us that it caused 32.95kg of emissions. That is nearly 50% of all emissions caused by the runtime. Our program itself caused the other ~32kg of CO\\",(0,i.jsx)(\\"sub\\",{children:\\"2\\"}),\\". We immediately reduced 32kg of annual emissions by cutting out the Python interpreter with Go.\\"]}),`\\n`,(0,i.jsxs)(e.p,{children:[\\"We can lock that box using a right click and click \\",(0,i.jsx)(e.strong,{children:\\"Show more information\\"}),\\".\\"]}),`\\n`,(0,i.jsx)(e.p,{children:(0,i.jsx)(e.img,{src:\\"/assets/images/universal-profiling-detecting-co2-energy-efficiency/2-elastic-blog-uni-profiling.png\\",alt:\\"2 - universal profiling graphs blue-orange\\",width:\\"1999\\",height:\\"1574\\"})}),`\\n`,(0,i.jsxs)(e.p,{children:[\\"The \\",(0,i.jsx)(e.strong,{children:\\"Show more information\\"}),\\" link displays detailed information about the frame, like sample count, total CPU, core seconds, and dollar costs. We won\\\\u2019t go into more detail in this blog.\\"]}),`\\n`,(0,i.jsx)(e.p,{children:(0,i.jsx)(e.img,{src:\\"/assets/images/universal-profiling-detecting-co2-energy-efficiency/3-elastic-blog-uni-profiling.png\\",alt:\\"3 impact estimates\\",width:\\"1706\\",height:\\"1208\\"})}),`\\n`,(0,i.jsx)(e.h2,{id:\\"reduce-your-carbon-footprint-today-with-universal-profiling\\",children:\\"Reduce your carbon footprint today with Universal Profiling\\"}),`\\n`,(0,i.jsx)(e.p,{children:\\"This blog post demonstrates that rewriting your code base can reduce your carbon footprint immensely. 
Using Universal Profiler, you could do a quick PoC to showcase how much carbon resources can be spared.\\"}),`\\n`,(0,i.jsxs)(e.p,{children:[\\"Learn how you can \\",(0,i.jsx)(e.a,{href:\\"https://www.elastic.co/guide/en/observability/current/profiling-get-started.html\\",rel:\\"nofollow\\",children:\\"get started\\"}),\\" with Elastic Universal Profiling today.\\"]}),`\\n`,(0,i.jsxs)(e.blockquote,{children:[`\\n`,(0,i.jsxs)(e.ul,{children:[`\\n`,(0,i.jsx)(e.li,{children:\\"Cluster for storing the data where three nodes, each 64GB RAM and 32 CPU cores, are running GCP on Elastic Cloud.\\"}),`\\n`,(0,i.jsx)(e.li,{children:\\"The machine for sending the data is a GCP e2-standard-32, thus 128GB RAM and 32 CPU cores with a 500GB balanced disk to read the games from.\\"}),`\\n`,(0,i.jsxs)(e.li,{children:[\\"The file used for the games is this \\",(0,i.jsx)(e.a,{href:\\"https://database.lichess.org/standard/lichess_db_standard_rated_2023-12.pgn.zst\\",rel:\\"nofollow\\",children:\\"Lichess database\\"}),\\" containing 96,909,211 games. The extracted file size is 211GB.\\"]}),`\\n`]}),`\\n`]}),`\\n`,(0,i.jsx)(e.p,{children:(0,i.jsx)(e.strong,{children:\\"Source:\\"})}),`\\n`,(0,i.jsxs)(e.p,{children:[\\"*\\",(0,i.jsx)(e.a,{href:\\"https://media.ccc.de/v/camp2023-57070-energy_consumption_of_data_centers\\",rel:\\"nofollow\\",children:\\"https://media.ccc.de/v/camp2023-57070-energy_consumption_of_data_centers\\"})]}),`\\n`,(0,i.jsx)(e.p,{children:(0,i.jsx)(e.em,{children:\\"The release and timing of any features or functionality described in this post remain at Elastic\'s sole discretion. Any features or functionality not currently available may not be delivered on time or at all.\\"})})]})}function d(n={}){let{wrapper:e}=n.components||{};return e?(0,i.jsx)(e,{...n,children:(0,i.jsx)(h,{...n})}):h(n)}return v(P);})();\\n;return Component;"},"_id":"articles/universal-profiling-detecting-co2-energy-efficiency.mdx","_raw":{"sourceFilePath":"articles/universal-profiling-detecting-co2-energy-efficiency.mdx","sourceFileName":"universal-profiling-detecting-co2-energy-efficiency.mdx","sourceFileDir":"articles","contentType":"mdx","flattenedPath":"articles/universal-profiling-detecting-co2-energy-efficiency"},"type":"Article","imageUrl":"/assets/images/universal-profiling-detecting-co2-energy-efficiency/141935_-_Blog_header_image-_Op1_V1.jpg","readingTime":"7 min read","url":"/universal-profiling-detecting-co2-energy-efficiency","headings":[{"level":2,"title":"Why efficiency matters — for you and the environment","href":"#why-efficiency-matters--for-you-and-the-environment"},{"level":2,"title":"How to set up and configure wattage and CO2","href":"#how-to-set-up-and-configure-wattage-and-co2"},{"level":2,"title":"Python vs. Go","href":"#python-vs-go"},{"level":2,"title":"Identifying CO2 and energy efficiency savings","href":"#identifying-co2-and-energy-efficiency-savings"},{"level":2,"title":"Reduce your carbon footprint today with Universal Profiling","href":"#reduce-your-carbon-footprint-today-with-universal-profiling"}]},{"title":"Combining Elastic Universal Profiling with Java APM Services and Traces","slug":"universal-profiling-with-java-apm-services-traces","date":"2024-06-20","description":"Learn how to combine the power of Elastic universal profiling with APM data from Java services to easily pinpoint CPU bottlenecks. 
Compatible with both OpenTelemetry and the classic Elastic APM Agent.","image":"blog-header.jpg","author":[{"slug":"jonas-kunz","type":"Author","_raw":{}}],"tags":[{"slug":"opentelemetry","type":"Tag","_raw":{}},{"slug":"universal-profiling","type":"Tag","_raw":{}},{"slug":"apm","type":"Tag","_raw":{}}],"body":{"raw":"\\nIn [a previous blog post](https://www.elastic.co/observability-labs/blog/continuous-profiling-distributed-tracing-correlation), we introduced the technical details of how we managed to correlate eBPF profiling data with APM traces.\\nThis time, we\'ll show you how to get this feature up and running to pinpoint CPU bottlenecks in your Java services! The correlation is supported for both OpenTelemetry and the classic Elastic APM Agent. We\'ll show you how to enable it for both.\\n\\n## Demo Application\\n\\nFor this blog post, we’ll be using the [cpu-burner demo application](https://github.com/JonasKunz/cpu-burner) to showcase the correlation capabilities of APM, tracing, and profiling in Elastic. This application was built to continuously execute several CPU-intensive tasks:\\n* It computes Fibonacci numbers using the naive, recursive algorithm.\\n* It hashes random data with the SHA-2 and SHA-3 hashing algorithms.\\n* It performs numerous large background allocations to stress the garbage collector.\\n\\nThe computations of the Fibonacci numbers and the hashing will each be visible as transactions in Elastic: They have been manually instrumented using the OpenTelemetry API.\\n\\n## Setting up Profiling and APM\\n\\nFirst, we’ll need to set up the universal profiling host agent on the host where the demo application will run. Starting from version 8.14.0, correlation with APM data is supported and enabled out of the box for the profiler. There is no special configuration needed; we can just follow the [standard setup guide](https://www.elastic.co/guide/en/observability/current/profiling-get-started.html).\\nNote that at the time of writing, universal profiling only supports Linux.\\nOn Windows, you\'ll have to use a VM to try the demo.\\nOn macOS, you can use [colima](https://github.com/abiosoft/colima) as docker engine and run the profiling host agent and the demo app in container images.\\n\\nIn addition, we’ll need to instrument our demo application with an APM agent. We can either use the [classic Elastic APM agent](https://github.com/elastic/apm-agent-java) or the [Elastic OpenTelemetry Distribution](https://github.com/elastic/elastic-otel-java).\\n\\n### Using the Classic Elastic APM Agent\\n\\nStarting with version 1.50.0, the classic Elastic APM agent ships with the capability to correlate the traces it captures with the profiling data from universal profiling. We’ll just need to enable it explicitly via the **universal_profiling_integration_enabled** config option. 
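As with the agent\'s other options, it can alternatively be supplied as an environment variable rather than a system property - a minimal sketch, assuming the agent\'s usual option-to-environment-variable naming convention:\n\n```shell\n# Assumption: the agent maps config options to upper-cased ELASTIC_APM_* environment variables\nexport ELASTIC_APM_UNIVERSAL_PROFILING_INTEGRATION_ENABLED=true\n```\n\n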
Here is the standard command line for running the demo application with the setting enabled:\\n\\n```shell\\ncurl -o \'elastic-apm-agent.jar\' -L \'https://oss.sonatype.org/service/local/artifact/maven/redirect?r=releases&g=co.elastic.apm&a=elastic-apm-agent&v=LATEST\'\\njava -javaagent:elastic-apm-agent.jar \\\\\\n-Delastic.apm.service_name=cpu-burner-elastic \\\\\\n-Delastic.apm.secret_token=XXXXX \\\\\\n-Delastic.apm.server_url= \\\\\\n-Delastic.apm.application_packages=co.elastic.demo \\\\\\n-Delastic.apm.universal_profiling_integration_enabled=true \\\\\\n-jar ./target/cpu-burner.jar\\n```\\n\\n### Using OpenTelemetry\\n\\nThe feature is also available as an OpenTelemetry SDK extension.\\nThis means you can use it as a plugin for the vanilla OpenTelemetry agent or add it to your OpenTelemetry SDK if you are not using an agent.\\nIn addition, the feature ships by default with the Elastic OpenTelemetry Distribution for Java and can be used via any of the [possible usage methods](https://www.elastic.co/observability-labs/blog/elastic-distribution-opentelemetry-java-agent).\\nWhile the extension is currently Elastic-specific, we are already working with the various OpenTelemetry SIGs on standardizing the correlation mechanism, especially now after the [eBPF profiling agent has been contributed](https://www.elastic.co/observability-labs/blog/elastic-profiling-agent-acceptance-opentelemetry).\\n\\nFor this demo, we’ll be using the Elastic OpenTelemetry Distro Java agent to run the extension:\\n\\n```shell\\ncurl -o \'elastic-otel-javaagent.jar\' -L \'https://oss.sonatype.org/service/local/artifact/maven/redirect?r=releases&g=co.elastic.otel&a=elastic-otel-javaagent&v=LATEST\'\\njava -javaagent:./elastic-otel-javaagent.jar \\\\\\n-Dotel.exporter.otlp.endpoint= \\\\\\n\\"-Dotel.exporter.otlp.headers=Authorization=Bearer XXXX\\" \\\\\\n-Dotel.service.name=cpu-burner-otel \\\\\\n-Delastic.otel.universal.profiling.integration.enabled=true \\\\\\n-jar ./target/cpu-burner.jar\\n```\\n\\nHere, we explicitly enabled the profiling integration feature via the **elastic.otel.universal.profiling.integration.enabled** property. Note that with an upcoming release of the universal profiling feature, this won’t be necessary anymore! The OpenTelemetry extension will then automatically detect the presence of the profiler and enable the correlation feature based on that.\\n\\nThe demo repository also comes with a Dockerfile, so you can alternatively build and run the app in docker:\\n\\n```shell\\ndocker build -t cpu-burner .\\ndocker run --rm -e OTEL_EXPORTER_OTLP_ENDPOINT= -e OTEL_EXPORTER_OTLP_HEADERS=\\"Authorization=Bearer XXXX\\" cpu-burner\\n```\\n\\nAnd that’s it for setup; we are now ready to inspect the correlated profiling data!\\n\\n## Analyzing Service CPU Usage\\n\\nThe first thing we can do now is head to the “Flamegraph” view in Universal Profiling and inspect flamegraphs filtered on APM services. Without the APM correlation, universal profiling is limited to filtering on infrastructure concepts, such as hosts, containers, and processes.\\nBelow is a screencast showing a flamegraph filtered on the service name of our demo application:\\n\\n![Universal Profiling Flamegraph filtered on the service name of our demo application](/assets/images/universal-profiling-with-java-apm-services-traces/service-profiling.gif)\\n\\nWith this filter applied, we get a flamegraph aggregated over all instances of our service. If that is not desired, we could narrow down the filter, e.g. 
based on the host or container names. Note that the same service-level flamegraph view is also available on the “Universal Profiling” tab in the APM service UI.\n\nThe flamegraphs show exactly how the demo application is spending its CPU time, independently of whether it is covered by instrumentation or not. From left to right, we can first see the time spent in application tasks: We can identify the background allocations not covered by APM transactions as well as the SHA-computation and Fibonacci transactions.\nInterestingly, this application logic only covers roughly 60% of the total CPU time! The remaining time is spent mostly in the G1 garbage collector due to the high allocation rate of our application. The flamegraph shows all G1-related activities and the timing of the individual phases of concurrent tasks. We can easily identify those based on the native function names. This is made possible by universal profiling being capable of profiling and symbolizing the JVM’s C++ code in addition to the Java code.\n\n## Pinpointing Transaction Bottlenecks\n\nWhile the service-level flamegraph already gives good insight into where our transactions consume the most CPU, that is mainly thanks to the simplicity of the demo application. In real-world applications, it can be much harder to attribute particular stack frames to particular transactions. For this reason, the APM agent also correlates CPU profiling data from universal profiling on the transaction level.\n\nWe can navigate to the “Universal Profiling” tab on the transaction details page to get per-transaction flamegraphs:\n\n![Navigation to per-transaction profiling flamegraphs](/assets/images/universal-profiling-with-java-apm-services-traces/navigate-to-transaction-profiles.gif)\n\nFor example, let’s have a look at the flamegraph of our transaction computing SHA-2 and SHA-3 hashes of randomly generated data:\n\n![Flamegraph for the hashing transaction](/assets/images/universal-profiling-with-java-apm-services-traces/tx-unfiltered.png)\n\nInterestingly, the flamegraph uncovers some unexpected results: The transactions spend more time computing the random bytes to be hashed than on the hashing itself! So if this were a real-world application, a possible optimization could be to use a more performant random number generator.\n\nIn addition, we can see that the MessageDigest.update call for computing the hash values fans out into two different code paths: One is a call into the [BouncyCastle cryptography library](https://www.bouncycastle.org/), the other one is a JVM stub routine, meaning that the JIT compiler has inserted special assembly code for a function.\n\nThe flamegraph shown in the screenshot displays the aggregated data for all “shaShenanigans” transactions in the given time filter. We can further filter this down using the transaction filter bar at the top. 
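The filter bar accepts KQL, so - assuming custom OpenTelemetry attributes are indexed under `labels.*`, as is usual for Elastic APM - a query along the lines of `labels.algorithm : \"SHA-256\"` narrows the flamegraph to a single algorithm. 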
To make the best use of this, the demo application annotates the transactions with the hashing algorithm used via OpenTelemetry attributes:\n\n```java\npublic static void shaShenanigans(MessageDigest digest) {\n Span span = tracer.spanBuilder(\"shaShenanigans\")\n .setAttribute(\"algorithm\", digest.getAlgorithm())\n .startSpan();\n ...\n span.end();\n}\n```\n\nSo, let’s filter our flamegraph based on the hashing algorithm used:\n\n![Transaction Filter Bar](/assets/images/universal-profiling-with-java-apm-services-traces/tx-filter-bar.png)\n\nNote that “SHA-256” is the name of the JVM built-in SHA-2 256-bit implementation. This now gives the following flamegraph:\n\n![Transaction Filter Bar](/assets/images/universal-profiling-with-java-apm-services-traces/tx-sha-256.png)\n\nWe can see that the BouncyCastle stack frames are gone and MessageDigest.update spends all its time in the JVM stub routines. Therefore, the stub routine is likely hand-crafted assembly from the JVM maintainers for the SHA2 algorithm.\n\nIf we instead filter on “SHA3-256”, we get the following result:\n\n![Transaction Filter Bar](/assets/images/universal-profiling-with-java-apm-services-traces/tx-sha3.png)\n\nNow, as expected, MessageDigest.update spends all its time in the BouncyCastle library for the SHA3 implementation. Note that the hashing here takes up more time in relation to the random data generation, showing that the SHA2 JVM stub routine is significantly faster than the BouncyCastle Java SHA3 implementation.\n\nThis filtering is not limited to custom attributes like those shown in this demo. You can filter on any transaction attributes, including latency, HTTP headers, and so on. For example, for typical HTTP applications, it allows analyzing the efficiency of the JSON serializer used based on the payload size.\nNote that while it is possible to filter on single transaction instances (e.g. based on trace.id), this is not recommended: To allow continuous profiling in production systems, the profiler by default runs with a low sampling rate of 20 Hz. This means that for typical real-world applications, this will not yield enough data when looking at a single transaction execution. Instead, we gain insights by monitoring multiple executions of a group of transactions over time and aggregating their samples, for example in a flamegraph.\n\n## Summary\n\nA common reason for application performance to degrade is overly high CPU usage. 
In this blog post, we showed how to combine universal profiling with APM to find the actual root cause in such cases: We explained how to analyze the CPU time using profiling flamegraphs on service and transaction levels.\\nIn addition, we further drilled down into data using custom filters.\\nWe used a simple demo application for this purpose, so go ahead and try it yourself with your own, real-world applications to uncover the actual power of the feature!\\n","code":"var Component=(()=>{var d=Object.create;var o=Object.defineProperty;var g=Object.getOwnPropertyDescriptor;var u=Object.getOwnPropertyNames;var m=Object.getPrototypeOf,f=Object.prototype.hasOwnProperty;var w=(i,e)=>()=>(e||i((e={exports:{}}).exports,e),e.exports),v=(i,e)=>{for(var a in e)o(i,a,{get:e[a],enumerable:!0})},s=(i,e,a,r)=>{if(e&&typeof e==\\"object\\"||typeof e==\\"function\\")for(let n of u(e))!f.call(i,n)&&n!==a&&o(i,n,{get:()=>e[n],enumerable:!(r=g(e,n))||r.enumerable});return i};var b=(i,e,a)=>(a=i!=null?d(m(i)):{},s(e||!i||!i.__esModule?o(a,\\"default\\",{value:i,enumerable:!0}):a,i)),y=i=>s(o({},\\"__esModule\\",{value:!0}),i);var h=w((j,l)=>{l.exports=_jsx_runtime});var P={};v(P,{default:()=>p,frontmatter:()=>T});var t=b(h()),T={title:\\"Combining Elastic Universal Profiling with Java APM Services and Traces\\",slug:\\"universal-profiling-with-java-apm-services-traces\\",date:\\"2024-06-20\\",description:\\"Learn how to combine the power of Elastic universal profiling with APM data from Java services to easily pinpoint CPU bottlenecks. Compatible with both OpenTelemetry and the classic Elastic APM Agent.\\",author:[{slug:\\"jonas-kunz\\"}],image:\\"blog-header.jpg\\",tags:[{slug:\\"opentelemetry\\"},{slug:\\"universal-profiling\\"},{slug:\\"apm\\"}]};function c(i){let e={a:\\"a\\",code:\\"code\\",h2:\\"h2\\",h3:\\"h3\\",img:\\"img\\",li:\\"li\\",p:\\"p\\",pre:\\"pre\\",strong:\\"strong\\",ul:\\"ul\\",...i.components};return(0,t.jsxs)(t.Fragment,{children:[(0,t.jsxs)(e.p,{children:[\\"In \\",(0,t.jsx)(e.a,{href:\\"https://www.elastic.co/observability-labs/blog/continuous-profiling-distributed-tracing-correlation\\",rel:\\"nofollow\\",children:\\"a previous blog post\\"}),`, we introduced the technical details of how we managed to correlate eBPF profiling data with APM traces.\\nThis time, we\'ll show you how to get this feature up and running to pinpoint CPU bottlenecks in your Java services! The correlation is supported for both OpenTelemetry and the classic Elastic APM Agent. We\'ll show you how to enable it for both.`]}),`\\n`,(0,t.jsx)(e.h2,{id:\\"demo-application\\",children:\\"Demo Application\\"}),`\\n`,(0,t.jsxs)(e.p,{children:[\\"For this blog post, we\\\\u2019ll be using the \\",(0,t.jsx)(e.a,{href:\\"https://github.com/JonasKunz/cpu-burner\\",rel:\\"nofollow\\",children:\\"cpu-burner demo application\\"}),\\" to showcase the correlation capabilities of APM, tracing, and profiling in Elastic. 
This application was built to continuously execute several CPU-intensive tasks:\\"]}),`\\n`,(0,t.jsxs)(e.ul,{children:[`\\n`,(0,t.jsx)(e.li,{children:\\"It computes Fibonacci numbers using the naive, recursive algorithm.\\"}),`\\n`,(0,t.jsx)(e.li,{children:\\"It hashes random data with the SHA-2 and SHA-3 hashing algorithms.\\"}),`\\n`,(0,t.jsx)(e.li,{children:\\"It performs numerous large background allocations to stress the garbage collector.\\"}),`\\n`]}),`\\n`,(0,t.jsx)(e.p,{children:\\"The computations of the Fibonacci numbers and the hashing will each be visible as transactions in Elastic: They have been manually instrumented using the OpenTelemetry API.\\"}),`\\n`,(0,t.jsx)(e.h2,{id:\\"setting-up-profiling-and-apm\\",children:\\"Setting up Profiling and APM\\"}),`\\n`,(0,t.jsxs)(e.p,{children:[\\"First, we\\\\u2019ll need to set up the universal profiling host agent on the host where the demo application will run. Starting from version 8.14.0, correlation with APM data is supported and enabled out of the box for the profiler. There is no special configuration needed; we can just follow the \\",(0,t.jsx)(e.a,{href:\\"https://www.elastic.co/guide/en/observability/current/profiling-get-started.html\\",rel:\\"nofollow\\",children:\\"standard setup guide\\"}),`.\\nNote that at the time of writing, universal profiling only supports Linux.\\nOn Windows, you\'ll have to use a VM to try the demo.\\nOn macOS, you can use `,(0,t.jsx)(e.a,{href:\\"https://github.com/abiosoft/colima\\",rel:\\"nofollow\\",children:\\"colima\\"}),\\" as docker engine and run the profiling host agent and the demo app in container images.\\"]}),`\\n`,(0,t.jsxs)(e.p,{children:[\\"In addition, we\\\\u2019ll need to instrument our demo application with an APM agent. We can either use the \\",(0,t.jsx)(e.a,{href:\\"https://github.com/elastic/apm-agent-java\\",rel:\\"nofollow\\",children:\\"classic Elastic APM agent\\"}),\\" or the \\",(0,t.jsx)(e.a,{href:\\"https://github.com/elastic/elastic-otel-java\\",rel:\\"nofollow\\",children:\\"Elastic OpenTelemetry Distribution\\"}),\\".\\"]}),`\\n`,(0,t.jsx)(e.h3,{id:\\"using-the-classic-elastic-apm-agent\\",children:\\"Using the Classic Elastic APM Agent\\"}),`\\n`,(0,t.jsxs)(e.p,{children:[\\"Starting with version 1.50.0, the classic Elastic APM agent ships with the capability to correlate the traces it captures with the profiling data from universal profiling. We\\\\u2019ll just need to enable it explicitly via the \\",(0,t.jsx)(e.strong,{children:\\"universal_profiling_integration_enabled\\"}),\\" config option. 
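As with the agent\\u2019s other options, it can alternatively be supplied as an environment variable rather than a system property - a minimal sketch, assuming the agent\\u2019s usual option-to-environment-variable naming convention:\"]}),`\n`,(0,t.jsx)(e.pre,{children:(0,t.jsx)(e.code,{className:\"language-shell\",children:`# Assumption: the agent maps config options to upper-cased ELASTIC_APM_* environment variables\nexport ELASTIC_APM_UNIVERSAL_PROFILING_INTEGRATION_ENABLED=true\n`})}),`\n`,(0,t.jsxs)(e.p,{children:[\"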
Here is the standard command line for running the demo application with the setting enabled:\\"]}),`\\n`,(0,t.jsx)(e.pre,{children:(0,t.jsx)(e.code,{className:\\"language-shell\\",children:`curl -o \'elastic-apm-agent.jar\' -L \'https://oss.sonatype.org/service/local/artifact/maven/redirect?r=releases&g=co.elastic.apm&a=elastic-apm-agent&v=LATEST\'\\njava -javaagent:elastic-apm-agent.jar \\\\\\\\\\n-Delastic.apm.service_name=cpu-burner-elastic \\\\\\\\\\n-Delastic.apm.secret_token=XXXXX \\\\\\\\\\n-Delastic.apm.server_url= \\\\\\\\\\n-Delastic.apm.application_packages=co.elastic.demo \\\\\\\\\\n-Delastic.apm.universal_profiling_integration_enabled=true \\\\\\\\\\n-jar ./target/cpu-burner.jar\\n`})}),`\\n`,(0,t.jsx)(e.h3,{id:\\"using-opentelemetry\\",children:\\"Using OpenTelemetry\\"}),`\\n`,(0,t.jsxs)(e.p,{children:[`The feature is also available as an OpenTelemetry SDK extension.\\nThis means you can use it as a plugin for the vanilla OpenTelemetry agent or add it to your OpenTelemetry SDK if you are not using an agent.\\nIn addition, the feature ships by default with the Elastic OpenTelemetry Distribution for Java and can be used via any of the `,(0,t.jsx)(e.a,{href:\\"https://www.elastic.co/observability-labs/blog/elastic-distribution-opentelemetry-java-agent\\",rel:\\"nofollow\\",children:\\"possible usage methods\\"}),`.\\nWhile the extension is currently Elastic-specific, we are already working with the various OpenTelemetry SIGs on standardizing the correlation mechanism, especially now after the `,(0,t.jsx)(e.a,{href:\\"https://www.elastic.co/observability-labs/blog/elastic-profiling-agent-acceptance-opentelemetry\\",rel:\\"nofollow\\",children:\\"eBPF profiling agent has been contributed\\"}),\\".\\"]}),`\\n`,(0,t.jsx)(e.p,{children:\\"For this demo, we\\\\u2019ll be using the Elastic OpenTelemetry Distro Java agent to run the extension:\\"}),`\\n`,(0,t.jsx)(e.pre,{children:(0,t.jsx)(e.code,{className:\\"language-shell\\",children:`curl -o \'elastic-otel-javaagent.jar\' -L \'https://oss.sonatype.org/service/local/artifact/maven/redirect?r=releases&g=co.elastic.otel&a=elastic-otel-javaagent&v=LATEST\'\\njava -javaagent:./elastic-otel-javaagent.jar \\\\\\\\\\n-Dotel.exporter.otlp.endpoint= \\\\\\\\\\n\\"-Dotel.exporter.otlp.headers=Authorization=Bearer XXXX\\" \\\\\\\\\\n-Dotel.service.name=cpu-burner-otel \\\\\\\\\\n-Delastic.otel.universal.profiling.integration.enabled=true \\\\\\\\\\n-jar ./target/cpu-burner.jar\\n`})}),`\\n`,(0,t.jsxs)(e.p,{children:[\\"Here, we explicitly enabled the profiling integration feature via the \\",(0,t.jsx)(e.strong,{children:\\"elastic.otel.universal.profiling.integration.enabled\\"}),\\" property. Note that with an upcoming release of the universal profiling feature, this won\\\\u2019t be necessary anymore! 
The OpenTelemetry extension will then automatically detect the presence of the profiler and enable the correlation feature based on that.\\"]}),`\\n`,(0,t.jsx)(e.p,{children:\\"The demo repository also comes with a Dockerfile, so you can alternatively build and run the app in docker:\\"}),`\\n`,(0,t.jsx)(e.pre,{children:(0,t.jsx)(e.code,{className:\\"language-shell\\",children:`docker build -t cpu-burner .\\ndocker run --rm -e OTEL_EXPORTER_OTLP_ENDPOINT= -e OTEL_EXPORTER_OTLP_HEADERS=\\"Authorization=Bearer XXXX\\" cpu-burner\\n`})}),`\\n`,(0,t.jsx)(e.p,{children:\\"And that\\\\u2019s it for setup; we are now ready to inspect the correlated profiling data!\\"}),`\\n`,(0,t.jsx)(e.h2,{id:\\"analyzing-service-cpu-usage\\",children:\\"Analyzing Service CPU Usage\\"}),`\\n`,(0,t.jsx)(e.p,{children:`The first thing we can do now is head to the \\\\u201CFlamegraph\\\\u201D view in Universal Profiling and inspect flamegraphs filtered on APM services. Without the APM correlation, universal profiling is limited to filtering on infrastructure concepts, such as hosts, containers, and processes.\\nBelow is a screencast showing a flamegraph filtered on the service name of our demo application:`}),`\\n`,(0,t.jsx)(e.p,{children:(0,t.jsx)(e.img,{src:\\"/assets/images/universal-profiling-with-java-apm-services-traces/service-profiling.gif\\",alt:\\"Universal Profiling Flamegraph filtered on the service name of our demo application\\",width:\\"1905\\",height:\\"854\\"})}),`\\n`,(0,t.jsx)(e.p,{children:\\"With this filter applied, we get a flamegraph aggregated over all instances of our service. If that is not desired, we could narrow down the filter, e.g. based on the host or container names. Note that the same service-level flamegraph view is also available on the \\\\u201CUniversal Profiling\\\\u201D tab in the APM service UI.\\"}),`\\n`,(0,t.jsx)(e.p,{children:`The flamegraphs show exactly how the demo application is spending its CPU time, independently of whether it is covered by instrumentation or not. From left to right, we can first see the time spent in application tasks: We can identify the background allocations not covered by APM transactions as well as the SHA-computation and Fibonacci transactions.\\nInterestingly, this application logic only covers roughly 60% of the total CPU time! The remaining time is spent mostly in the G1 garbage collector due to the high allocation rate of our application. The flamegraph shows all G1-related activities and the timing of the individual phases of concurrent tasks. We can easily identify those based on the native function names. This is made possible by universal profiling being capable of profiling and symbolizing the JVM\\\\u2019s C++ code in addition to the Java code.`}),`\\n`,(0,t.jsx)(e.h2,{id:\\"pinpointing-transaction-bottlenecks\\",children:\\"Pinpointing Transaction Bottlenecks\\"}),`\\n`,(0,t.jsx)(e.p,{children:\\"While the service-level flamegraph already gives good insights on where our transactions consume the most CPU, this is mainly due to the simplicity of the demo application. In real-world applications, it can be much harder to pinpoint that certain stack frames come mostly from certain transactions. 
For this reason, the APM agent also correlates CPU profiling data from universal profiling on the transaction level.\"}),`\n`,(0,t.jsx)(e.p,{children:\"We can navigate to the \\u201CUniversal Profiling\\u201D tab on the transaction details page to get per-transaction flamegraphs:\"}),`\n`,(0,t.jsx)(e.p,{children:(0,t.jsx)(e.img,{src:\"/assets/images/universal-profiling-with-java-apm-services-traces/navigate-to-transaction-profiles.gif\",alt:\"Navigation to per-transaction profiling flamegraphs\",width:\"1713\",height:\"803\"})}),`\n`,(0,t.jsx)(e.p,{children:\"For example, let\\u2019s have a look at the flamegraph of our transaction computing SHA-2 and SHA-3 hashes of randomly generated data:\"}),`\n`,(0,t.jsx)(e.p,{children:(0,t.jsx)(e.img,{src:\"/assets/images/universal-profiling-with-java-apm-services-traces/tx-unfiltered.png\",alt:\"Flamegraph for the hashing transaction\",width:\"1848\",height:\"663\"})}),`\n`,(0,t.jsx)(e.p,{children:\"Interestingly, the flamegraph uncovers some unexpected results: The transactions spend more time computing the random bytes to be hashed than on the hashing itself! So if this were a real-world application, a possible optimization could be to use a more performant random number generator.\"}),`\n`,(0,t.jsxs)(e.p,{children:[\"In addition, we can see that the MessageDigest.update call for computing the hash values fans out into two different code paths: One is a call into the \",(0,t.jsx)(e.a,{href:\"https://www.bouncycastle.org/\",rel:\"nofollow\",children:\"BouncyCastle cryptography library\"}),\", the other one is a JVM stub routine, meaning that the JIT compiler has inserted special assembly code for a function.\"]}),`\n`,(0,t.jsx)(e.p,{children:\"The flamegraph shown in the screenshot displays the aggregated data for all \\u201CshaShenanigans\\u201D transactions in the given time filter. We can further filter this down using the transaction filter bar at the top. To make the best use of this, the demo application annotates the transactions with the hashing algorithm used via OpenTelemetry attributes:\"}),`\n`,(0,t.jsx)(e.pre,{children:(0,t.jsx)(e.code,{className:\"language-java\",children:`public static void shaShenanigans(MessageDigest digest) {\n Span span = tracer.spanBuilder(\"shaShenanigans\")\n .setAttribute(\"algorithm\", digest.getAlgorithm())\n .startSpan();\n ...\n span.end();\n}\n`})}),`\n`,(0,t.jsx)(e.p,{children:\"So, let\\u2019s filter our flamegraph based on the hashing algorithm used:\"}),`\n`,(0,t.jsx)(e.p,{children:(0,t.jsx)(e.img,{src:\"/assets/images/universal-profiling-with-java-apm-services-traces/tx-filter-bar.png\",alt:\"Transaction Filter Bar\",width:\"1844\",height:\"240\"})}),`\n`,(0,t.jsx)(e.p,{children:\"Note that \\u201CSHA-256\\u201D is the name of the JVM built-in SHA-2 256-bit implementation. This now gives the following flamegraph:\"}),`\n`,(0,t.jsx)(e.p,{children:(0,t.jsx)(e.img,{src:\"/assets/images/universal-profiling-with-java-apm-services-traces/tx-sha-256.png\",alt:\"Transaction Filter Bar\",width:\"1552\",height:\"278\"})}),`\n`,(0,t.jsx)(e.p,{children:\"We can see that the BouncyCastle stack frames are gone and MessageDigest.update spends all its time in the JVM stub routines. 
Therefore, the stub routine is likely hand-crafted assembly from the JVM maintainers for the SHA2 algorithm.\"}),`\n`,(0,t.jsx)(e.p,{children:\"If we instead filter on \\u201CSHA3-256\\u201D, we get the following result:\"}),`\n`,(0,t.jsx)(e.p,{children:(0,t.jsx)(e.img,{src:\"/assets/images/universal-profiling-with-java-apm-services-traces/tx-sha3.png\",alt:\"Transaction Filter Bar\",width:\"1551\",height:\"279\"})}),`\n`,(0,t.jsx)(e.p,{children:\"Now, as expected, MessageDigest.update spends all its time in the BouncyCastle library for the SHA3 implementation. Note that the hashing here takes up more time in relation to the random data generation, showing that the SHA2 JVM stub routine is significantly faster than the BouncyCastle Java SHA3 implementation.\"}),`\n`,(0,t.jsx)(e.p,{children:`This filtering is not limited to custom attributes like those shown in this demo. You can filter on any transaction attributes, including latency, HTTP headers, and so on. For example, for typical HTTP applications, it allows analyzing the efficiency of the JSON serializer used based on the payload size.\nNote that while it is possible to filter on single transaction instances (e.g. based on trace.id), this is not recommended: To allow continuous profiling in production systems, the profiler by default runs with a low sampling rate of 20 Hz. This means that for typical real-world applications, this will not yield enough data when looking at a single transaction execution. Instead, we gain insights by monitoring multiple executions of a group of transactions over time and aggregating their samples, for example in a flamegraph.`}),`\n`,(0,t.jsx)(e.h2,{id:\"summary\",children:\"Summary\"}),`\n`,(0,t.jsx)(e.p,{children:`A common reason for application performance to degrade is overly high CPU usage. 
In this blog post, we showed how to combine universal profiling with APM to find the actual root cause in such cases: We explained how to analyze the CPU time using profiling flamegraphs on service and transaction levels.\\nIn addition, we further drilled down into data using custom filters.\\nWe used a simple demo application for this purpose, so go ahead and try it yourself with your own, real-world applications to uncover the actual power of the feature!`})]})}function p(i={}){let{wrapper:e}=i.components||{};return e?(0,t.jsx)(e,{...i,children:(0,t.jsx)(c,{...i})}):c(i)}return y(P);})();\\n;return Component;"},"_id":"articles/universal-profiling-with-java-apm-services-traces.mdx","_raw":{"sourceFilePath":"articles/universal-profiling-with-java-apm-services-traces.mdx","sourceFileName":"universal-profiling-with-java-apm-services-traces.mdx","sourceFileDir":"articles","contentType":"mdx","flattenedPath":"articles/universal-profiling-with-java-apm-services-traces"},"type":"Article","imageUrl":"/assets/images/universal-profiling-with-java-apm-services-traces/blog-header.jpg","readingTime":"8 min read","url":"/universal-profiling-with-java-apm-services-traces","headings":[{"level":2,"title":"Demo Application","href":"#demo-application"},{"level":2,"title":"Setting up Profiling and APM","href":"#setting-up-profiling-and-apm"},{"level":3,"title":"Using the Classic Elastic APM Agent","href":"#using-the-classic-elastic-apm-agent"},{"level":3,"title":"Using OpenTelemetry","href":"#using-opentelemetry"},{"level":2,"title":"Analyzing Service CPU Usage","href":"#analyzing-service-cpu-usage"},{"level":2,"title":"Pinpointing Transaction Bottlenecks","href":"#pinpointing-transaction-bottlenecks"},{"level":2,"title":"Summary","href":"#summary"}]},{"title":"Using a custom agent with the OpenTelemetry Operator for Kubernetes","slug":"using-the-otel-operator-for-injecting-elastic-agents","date":"2024-07-16","description":"","image":"blog-header-720x420.jpg","author":[{"slug":"jack-shirazi","type":"Author","_raw":{}}],"tags":[{"slug":"opentelemetry","type":"Tag","_raw":{}},{"slug":"apm","type":"Tag","_raw":{}},{"slug":"containers","type":"Tag","_raw":{}},{"slug":"java","type":"Tag","_raw":{}},{"slug":"instrumentation","type":"Tag","_raw":{}},{"slug":"kubernetes","type":"Tag","_raw":{}}],"body":{"raw":"\\nThis is the second part of a two part series. The first part is available at [Zero config OpenTelemetry auto-instrumentation for Kubernetes Java applications](https://www.elastic.co/observability-labs/blog/using-the-otel-operator-for-injecting-java-agents). In that first part I walk through setting up and installing the [OpenTelemetry Operator for Kubernetes](https://github.com/open-telemetry/opentelemetry-operator/), and configuring that for auto-instrumentation of a Java application using the [OpenTelemetry Java agent](https://github.com/open-telemetry/opentelemetry-java-instrumentation/). \\n\\nIn this second part, I show how to install _any_ Java agent via the OpenTelemetry operator, using the Elastic Java agents as examples.\\n\\n## Installation and configuration recap\\n\\nPart 1 of this series, [Zero config OpenTelemetry auto-instrumentation for Kubernetes Java applications](https://www.elastic.co/observability-labs/blog/using-the-otel-operator-for-injecting-java-agents), details the installation and configuration of the OpenTelemetry operator and an Instrumentation resource. Here is an outline of the steps as a reminder:\\n\\n 1. 
Install cert-manager, eg `kubectl apply -f https://github.com/cert-manager/cert-manager/releases/download/v1.14.4/cert-manager.yaml`\n 2. Install the operator, eg `kubectl apply -f https://github.com/open-telemetry/opentelemetry-operator/releases/latest/download/opentelemetry-operator.yaml`\n 3. Create an Instrumentation resource\n 4. Add an annotation to either the deployment or the namespace\n 5. Deploy the application as normal\n\nIn that first part, steps 3, 4 & 5 were implemented for the [OpenTelemetry Java agent](https://github.com/open-telemetry/opentelemetry-java-instrumentation/). In this blog, I’ll implement them for other agents, using the Elastic APM agents as examples. I assume that steps 1 & 2 outlined above have already been done, ie that the operator is now installed. I will continue using the `banana` namespace for the examples, so ensure that namespace exists (`kubectl create namespace banana`). As per part 1, if you use any of the example instrumentation definitions below, you’ll need to substitute `my.apm.server.url` and `my-apm-secret-token` with the values appropriate for your collector.\n\n## Using the Elastic Distribution for OpenTelemetry Java\n\nFrom version 0.4.0, the [Elastic Distribution for OpenTelemetry Java](https://github.com/elastic/elastic-otel-java) includes the agent jar at the path `/javaagent.jar` in the docker image - which is essentially all that is needed for a docker image to be usable by the OpenTelemetry operator for auto-instrumentation. This means the Instrumentation resource is straightforward to define, and as it’s a distribution of the OpenTelemetry Java agent, all the standard OpenTelemetry environment variables apply:\n```\napiVersion: opentelemetry.io/v1alpha1\nkind: Instrumentation\nmetadata:\n name: elastic-otel\n namespace: banana\nspec:\n exporter:\n endpoint: https://my.apm.server.url\n propagators:\n - tracecontext\n - baggage\n - b3\n sampler:\n type: parentbased_traceidratio\n argument: \"1.0\"\n java:\n image: docker.elastic.co/observability/elastic-otel-javaagent:1.1.0\n env:\n - name: OTEL_EXPORTER_OTLP_HEADERS\n value: \"Authorization=Bearer my-apm-secret-token\"\n - name: ELASTIC_OTEL_INFERRED_SPANS_ENABLED\n value: \"true\"\n - name: ELASTIC_OTEL_SPAN_STACK_TRACE_MIN_DURATION\n value: \"50\"\n```\nI’ve included environment variables for switching on several features in the agent, including\n\n 1. ELASTIC\\_OTEL\\_INFERRED\\_SPANS\\_ENABLED to switch on the inferred spans feature described in [this blog](https://www.elastic.co/observability-labs/blog/tracing-data-inferred-spans-opentelemetry)\n 2. Span stack traces are automatically captured if the span takes more than ELASTIC\\_OTEL\\_SPAN\\_STACK\\_TRACE\\_MIN\\_DURATION (default would be 5ms)\n\nAdding in the annotation ...\n```\nmetadata:\n annotations:\n instrumentation.opentelemetry.io/inject-java: \"elastic-otel\"\n```\n... to the pod yaml gets the application traced, and displayed in the Elastic APM UI, including the inferred child spans and stack traces.\n\n![Elastic APM UI showing methodB traced with stack traces and inferred spans](/assets/images/using-the-otel-operator-for-injecting-elastic-agents/elastic-apm-ui-with-stack-trace.png)\n\nThe additions from the features mentioned above are circled in red - inferred spans (for methodC and methodD) bottom left, and the stack trace top right. 
(Note that the pod included the `OTEL_INSTRUMENTATION_METHODS_INCLUDE` environment variable set to `\"test.Testing[methodB]\"` so that traces from methodB are shown; for pod configuration see the \"Trying it\" section in [part 1](https://www.elastic.co/observability-labs/blog/using-the-otel-operator-for-injecting-java-agents))\n\n## Using the Elastic APM Java agent\n\nFrom version 1.50.0, the [Elastic APM Java agent](https://github.com/elastic/apm-agent-java) includes the agent jar at the path /javaagent.jar in the docker image - which is essentially all that is needed for a docker image to be usable by the OpenTelemetry operator for auto-instrumentation. This means the Instrumentation resource is straightforward to define:\n\n```\napiVersion: opentelemetry.io/v1alpha1\nkind: Instrumentation\nmetadata:\n name: elastic-apm\n namespace: banana\nspec:\n java:\n image: docker.elastic.co/observability/apm-agent-java:1.52.1\n env:\n - name: ELASTIC_APM_SERVER_URL\n value: \"https://my.apm.server.url\"\n - name: ELASTIC_APM_SECRET_TOKEN\n value: \"my-apm-secret-token\"\n - name: ELASTIC_APM_LOG_LEVEL\n value: \"INFO\"\n - name: ELASTIC_APM_PROFILING_INFERRED_SPANS_ENABLED\n value: \"true\"\n - name: ELASTIC_APM_LOG_SENDING\n value: \"true\"\n```\n\nI’ve included environment variables for switching on several features in the agent, including\n\n - ELASTIC\\_APM\\_LOG\\_LEVEL set to the default value (INFO) which could easily be switched to DEBUG\n - ELASTIC\\_APM\\_PROFILING\\_INFERRED\\_SPANS\\_ENABLED to switch on the inferred spans implementation, equivalent to the feature described in [this blog](https://www.elastic.co/observability-labs/blog/tracing-data-inferred-spans-opentelemetry)\n - ELASTIC\\_APM\\_LOG\\_SENDING which switches on sending logs to the APM UI; the logs are automatically correlated with transactions (for all common logging frameworks)\n\nAdding in the annotation ...\n\n```\nmetadata:\n annotations:\n instrumentation.opentelemetry.io/inject-java: \"elastic-apm\"\n```\n\n... to the pod yaml gets the application traced, and displayed in the Elastic APM UI, including the inferred child spans.\n\n![Elastic APM UI showing methodB traced with inferred spans](/assets/images/using-the-otel-operator-for-injecting-elastic-agents/elastic-apm-ui-with-inferred-spans.png)\n\n(Note that the pod included the `ELASTIC_APM_TRACE_METHODS` environment variable set to `\"test.Testing#methodB\"` so that traces from methodB are shown; for pod configuration see the \"Trying it\" section in [part 1](https://www.elastic.co/observability-labs/blog/using-the-otel-operator-for-injecting-java-agents))\n\n## Using an extension with the OpenTelemetry Java agent\n\nSetting up an Instrumentation resource for the OpenTelemetry Java agent is straightforward and was done in [part 1](https://www.elastic.co/observability-labs/blog/using-the-otel-operator-for-injecting-java-agents) of this two part series - and you can see from the above examples it’s just a matter of deciding on the docker image URL you want to use. However, if you want to include an _extension_ in your deployment, this is a little more complex, but also supported by the operator. Basically, the extensions you want to include with the agent need to be in docker images - or you have to build an image which includes any extensions that are not already in images. Then, in the Instrumentation resource, you declare the images and the directories the extensions are in. 
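If an extension only exists as a local jar and not yet in any published image, a tiny wrapper image is enough for the operator to copy it from - a minimal sketch, where the image name, jar name, and registry are all illustrative:\n\n```\n# Assumption: my-extension.jar was built locally; busybox just provides a base the operator can copy from\ncat > Dockerfile.ext <<\'EOF\'\nFROM busybox\nCOPY my-extension.jar /extensions/my-extension.jar\nEOF\ndocker build -f Dockerfile.ext -t my-registry/otel-extensions:0.1 .\ndocker push my-registry/otel-extensions:0.1\n```\n\n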
As an example, I’ll show an Instrumentation which uses version 2.5.0 of the [OpenTelemetry Java agent](https://github.com/open-telemetry/opentelemetry-java-instrumentation/) together with the [inferred spans extension](https://github.com/elastic/elastic-otel-java/tree/main/inferred-spans) from the [Elastic OpenTelemetry Java distribution](https://github.com/elastic/elastic-otel-java). The distro image includes the extension at path `/extensions/elastic-otel-agentextension.jar`. The Instrumentation resource allows either directories or file paths to be specified; here I’ll list the directory:\n```\napiVersion: opentelemetry.io/v1alpha1\nkind: Instrumentation\nmetadata:\n name: otel-plus-extension-instrumentation\n namespace: banana\nspec:\n exporter:\n endpoint: https://my.apm.server.url\n propagators:\n - tracecontext\n - baggage\n - b3\n sampler:\n type: parentbased_traceidratio\n argument: \"1.0\"\n java:\n image: ghcr.io/open-telemetry/opentelemetry-operator/autoinstrumentation-java:2.5.0\n extensions:\n - image: \"docker.elastic.co/observability/elastic-otel-javaagent:1.1.0\"\n dir: \"/extensions\"\n env:\n - name: OTEL_EXPORTER_OTLP_HEADERS\n value: \"Authorization=Bearer my-apm-secret-token\"\n - name: ELASTIC_OTEL_INFERRED_SPANS_ENABLED\n value: \"true\"\n```\n\nNote that you can have multiple `image … dir` pairs, ie include multiple extensions from different images. Note also, if you are testing this specific configuration, that the inferred spans extension included here will be contributed to the OpenTelemetry contrib repo at some point after this blog is published, after which the extension may no longer be present in later versions of the referenced image (since it will be available from the [contrib repo](https://github.com/open-telemetry/opentelemetry-java-contrib/) instead).\n\n## Next steps\n\nHere I’ve shown how to use any agent with the [OpenTelemetry Operator for Kubernetes](https://github.com/open-telemetry/opentelemetry-operator/), and how to configure it for your system. In particular, the examples have showcased how to use the Elastic Java agents to auto-instrument Java applications running in your Kubernetes clusters, along with how to enable agent features, using Instrumentation resources. You can set it up either with zero config for deployments, or with just a single annotation, which is generally the more flexible mechanism (you can have multiple Instrumentation resource definitions, and each deployment can select the appropriate one for its application).\n\n_The release and timing of any features or functionality described in this post remain at Elastic\'s sole discretion. 
Any features or functionality not currently available may not be delivered on time or at all._\\n","code":"var Component=(()=>{var p=Object.create;var r=Object.defineProperty;var m=Object.getOwnPropertyDescriptor;var u=Object.getOwnPropertyNames;var g=Object.getPrototypeOf,f=Object.prototype.hasOwnProperty;var y=(n,e)=>()=>(e||n((e={exports:{}}).exports,e),e.exports),b=(n,e)=>{for(var a in e)r(n,a,{get:e[a],enumerable:!0})},s=(n,e,a,o)=>{if(e&&typeof e==\\"object\\"||typeof e==\\"function\\")for(let i of u(e))!f.call(n,i)&&i!==a&&r(n,i,{get:()=>e[i],enumerable:!(o=m(e,i))||o.enumerable});return n};var v=(n,e,a)=>(a=n!=null?p(g(n)):{},s(e||!n||!n.__esModule?r(a,\\"default\\",{value:n,enumerable:!0}):a,n)),w=n=>s(r({},\\"__esModule\\",{value:!0}),n);var h=y((T,l)=>{l.exports=_jsx_runtime});var _={};b(_,{default:()=>d,frontmatter:()=>E});var t=v(h()),E={title:\\"Using a custom agent with the OpenTelemetry Operator for Kubernetes\\",slug:\\"using-the-otel-operator-for-injecting-elastic-agents\\",description:\\"\\",author:[{slug:\\"jack-shirazi\\"}],tags:[{slug:\\"opentelemetry\\"},{slug:\\"apm\\"},{slug:\\"containers\\"},{slug:\\"java\\"},{slug:\\"instrumentation\\"},{slug:\\"kubernetes\\"}],date:\\"2024-07-16\\",image:\\"blog-header-720x420.jpg\\"};function c(n){let e={a:\\"a\\",code:\\"code\\",em:\\"em\\",h2:\\"h2\\",img:\\"img\\",li:\\"li\\",ol:\\"ol\\",p:\\"p\\",pre:\\"pre\\",ul:\\"ul\\",...n.components};return(0,t.jsxs)(t.Fragment,{children:[(0,t.jsxs)(e.p,{children:[\\"This is the second part of a two part series. The first part is available at \\",(0,t.jsx)(e.a,{href:\\"https://www.elastic.co/observability-labs/blog/using-the-otel-operator-for-injecting-java-agents\\",rel:\\"nofollow\\",children:\\"Zero config OpenTelemetry auto-instrumentation for Kubernetes Java applications\\"}),\\". In that first part I walk through setting up and installing the \\",(0,t.jsx)(e.a,{href:\\"https://github.com/open-telemetry/opentelemetry-operator/\\",rel:\\"nofollow\\",children:\\"OpenTelemetry Operator for Kubernetes\\"}),\\", and configuring that for auto-instrumentation of a Java application using the \\",(0,t.jsx)(e.a,{href:\\"https://github.com/open-telemetry/opentelemetry-java-instrumentation/\\",rel:\\"nofollow\\",children:\\"OpenTelemetry Java agent\\"}),\\".\\"]}),`\\n`,(0,t.jsxs)(e.p,{children:[\\"In this second part, I show how to install \\",(0,t.jsx)(e.em,{children:\\"any\\"}),\\" Java agent via the OpenTelemetry operator, using the Elastic Java agents as examples.\\"]}),`\\n`,(0,t.jsx)(e.h2,{id:\\"installation-and-configuration-recap\\",children:\\"Installation and configuration recap\\"}),`\\n`,(0,t.jsxs)(e.p,{children:[\\"Part 1 of this series, \\",(0,t.jsx)(e.a,{href:\\"https://www.elastic.co/observability-labs/blog/using-the-otel-operator-for-injecting-java-agents\\",rel:\\"nofollow\\",children:\\"Zero config OpenTelemetry auto-instrumentation for Kubernetes Java applications\\"}),\\", details the installation and configuration of the OpenTelemetry operator and an Instrumentation resource. 
Here is an outline of the steps as a reminder:\\"]}),`\\n`,(0,t.jsxs)(e.ol,{children:[`\\n`,(0,t.jsxs)(e.li,{children:[\\"Install cert-manager, eg \\",(0,t.jsx)(e.code,{children:\\"kubectl apply -f https://github.com/cert-manager/cert-manager/releases/download/v1.14.4/cert-manager.yaml\\"})]}),`\\n`,(0,t.jsxs)(e.li,{children:[\\"Install the operator, eg \\",(0,t.jsx)(e.code,{children:\\"kubectl apply -f https://github.com/open-telemetry/opentelemetry-operator/releases/latest/download/opentelemetry-operator.yaml\\"})]}),`\\n`,(0,t.jsx)(e.li,{children:\\"Create an Instrumentation resource\\"}),`\\n`,(0,t.jsx)(e.li,{children:\\"Add an annotation to either the deployment or the namespace\\"}),`\\n`,(0,t.jsx)(e.li,{children:\\"Deploy the application as normal\\"}),`\\n`]}),`\\n`,(0,t.jsxs)(e.p,{children:[\\"In that first part, steps 3, 4 & 5 were implemented for the \\",(0,t.jsx)(e.a,{href:\\"https://github.com/open-telemetry/opentelemetry-java-instrumentation/\\",rel:\\"nofollow\\",children:\\"OpenTelemetry Java agent\\"}),\\". In this blog I\\\\u2019ll implement them for other agents, using the Elastic APM agents as examples. I assume that steps 1 & 2 outlined above have already been done, ie that the operator is now installed. I will continue using the \\",(0,t.jsx)(e.code,{children:\\"banana\\"}),\\" namespace for the examples, so ensure that namespace exists (\\",(0,t.jsx)(e.code,{children:\\"kubectl create namespace banana\\"}),\\"). As per part 1, if you use any of the example instrumentation definitions below, you\\\\u2019ll need to substitute \\",(0,t.jsx)(e.code,{children:\\"my.apm.server.url\\"}),\\" and \\",(0,t.jsx)(e.code,{children:\\"my-apm-secret-token\\"}),\\" with the values appropriate for your collector.\\"]}),`\\n`,(0,t.jsx)(e.h2,{id:\\"using-the-elastic-distribution-for-opentelemetry-java\\",children:\\"Using the Elastic Distribution for OpenTelemetry Java\\"}),`\\n`,(0,t.jsxs)(e.p,{children:[\\"From version 0.4.0, the \\",(0,t.jsx)(e.a,{href:\\"https://github.com/elastic/elastic-otel-java\\",rel:\\"nofollow\\",children:\\"Elastic Distribution for OpenTelemetry Java\\"}),\\" includes the agent jar at the path \\",(0,t.jsx)(e.code,{children:\\"/javaagent.jar\\"}),\\" in the docker image - which is essentially all that is needed for a docker image to be usable by the OpenTelemetry operator for auto-instrumentation. 
This means the Instrumentation resource is straightforward to define, and as it\\u2019s a distribution of the OpenTelemetry Java agent, all the standard OpenTelemetry environment variables apply:\"]}),`\n`,(0,t.jsx)(e.pre,{children:(0,t.jsx)(e.code,{children:`apiVersion: opentelemetry.io/v1alpha1\nkind: Instrumentation\nmetadata:\n name: elastic-otel\n namespace: banana\nspec:\n exporter:\n endpoint: https://my.apm.server.url\n propagators:\n - tracecontext\n - baggage\n - b3\n sampler:\n type: parentbased_traceidratio\n argument: \"1.0\"\n java:\n image: docker.elastic.co/observability/elastic-otel-javaagent:1.1.0\n env:\n - name: OTEL_EXPORTER_OTLP_HEADERS\n value: \"Authorization=Bearer my-apm-secret-token\"\n - name: ELASTIC_OTEL_INFERRED_SPANS_ENABLED\n value: \"true\"\n - name: ELASTIC_OTEL_SPAN_STACK_TRACE_MIN_DURATION\n value: \"50\"\n`})}),`\n`,(0,t.jsx)(e.p,{children:\"I\\u2019ve included environment variables for switching on several features in the agent, including\"}),`\n`,(0,t.jsxs)(e.ol,{children:[`\n`,(0,t.jsxs)(e.li,{children:[\"ELASTIC_OTEL_INFERRED_SPANS_ENABLED to switch on the inferred spans feature described in \",(0,t.jsx)(e.a,{href:\"https://www.elastic.co/observability-labs/blog/tracing-data-inferred-spans-opentelemetry\",rel:\"nofollow\",children:\"this blog\"})]}),`\n`,(0,t.jsx)(e.li,{children:\"Span stack traces are automatically captured if the span takes more than ELASTIC_OTEL_SPAN_STACK_TRACE_MIN_DURATION (default would be 5ms)\"}),`\n`]}),`\n`,(0,t.jsx)(e.p,{children:\"Adding in the annotation ...\"}),`\n`,(0,t.jsx)(e.pre,{children:(0,t.jsx)(e.code,{children:`metadata:\n annotations:\n instrumentation.opentelemetry.io/inject-java: \"elastic-otel\"\n`})}),`\n`,(0,t.jsx)(e.p,{children:\"... to the pod yaml gets the application traced, and displayed in the Elastic APM UI, including the inferred child spans and stack traces.\"}),`\n`,(0,t.jsx)(e.p,{children:(0,t.jsx)(e.img,{src:\"/assets/images/using-the-otel-operator-for-injecting-elastic-agents/elastic-apm-ui-with-stack-trace.png\",alt:\"Elastic APM UI showing methodB traced with stack traces and inferred spans\",width:\"1999\",height:\"1018\"})}),`\n`,(0,t.jsxs)(e.p,{children:[\"The additions from the features mentioned above are circled in red - inferred spans (for methodC and methodD) bottom left, and the stack trace top right. (Note that the pod included the \",(0,t.jsx)(e.code,{children:\"OTEL_INSTRUMENTATION_METHODS_INCLUDE\"}),\" environment variable set to \",(0,t.jsx)(e.code,{children:\'\"test.Testing[methodB]\"\'}),\' so that traces from methodB are shown; for pod configuration see the \"Trying it\" section in \',(0,t.jsx)(e.a,{href:\"https://www.elastic.co/observability-labs/blog/using-the-otel-operator-for-injecting-java-agents\",rel:\"nofollow\",children:\"part 1\"}),\")\"]}),`\n`,(0,t.jsx)(e.h2,{id:\"using-the-elastic-apm-java-agent\",children:\"Using the Elastic APM Java agent\"}),`\n`,(0,t.jsxs)(e.p,{children:[\"From version 1.50.0, the \",(0,t.jsx)(e.a,{href:\"https://github.com/elastic/apm-agent-java\",rel:\"nofollow\",children:\"Elastic APM Java agent\"}),\" includes the agent jar at the path /javaagent.jar in the docker image - which is essentially all that is needed for a docker image to be usable by the OpenTelemetry operator for auto-instrumentation. 
This means the Instrumentation resource is straightforward to define:\\"]}),`\\n`,(0,t.jsx)(e.pre,{children:(0,t.jsx)(e.code,{children:`apiVersion: opentelemetry.io/v1alpha1\\nkind: Instrumentation\\nmetadata:\\n name: elastic-apm\\n namespace: banana\\nspec:\\n java:\\n image: docker.elastic.co/observability/apm-agent-java:1.52.1\\n env:\\n - name: ELASTIC_APM_SERVER_URL\\n value: \\"https://my.apm.server.url\\"\\n - name: ELASTIC_APM_SECRET_TOKEN\\n value: \\"my-apm-secret-token\\"\\n - name: ELASTIC_APM_LOG_LEVEL\\n value: \\"INFO\\"\\n - name: ELASTIC_APM_PROFILING_INFERRED_SPANS_ENABLED\\n value: \\"true\\"\\n - name: ELASTIC_APM_LOG_SENDING\\n value: \\"true\\"\\n`})}),`\\n`,(0,t.jsx)(e.p,{children:\\"I\\\\u2019ve included environment for switching on several features in the agent, including\\"}),`\\n`,(0,t.jsxs)(e.ul,{children:[`\\n`,(0,t.jsx)(e.li,{children:\\"ELASTIC_APM_LOG_LEVEL set to the default value (INFO) which could easily be switched to DEBUG\\"}),`\\n`,(0,t.jsxs)(e.li,{children:[\\"ELASTIC_APM_PROFILING_INFERRED_SPANS_ENABLED to switch on the inferred spans implementation equivalent to the feature described in \\",(0,t.jsx)(e.a,{href:\\"https://www.elastic.co/observability-labs/blog/tracing-data-inferred-spans-opentelemetry\\",rel:\\"nofollow\\",children:\\"this blog\\"})]}),`\\n`,(0,t.jsx)(e.li,{children:\\"ELASTIC_APM_LOG_SENDING which switches on sending logs to the APM UI, the logs are automatically correlated with transactions (for all common logging frameworks)\\"}),`\\n`]}),`\\n`,(0,t.jsx)(e.p,{children:\\"Adding in the annotation ...\\"}),`\\n`,(0,t.jsx)(e.pre,{children:(0,t.jsx)(e.code,{children:`metadata:\\n annotations:\\n instrumentation.opentelemetry.io/inject-java: \\"elastic-apm\\"\\n`})}),`\\n`,(0,t.jsx)(e.p,{children:\\"... to the pod yaml gets the application traced, and displayed in the Elastic APM UI, including the inferred child spans\\"}),`\\n`,(0,t.jsx)(e.p,{children:(0,t.jsx)(e.img,{src:\\"/assets/images/using-the-otel-operator-for-injecting-elastic-agents/elastic-apm-ui-with-inferred-spans.png\\",alt:\\"Elastic APM UI showing methodB traced with inferred spans\\",width:\\"1999\\",height:\\"1013\\"})}),`\\n`,(0,t.jsxs)(e.p,{children:[\\"(Note that the pod included the \\",(0,t.jsx)(e.code,{children:\\"ELASTIC_APM_TRACE_METHODS\\"}),\\" environment variable set to \\",(0,t.jsx)(e.code,{children:\'\\"test.Testing#methodB\\"\'}),\' so that traces from methodB are shown; for pod configuration see the \\"Trying it\\" section in \',(0,t.jsx)(e.a,{href:\\"https://www.elastic.co/observability-labs/blog/using-the-otel-operator-for-injecting-java-agents\\",rel:\\"nofollow\\",children:\\"part 1\\"}),\\")\\"]}),`\\n`,(0,t.jsx)(e.h2,{id:\\"using-an-extension-with-the-opentelemetry-java-agent\\",children:\\"Using an extension with the OpenTelemetry Java agent\\"}),`\\n`,(0,t.jsxs)(e.p,{children:[\\"Setting up an Instrumentation resource for the OpenTelemetry Java agent is straightforward and was done in \\",(0,t.jsx)(e.a,{href:\\"https://www.elastic.co/observability-labs/blog/using-the-otel-operator-for-injecting-java-agents\\",rel:\\"nofollow\\",children:\\"part 1\\"}),\\" of this two part series - and you can see from the above examples it\\\\u2019s just a matter of deciding on the docker image URL you want to use. However if you want to include an \\",(0,t.jsx)(e.em,{children:\\"extension\\"}),\\" in your deployment, this is a little more complex, but also supported by the operator. 
Basically the extensions you want to include with the agent need to be in docker images - or you have to build an image which includes the extensions that are not already in images. Then you declare the images and the directories the extensions are in, in the Instrumentation resource. As an example, I\\\\u2019ll show an Instrumentation which uses version 2.5.0 of the \\",(0,t.jsx)(e.a,{href:\\"https://github.com/open-telemetry/opentelemetry-java-instrumentation/\\",rel:\\"nofollow\\",children:\\"OpenTelemetry Java agent\\"}),\\" together with the \\",(0,t.jsx)(e.a,{href:\\"https://github.com/elastic/elastic-otel-java/tree/main/inferred-spans\\",rel:\\"nofollow\\",children:\\"inferred spans extension\\"}),\\" from the \\",(0,t.jsx)(e.a,{href:\\"https://github.com/elastic/elastic-otel-java\\",rel:\\"nofollow\\",children:\\"Elastic OpenTelemetry Java distribution\\"}),\\". The distro image includes the extension at path \\",(0,t.jsx)(e.code,{children:\\"/extensions/elastic-otel-agentextension.jar\\"}),\\". The Instrumentation resource allows either directories or file paths to be specified, here I\\\\u2019ll list the directory:\\"]}),`\\n`,(0,t.jsx)(e.pre,{children:(0,t.jsx)(e.code,{children:`apiVersion: opentelemetry.io/v1alpha1\\nkind: Instrumentation\\nmetadata:\\n name: otel-plus-extension-instrumentation\\n namespace: banana\\nspec:\\n exporter:\\n endpoint: https://my.apm.server.url\\n propagators:\\n - tracecontext\\n - baggage\\n - b3\\n sampler:\\n type: parentbased_traceidratio\\n argument: \\"1.0\\"\\n java:\\n image: ghcr.io/open-telemetry/opentelemetry-operator/autoinstrumentation-java:2.5.0\\n extensions:\\n - image: \\"docker.elastic.co/observability/elastic-otel-javaagent:1.1.0\\"\\n dir: \\"/extensions\\"\\n env:\\n - name: OTEL_EXPORTER_OTLP_HEADERS\\n value: \\"Authorization=Bearer my-apm-secret-token\\"\\n - name: ELASTIC_OTEL_INFERRED_SPANS_ENABLED\\n value: \\"true\\"\\n`})}),`\\n`,(0,t.jsxs)(e.p,{children:[\\"Note that you can have multiple \\",(0,t.jsx)(e.code,{children:\\"image \\\\u2026 dir\\"}),\\" pairs, ie include multiple extensions from different images. Note also if you are testing this specific configuration that the inferred spans extension included here will be contributed to the OpenTelemetry contrib repo at some point after this blog is published, after which the extension may no longer be present in a later version of the referred image (since it will be available from the \\",(0,t.jsx)(e.a,{href:\\"https://github.com/open-telemetry/opentelemetry-java-contrib/\\",rel:\\"nofollow\\",children:\\"contrib repo\\"}),\\" instead).\\"]}),`\\n`,(0,t.jsx)(e.h2,{id:\\"next-steps\\",children:\\"Next steps\\"}),`\\n`,(0,t.jsxs)(e.p,{children:[\\"Here I\\\\u2019ve shown how to use any agent with the \\",(0,t.jsx)(e.a,{href:\\"https://github.com/open-telemetry/opentelemetry-operator/\\",rel:\\"nofollow\\",children:\\"OpenTelemetry Operator for Kubernetes\\"}),\\", and configure that for your system. In particular the examples have showcased how to use the Elastic Java agents to auto-instrument Java applications running in your Kubernetes clusters, along with how to enable features, using Instrumentation resources. 
You can set it up either as zero-config for deployments, or to require just one annotation, which is generally a more flexible mechanism (you can have multiple Instrumentation resource definitions, and each deployment can select the appropriate one for its application).\\"]}),`\\n`,(0,t.jsx)(e.p,{children:(0,t.jsx)(e.em,{children:\\"The release and timing of any features or functionality described in this post remain at Elastic\'s sole discretion. Any features or functionality not currently available may not be delivered on time or at all.\\"})})]})}function d(n={}){let{wrapper:e}=n.components||{};return e?(0,t.jsx)(e,{...n,children:(0,t.jsx)(c,{...n})}):c(n)}return w(_);})();\\n;return Component;"},"_id":"articles/using-the-otel-operator-for-injecting-elastic-agents.mdx","_raw":{"sourceFilePath":"articles/using-the-otel-operator-for-injecting-elastic-agents.mdx","sourceFileName":"using-the-otel-operator-for-injecting-elastic-agents.mdx","sourceFileDir":"articles","contentType":"mdx","flattenedPath":"articles/using-the-otel-operator-for-injecting-elastic-agents"},"type":"Article","imageUrl":"/assets/images/using-the-otel-operator-for-injecting-elastic-agents/blog-header-720x420.jpg","readingTime":"8 min read","url":"/using-the-otel-operator-for-injecting-elastic-agents","headings":[{"level":2,"title":"Installation and configuration recap","href":"#installation-and-configuration-recap"},{"level":2,"title":"Using the Elastic Distribution for OpenTelemetry Java","href":"#using-the-elastic-distribution-for-opentelemetry-java"},{"level":2,"title":"Using the Elastic APM Java agent","href":"#using-the-elastic-apm-java-agent"},{"level":2,"title":"Using an extension with the OpenTelemetry Java agent","href":"#using-an-extension-with-the-opentelemetry-java-agent"},{"level":2,"title":"Next steps","href":"#next-steps"}]},{"title":"Zero config OpenTelemetry auto-instrumentation for Kubernetes Java applications","slug":"using-the-otel-operator-for-injecting-java-agents","date":"2024-07-11","description":"Walking through how to install and enable the OpenTelemetry Operator for Kubernetes to auto-instrument Java applications, with no configuration changes needed for deployments","image":"blog-header.png","author":[{"slug":"jack-shirazi","type":"Author","_raw":{}}],"tags":[{"slug":"opentelemetry","type":"Tag","_raw":{}},{"slug":"apm","type":"Tag","_raw":{}},{"slug":"containers","type":"Tag","_raw":{}},{"slug":"java","type":"Tag","_raw":{}},{"slug":"instrumentation","type":"Tag","_raw":{}},{"slug":"kubernetes","type":"Tag","_raw":{}}],"body":{"raw":"\\nThe [OpenTelemetry Java agent](https://github.com/open-telemetry/opentelemetry-java-instrumentation/) has a number of [ways to install](https://opentelemetry.io/docs/languages/java/automatic/#setup) the agent into a Java application. If you are running your Java applications in Kubernetes pods, there is a separate mechanism (which under the hood uses JAVA\\\\_TOOL\\\\_OPTIONS and other environment variables) to auto-instrument Java applications. This auto-instrumentation can be achieved with zero configuration of the applications and pods!\\n\\nThe mechanism to achieve zero-config auto-instrumentation of Java applications in Kubernetes is via the [OpenTelemetry Operator for Kubernetes](https://github.com/open-telemetry/opentelemetry-operator/). This operator has many capabilities and the full documentation (and of course source) is available in the project itself. 
In this blog, I\'ll walk through installing, setting up and running zero-config auto-instrumentation of Java applications in Kubernetes using the OpenTelemetry Operator.\\n\\n## Installing the OpenTelemetry Operator\\n\\nAt the time of writing this blog, the OpenTelemetry Operator needs the certificate manager (cert-manager) to be installed before the operator itself can be installed. Installing from the web is straightforward. First, install `cert-manager` (the version to be installed will be specified in the [OpenTelemetry Operator for Kubernetes](https://github.com/open-telemetry/opentelemetry-operator/) documentation):\\n\\n```\\nkubectl apply -f https://github.com/cert-manager/cert-manager/releases/download/v1.14.4/cert-manager.yaml\\n```\\n\\nThen, when the cert-manager pods are ready (`kubectl get pods -n cert-manager`)\xa0 ...\\n\\n```\\nNAMESPACE\xa0 \xa0 \xa0 NAME \xa0 \xa0 \xa0 \xa0 \xa0 \xa0 \xa0 \xa0 \xa0 \xa0 \xa0 \xa0 \xa0 \xa0 \xa0 \xa0 \xa0 \xa0 \xa0 \xa0 READY\\ncert-manager \xa0 cert-manager-67c98b89c8-rnr5s\xa0 \xa0 \xa0 \xa0 \xa0 \xa0 \xa0 \xa0 1/1\\ncert-manager \xa0 cert-manager-cainjector-5c5695d979-q9hxz \xa0 \xa0 1/1\\ncert-manager \xa0 cert-manager-webhook-7f9f8648b9-8gxgs\xa0 \xa0 \xa0 \xa0 1/1\\n```\\n\\n... you can install the OpenTelemetry Operator:\\n\\n```\\nkubectl apply -f https://github.com/open-telemetry/opentelemetry-operator/releases/latest/download/opentelemetry-operator.yaml\\n```\\n\\nYou can, of course, use a specific version of the operator instead of the `latest`, but here I’ve used the `latest` version.\\n\\n\\n## An Instrumentation resource\\n\\nNow you need to add just one further Kubernetes resource to enable auto-instrumentation: an `Instrumentation` resource. I am going to use the `banana` namespace for my examples, so I have first created that namespace (`kubectl create namespace banana`). The auto-instrumentation is specified and configured by these Instrumentation resources. Here is a basic one which will allow every Java pod in the `banana` namespace to be auto-instrumented with version 2.5.0 of the [OpenTelemetry Java agent](https://github.com/open-telemetry/opentelemetry-java-instrumentation/):\\n\\n```\\napiVersion: opentelemetry.io/v1alpha1\\nkind: Instrumentation\\nmetadata:\\n name: banana-instr\\n namespace: banana\\nspec:\\n exporter:\\n endpoint: \\"https://my.endpoint\\"\\n propagators:\\n - tracecontext\\n - baggage\\n - b3\\n sampler:\\n type: parentbased_traceidratio\\n argument: \\"1.0\\"\\n java:\\n image: ghcr.io/open-telemetry/opentelemetry-operator/autoinstrumentation-java:2.5.0\\n env:\\n - name: OTEL_EXPORTER_OTLP_HEADERS\\n value: \\"Authorization=Bearer MyAuth\\"\\n```\\n\\nCreating this resource (eg with `kubectl apply -f banana-instr.yaml`, assuming the above yaml was saved in file `banana-instr.yaml`) makes the `banana-instr` Instrumentation resource available for use. (Note you will need to change `my.endpoint` and `MyAuth` to values appropriate for your collector.) You can use this instrumentation immediately by adding an annotation to any deployment in the `banana` namespace:\\n\\n```\\nmetadata:\\n\xa0\xa0annotations:\\n\xa0\xa0\xa0\xa0instrumentation.opentelemetry.io/inject-java: \\"true\\"\\n```\\n\\nThe `banana-instr` Instrumentation resource is not yet set to be applied by _default_ to all pods in the banana namespace. Currently it\'s zero-config as far as the _application_ is concerned, but it requires an annotation added to a _pod or deployment_. 
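For a deployment, note that the annotation belongs on the _pod template_ metadata (the mutating webhook inspects pod annotations), not on the Deployment object itself. A minimal sketch, with hypothetical names:\\n\\n```\\napiVersion: apps/v1\\nkind: Deployment\\nmetadata:\\n name: my-app\\n namespace: banana\\nspec:\\n selector:\\n matchLabels:\\n app: my-app\\n template:\\n metadata:\\n labels:\\n app: my-app\\n annotations:\\n instrumentation.opentelemetry.io/inject-java: \\"true\\"\\n spec:\\n containers:\\n - name: my-app\\n image: my-app:latest\\n```\\n\\n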
To make it fully zero-config for _all pods_ in the `banana` namespace, we need to add that annotation to the namespace itself, ie editing the namespace (`kubectl edit namespace banana`) so it would then have contents similar to\\n\\n```\\napiVersion: v1\\nkind: Namespace\\nmetadata:\\n\xa0\xa0name: banana\\n\xa0\xa0annotations:\\n\xa0\xa0\xa0\xa0instrumentation.opentelemetry.io/inject-java: \\"banana-instr\\"\\n...\\n```\\n\\nNow we have a namespace that is going to auto-instrument _every_ Java application deployed in the `banana` namespace with the 2.5.0 [OpenTelemetry Java agent](https://github.com/open-telemetry/opentelemetry-java-instrumentation/)!\\n\\n\\n## Trying it\\n\\nThere is a simple example Java application at [docker.elastic.co/demos/apm/k8s-webhook-test](https://docker.elastic.co/demos/apm/k8s-webhook-test) which just repeatedly calls the chain `main->methodA->methodB->methodC->methodD` with some sleeps in the calls. Running this (`kubectl apply -f banana-app.yaml`) using a very basic pod definition:\\n\\n```\\napiVersion: v1\\nkind: Pod\\nmetadata:\\n name: banana-app\\n namespace: banana\\n labels:\\n app: banana-app\\nspec:\\n containers:\\n - image: docker.elastic.co/demos/apm/k8s-webhook-test\\n imagePullPolicy: Always\\n name: banana-app\\n env: \\n - name: OTEL_INSTRUMENTATION_METHODS_INCLUDE\\n value: \\"test.Testing[methodB]\\"\\n```\\n\\nresults in the app being auto-instrumented with no configuration changes! The resulting app shows up in any APM UI, such as Elastic APM\\n\\n![Elastic APM UI showing methodB traced](/assets/images/using-the-otel-operator-for-injecting-java-agents/elastic-apm-ui-transaction.png)\\n\\nAs you can see, for this example I also added this env var to the pod yaml, `OTEL_INSTRUMENTATION_METHODS_INCLUDE=\\"test.Testing[methodB]\\"` so that there were traces showing from methodB.\\n\\n\\n## The technology behind the auto-instrumentation\\n\\nTo use the auto-instrumentation there is no specific need to understand the underlying mechanisms, but for those of you interested, here’s a quick outline. \\n1. The [OpenTelemetry Operator for Kubernetes](https://github.com/open-telemetry/opentelemetry-operator/) installs a [mutating webhook](https://kubernetes.io/docs/reference/access-authn-authz/admission-controllers/), a standard Kubernetes component.\\n2. When deploying, Kubernetes first sends all definitions to the mutating webhook.\\n3. If the mutating webhook sees that the conditions for auto-instrumentation should be applied (ie \\n 1. there is an Instrumentation resource for that namespace and\\n 2. the correct annotation for that Instrumentation is applied to the definition in some way, either from the definition itself or from the namespace),\\n3. then the mutating webhook “mutates” the definition to include the environment defined by the Instrumentation resource.\\n4. The environment includes the explicit values defined in the env, as well as some implicit OpenTelemetry values (see the [OpenTelemetry Operator for Kubernetes](https://github.com/open-telemetry/opentelemetry-operator/) documentation for full details).\\n5. And most importantly, the operator\\n 1. pulls the image defined in the Instrumentation resource,\\n 2. extracts the file at the path `/javaagent.jar` from that image (using shell command `cp`)\\n 3. inserts it into the pod at path `/otel-auto-instrumentation-java/javaagent.jar`\\n 4. and adds the environment variable `JAVA_TOOL_OPTIONS=-javaagent:/otel-auto-instrumentation-java/javaagent.jar`.\\n6. 
The JVM automatically picks up that JAVA_TOOL_OPTIONS environment variable on startup and applies it to the JVM command-line.\\n\\n\\n## Next steps\\n\\nThis walkthrough can be repeated in any Kubernetes cluster to demonstrate and experiment with auto-instrumentation (you will need to create the banana namespace first). In part 2 of this two part series, [Using a custom agent with the OpenTelemetry Operator for Kubernetes](https://www.elastic.co/observability-labs/blog/using-the-otel-operator-for-injecting-elastic-agents), I show how to install any Java agent via the OpenTelemetry operator, using the Elastic Java agents as examples.\\n","code":"var Component=(()=>{var p=Object.create;var i=Object.defineProperty;var m=Object.getOwnPropertyDescriptor;var u=Object.getOwnPropertyNames;var g=Object.getPrototypeOf,f=Object.prototype.hasOwnProperty;var b=(t,e)=>()=>(e||t((e={exports:{}}).exports,e),e.exports),y=(t,e)=>{for(var a in e)i(t,a,{get:e[a],enumerable:!0})},l=(t,e,a,r)=>{if(e&&typeof e==\\"object\\"||typeof e==\\"function\\")for(let o of u(e))!f.call(t,o)&&o!==a&&i(t,o,{get:()=>e[o],enumerable:!(r=m(e,o))||r.enumerable});return t};var v=(t,e,a)=>(a=t!=null?p(g(t)):{},l(e||!t||!t.__esModule?i(a,\\"default\\",{value:t,enumerable:!0}):a,t)),w=t=>l(i({},\\"__esModule\\",{value:!0}),t);var h=b((k,s)=>{s.exports=_jsx_runtime});var O={};y(O,{default:()=>d,frontmatter:()=>T});var n=v(h()),T={title:\\"Zero config OpenTelemetry auto-instrumentation for Kubernetes Java applications\\",slug:\\"using-the-otel-operator-for-injecting-java-agents\\",description:\\"Walking through how to install and enable the OpenTelemetry Operator for Kubernetes to auto-instrument Java applications, with no configuration changes needed for deployments\\",author:[{slug:\\"jack-shirazi\\"}],tags:[{slug:\\"opentelemetry\\"},{slug:\\"apm\\"},{slug:\\"containers\\"},{slug:\\"java\\"},{slug:\\"instrumentation\\"},{slug:\\"kubernetes\\"}],date:\\"2024-07-11\\",image:\\"blog-header.png\\"};function c(t){let e={a:\\"a\\",code:\\"code\\",em:\\"em\\",h2:\\"h2\\",img:\\"img\\",li:\\"li\\",ol:\\"ol\\",p:\\"p\\",pre:\\"pre\\",...t.components};return(0,n.jsxs)(n.Fragment,{children:[(0,n.jsxs)(e.p,{children:[\\"The \\",(0,n.jsx)(e.a,{href:\\"https://github.com/open-telemetry/opentelemetry-java-instrumentation/\\",rel:\\"nofollow\\",children:\\"OpenTelemetry Java agent\\"}),\\" has a number of \\",(0,n.jsx)(e.a,{href:\\"https://opentelemetry.io/docs/languages/java/automatic/#setup\\",rel:\\"nofollow\\",children:\\"ways to install\\"}),\\" the agent into a Java application. If you are running your Java applications in Kubernetes pods, there is a separate mechanism (which under the hood uses JAVA_TOOL_OPTIONS and other environment variables) to auto-instrument Java applications. This auto-instrumentation can be achieved with zero configuration of the applications and pods!\\"]}),`\\n`,(0,n.jsxs)(e.p,{children:[\\"The mechanism to achieve zero-config auto-instrumentation of Java applications in Kubernetes is via the \\",(0,n.jsx)(e.a,{href:\\"https://github.com/open-telemetry/opentelemetry-operator/\\",rel:\\"nofollow\\",children:\\"OpenTelemetry Operator for Kubernetes\\"}),\\". This operator has many capabilities and the full documentation (and of course source) is available in the project itself. 
In this blog, I\'ll walk through installing, setting up and running zero-config auto-instrumentation of Java applications in Kubernetes using the OpenTelemetry Operator.\\"]}),`\\n`,(0,n.jsxs)(e.h2,{id:\\"installing-the-opentelemetry-operator\\",children:[\\"Installing the OpenTelemetry Operator\\",(0,n.jsx)(\\"a\\",{id:\\"installing-the-opentelemetry-operator\\"})]}),`\\n`,(0,n.jsxs)(e.p,{children:[\\"At the time of writing this blog, the OpenTelemetry Operator needs the certificate manager (cert-manager) to be installed before the operator itself can be installed. Installing from the web is straightforward. First, install \\",(0,n.jsx)(e.code,{children:\\"cert-manager\\"}),\\" (the version to be installed will be specified in the \\",(0,n.jsx)(e.a,{href:\\"https://github.com/open-telemetry/opentelemetry-operator/\\",rel:\\"nofollow\\",children:\\"OpenTelemetry Operator for Kubernetes\\"}),\\" documentation):\\"]}),`\\n`,(0,n.jsx)(e.pre,{children:(0,n.jsx)(e.code,{children:`kubectl apply -f https://github.com/cert-manager/cert-manager/releases/download/v1.14.4/cert-manager.yaml\\n`})}),`\\n`,(0,n.jsxs)(e.p,{children:[\\"Then, when the cert-manager pods are ready (\\",(0,n.jsx)(e.code,{children:\\"kubectl get pods -n cert-manager\\"}),\\")\\\\xA0 ...\\"]}),`\\n`,(0,n.jsx)(e.pre,{children:(0,n.jsx)(e.code,{children:`NAMESPACE\\\\xA0 \\\\xA0 \\\\xA0 NAME \\\\xA0 \\\\xA0 \\\\xA0 \\\\xA0 \\\\xA0 \\\\xA0 \\\\xA0 \\\\xA0 \\\\xA0 \\\\xA0 \\\\xA0 \\\\xA0 \\\\xA0 \\\\xA0 \\\\xA0 \\\\xA0 \\\\xA0 \\\\xA0 \\\\xA0 \\\\xA0 READY\\ncert-manager \\\\xA0 cert-manager-67c98b89c8-rnr5s\\\\xA0 \\\\xA0 \\\\xA0 \\\\xA0 \\\\xA0 \\\\xA0 \\\\xA0 \\\\xA0 1/1\\ncert-manager \\\\xA0 cert-manager-cainjector-5c5695d979-q9hxz \\\\xA0 \\\\xA0 1/1\\ncert-manager \\\\xA0 cert-manager-webhook-7f9f8648b9-8gxgs\\\\xA0 \\\\xA0 \\\\xA0 \\\\xA0 1/1\\n`})}),`\\n`,(0,n.jsx)(e.p,{children:\\"... you can install the OpenTelemetry Operator:\\"}),`\\n`,(0,n.jsx)(e.pre,{children:(0,n.jsx)(e.code,{children:`kubectl apply -f https://github.com/open-telemetry/opentelemetry-operator/releases/latest/download/opentelemetry-operator.yaml\\n`})}),`\\n`,(0,n.jsxs)(e.p,{children:[\\"You can, of course, use a specific version of the operator instead of the \\",(0,n.jsx)(e.code,{children:\\"latest\\"}),\\", but here I\\\\u2019ve used the \\",(0,n.jsx)(e.code,{children:\\"latest\\"}),\\" version.\\"]}),`\\n`,(0,n.jsxs)(e.h2,{id:\\"an-instrumentation-resource\\",children:[\\"An Instrumentation resource\\",(0,n.jsx)(\\"a\\",{id:\\"an-instrumentation-resource\\"})]}),`\\n`,(0,n.jsxs)(e.p,{children:[\\"Now you need to add just one further Kubernetes resource to enable auto-instrumentation: an \\",(0,n.jsx)(e.code,{children:\\"Instrumentation\\"}),\\" resource. I am going to use the \\",(0,n.jsx)(e.code,{children:\\"banana\\"}),\\" namespace for my examples, so I have first created that namespace (\\",(0,n.jsx)(e.code,{children:\\"kubectl create namespace banana\\"}),\\"). The auto-instrumentation is specified and configured by these Instrumentation resources. 
Here is a basic one which will allow every Java pod in the \\",(0,n.jsx)(e.code,{children:\\"banana\\"}),\\" namespace to be auto-instrumented with version 2.5.0 of the \\",(0,n.jsx)(e.a,{href:\\"https://github.com/open-telemetry/opentelemetry-java-instrumentation/\\",rel:\\"nofollow\\",children:\\"OpenTelemetry Java agent\\"}),\\":\\"]}),`\\n`,(0,n.jsx)(e.pre,{children:(0,n.jsx)(e.code,{children:`apiVersion: opentelemetry.io/v1alpha1\\nkind: Instrumentation\\nmetadata:\\n name: banana-instr\\n namespace: banana\\nspec:\\n exporter:\\n endpoint: \\"https://my.endpoint\\"\\n propagators:\\n - tracecontext\\n - baggage\\n - b3\\n sampler:\\n type: parentbased_traceidratio\\n argument: \\"1.0\\"\\n java:\\n image: ghcr.io/open-telemetry/opentelemetry-operator/autoinstrumentation-java:2.5.0\\n env:\\n - name: OTEL_EXPORTER_OTLP_HEADERS\\n value: \\"Authorization=Bearer MyAuth\\"\\n`})}),`\\n`,(0,n.jsxs)(e.p,{children:[\\"Creating this resource (eg with \\",(0,n.jsx)(e.code,{children:\\"kubectl apply -f banana-instr.yaml\\"}),\\", assuming the above yaml was saved in file \\",(0,n.jsx)(e.code,{children:\\"banana-instr.yaml\\"}),\\") makes the \\",(0,n.jsx)(e.code,{children:\\"banana-instr\\"}),\\" Instrumentation resource available for use. (Note you will need to change \\",(0,n.jsx)(e.code,{children:\\"my.endpoint\\"}),\\" and \\",(0,n.jsx)(e.code,{children:\\"MyAuth\\"}),\\" to values appropriate for your collector.) You can use this instrumentation immediately by adding an annotation to any deployment in the \\",(0,n.jsx)(e.code,{children:\\"banana\\"}),\\" namespace:\\"]}),`\\n`,(0,n.jsx)(e.pre,{children:(0,n.jsx)(e.code,{children:`metadata:\\n\\\\xA0\\\\xA0annotations:\\n\\\\xA0\\\\xA0\\\\xA0\\\\xA0instrumentation.opentelemetry.io/inject-java: \\"true\\"\\n`})}),`\\n`,(0,n.jsxs)(e.p,{children:[\\"The \\",(0,n.jsx)(e.code,{children:\\"banana-instr\\"}),\\" Instrumentation resource is not yet set to be applied by \\",(0,n.jsx)(e.em,{children:\\"default\\"}),\\" to all pods in the banana namespace. Currently it\'s zero-config as far as the \\",(0,n.jsx)(e.em,{children:\\"application\\"}),\\" is concerned, but it requires an annotation added to a \\",(0,n.jsx)(e.em,{children:\\"pod or deployment\\"}),\\". 
To make it fully zero-config for \\",(0,n.jsx)(e.em,{children:\\"all pods\\"}),\\" in the \\",(0,n.jsx)(e.code,{children:\\"banana\\"}),\\" namespace, we need to add that annotation to the namespace itself, ie editing the namespace (\\",(0,n.jsx)(e.code,{children:\\"kubectl edit namespace banana\\"}),\\") so it would then have contents similar to\\"]}),`\\n`,(0,n.jsx)(e.pre,{children:(0,n.jsx)(e.code,{children:`apiVersion: v1\\nkind: Namespace\\nmetadata:\\n\\\\xA0\\\\xA0name: banana\\n\\\\xA0\\\\xA0annotations:\\n\\\\xA0\\\\xA0\\\\xA0\\\\xA0instrumentation.opentelemetry.io/inject-java: \\"banana-instr\\"\\n...\\n`})}),`\\n`,(0,n.jsxs)(e.p,{children:[\\"Now we have a namespace that is going to auto-instrument \\",(0,n.jsx)(e.em,{children:\\"every\\"}),\\" Java application deployed in the \\",(0,n.jsx)(e.code,{children:\\"banana\\"}),\\" namespace with the 2.5.0 \\",(0,n.jsx)(e.a,{href:\\"https://github.com/open-telemetry/opentelemetry-java-instrumentation/\\",rel:\\"nofollow\\",children:\\"OpenTelemetry Java agent\\"}),\\"!\\"]}),`\\n`,(0,n.jsxs)(e.h2,{id:\\"trying-it\\",children:[\\"Trying it\\",(0,n.jsx)(\\"a\\",{id:\\"trying-it\\"})]}),`\\n`,(0,n.jsxs)(e.p,{children:[\\"There is a simple example Java application at \\",(0,n.jsx)(e.a,{href:\\"https://docker.elastic.co/demos/apm/k8s-webhook-test\\",rel:\\"nofollow\\",children:\\"docker.elastic.co/demos/apm/k8s-webhook-test\\"}),\\" which just repeatedly calls the chain \\",(0,n.jsx)(e.code,{children:\\"main->methodA->methodB->methodC->methodD\\"}),\\" with some sleeps in the calls. Running this (\\",(0,n.jsx)(e.code,{children:\\"kubectl apply -f banana-app.yaml\\"}),\\") using a very basic pod definition:\\"]}),`\\n`,(0,n.jsx)(e.pre,{children:(0,n.jsx)(e.code,{children:`apiVersion: v1\\nkind: Pod\\nmetadata:\\n name: banana-app\\n namespace: banana\\n labels:\\n app: banana-app\\nspec:\\n containers:\\n - image: docker.elastic.co/demos/apm/k8s-webhook-test\\n imagePullPolicy: Always\\n name: banana-app\\n env: \\n - name: OTEL_INSTRUMENTATION_METHODS_INCLUDE\\n value: \\"test.Testing[methodB]\\"\\n`})}),`\\n`,(0,n.jsx)(e.p,{children:\\"results in the app being auto-instrumented with no configuration changes! 
The resulting app shows up in any APM UI, such as Elastic APM\\"}),`\\n`,(0,n.jsx)(e.p,{children:(0,n.jsx)(e.img,{src:\\"/assets/images/using-the-otel-operator-for-injecting-java-agents/elastic-apm-ui-transaction.png\\",alt:\\"Elastic APM UI showing methodB traced\\",width:\\"1999\\",height:\\"1095\\"})}),`\\n`,(0,n.jsxs)(e.p,{children:[\\"As you can see, for this example I also added this env var to the pod yaml, \\",(0,n.jsx)(e.code,{children:\'OTEL_INSTRUMENTATION_METHODS_INCLUDE=\\"test.Testing[methodB]\\"\'}),\\" so that there were traces showing from methodB.\\"]}),`\\n`,(0,n.jsxs)(e.h2,{id:\\"the-technology-behind-the-auto-instrumentation\\",children:[\\"The technology behind the auto-instrumentation\\",(0,n.jsx)(\\"a\\",{id:\\"the-technology-behind-the-auto-instrumentation\\"})]}),`\\n`,(0,n.jsx)(e.p,{children:\\"To use the auto-instrumentation there is no specific need to understand the underlying mechanisms, but for those of you interested, here\\\\u2019s a quick outline.\\"}),`\\n`,(0,n.jsxs)(e.ol,{children:[`\\n`,(0,n.jsxs)(e.li,{children:[\\"The \\",(0,n.jsx)(e.a,{href:\\"https://github.com/open-telemetry/opentelemetry-operator/\\",rel:\\"nofollow\\",children:\\"OpenTelemetry Operator for Kubernetes\\"}),\\" installs a \\",(0,n.jsx)(e.a,{href:\\"https://kubernetes.io/docs/reference/access-authn-authz/admission-controllers/\\",rel:\\"nofollow\\",children:\\"mutating webhook\\"}),\\", a standard Kubernetes component.\\"]}),`\\n`,(0,n.jsx)(e.li,{children:\\"When deploying, Kubernetes first sends all definitions to the mutating webhook.\\"}),`\\n`,(0,n.jsxs)(e.li,{children:[\\"If the mutating webhook sees that the conditions for auto-instrumentation should be applied (ie\\",`\\n`,(0,n.jsxs)(e.ol,{children:[`\\n`,(0,n.jsx)(e.li,{children:\\"there is an Instrumentation resource for that namespace and\\"}),`\\n`,(0,n.jsx)(e.li,{children:\\"the correct annotation for that Instrumentation is applied to the definition in some way, either from the definition itself or from the namespace),\\"}),`\\n`]}),`\\n`]}),`\\n`,(0,n.jsx)(e.li,{children:\\"then the mutating webhook \\\\u201Cmutates\\\\u201D the definition to include the environment defined by the Instrumentation resource.\\"}),`\\n`,(0,n.jsxs)(e.li,{children:[\\"The environment includes the explicit values defined in the env, as well as some implicit OpenTelemetry values (see the \\",(0,n.jsx)(e.a,{href:\\"https://github.com/open-telemetry/opentelemetry-operator/\\",rel:\\"nofollow\\",children:\\"OpenTelemetry Operator for Kubernetes\\"}),\\" documentation for full details).\\"]}),`\\n`,(0,n.jsxs)(e.li,{children:[\\"And most importantly, the operator\\",`\\n`,(0,n.jsxs)(e.ol,{children:[`\\n`,(0,n.jsx)(e.li,{children:\\"pulls the image defined in the Instrumentation resource,\\"}),`\\n`,(0,n.jsxs)(e.li,{children:[\\"extracts the file at the path \\",(0,n.jsx)(e.code,{children:\\"/javaagent.jar\\"}),\\" from that image (using shell command \\",(0,n.jsx)(e.code,{children:\\"cp\\"}),\\")\\"]}),`\\n`,(0,n.jsxs)(e.li,{children:[\\"inserts it into the pod at path \\",(0,n.jsx)(e.code,{children:\\"/otel-auto-instrumentation-java/javaagent.jar\\"})]}),`\\n`,(0,n.jsxs)(e.li,{children:[\\"and adds the environment variable \\",(0,n.jsx)(e.code,{children:\\"JAVA_TOOL_OPTIONS=-javaagent:/otel-auto-instrumentation-java/javaagent.jar\\"}),\\".\\"]}),`\\n`]}),`\\n`]}),`\\n`,(0,n.jsx)(e.li,{children:\\"The JVM automatically picks up that JAVA_TOOL_OPTIONS environment variable on startup and applies it to the JVM 
command-line.\\"}),`\\n`]}),`\\n`,(0,n.jsxs)(e.h2,{id:\\"next-steps\\",children:[\\"Next steps\\",(0,n.jsx)(\\"a\\",{id:\\"next-steps\\"})]}),`\\n`,(0,n.jsxs)(e.p,{children:[\\"This walkthrough can be repeated in any Kubernetes cluster to demonstrate and experiment with auto-instrumentation (you will need to create the banana namespace first). In part 2 of this two part series, \\",(0,n.jsx)(e.a,{href:\\"https://www.elastic.co/observability-labs/blog/using-the-otel-operator-for-injecting-elastic-agents\\",rel:\\"nofollow\\",children:\\"Using a custom agent with the OpenTelemetry Operator for Kubernetes\\"}),\\", I show how to install any Java agent via the OpenTelemetry operator, using the Elastic Java agents as examples.\\"]})]})}function d(t={}){let{wrapper:e}=t.components||{};return e?(0,n.jsx)(e,{...t,children:(0,n.jsx)(c,{...t})}):c(t)}return w(O);})();\\n;return Component;"},"_id":"articles/using-the-otel-operator-for-injecting-java-agents.mdx","_raw":{"sourceFilePath":"articles/using-the-otel-operator-for-injecting-java-agents.mdx","sourceFileName":"using-the-otel-operator-for-injecting-java-agents.mdx","sourceFileDir":"articles","contentType":"mdx","flattenedPath":"articles/using-the-otel-operator-for-injecting-java-agents"},"type":"Article","imageUrl":"/assets/images/using-the-otel-operator-for-injecting-java-agents/blog-header.png","readingTime":"6 min read","url":"/using-the-otel-operator-for-injecting-java-agents","headings":[{"level":2,"title":"Installing the OpenTelemetry Operator","href":"#installing-the-opentelemetry-operatora-idinstalling-the-opentelemetry-operatora"},{"level":2,"title":"An Instrumentation resource","href":"#an-instrumentation-resourcea-idan-instrumentation-resourcea"},{"level":2,"title":"Trying it","href":"#trying-ita-idtrying-ita"},{"level":2,"title":"The technology behind the auto-instrumentation","href":"#the-technology-behind-the-auto-instrumentationa-idthe-technology-behind-the-auto-instrumentationa"},{"level":2,"title":"Next steps","href":"#next-stepsa-idnext-stepsa"}]},{"title":"Easily analyze AWS VPC Flow Logs with Elastic Observability","slug":"vpc-flow-logs-monitoring-analytics-observability","date":"2023-01-23","description":"Elastic Observability can ingest and help analyze AWS VPC Flow Logs from your application’s VPC. Learn how to ingest AWS VPC Flow Logs through a step-by-step method into Elastic, then analyze it and apply OOTB machine learning for insights.","image":"patterns-midnight-background-no-logo-observability.png","author":[{"slug":"bahubali-shetti","type":"Author","_raw":{}}],"tags":[{"slug":"aws","type":"Tag","_raw":{}},{"slug":"metrics","type":"Tag","_raw":{}},{"slug":"aws-vpc-flow","type":"Tag","_raw":{}},{"slug":"log-analytics","type":"Tag","_raw":{}}],"body":{"raw":"\\nElastic Observability provides a full-stack observability solution, by supporting metrics, traces, and logs for applications and infrastructure. In [a previous blog](https://www.elastic.co/blog/aws-service-metrics-monitor-observability-easy), I showed you an [AWS monitoring](https://www.elastic.co/observability/aws-monitoring) infrastructure running a three-tier application. Specifically we reviewed metrics ingest and analysis on Elastic Observability for EC2, VPC, ELB, and RDS. In this blog, we will cover how to ingest logs from AWS, and more specifically, we will review how to get VPC Flow Logs into Elastic and what you can do with this data.\\n\\nLogging is an important part of observability, for which we generally think of metrics and/or tracing. 
However, the amount of logs an application or the underlying infrastructure outputs can be daunting.\\n\\nWith Elastic Observability, there are three main mechanisms to ingest logs:\\n\\n- The new Elastic Agent pulls metrics and logs from CloudWatch and S3 where logs are generally pushed from a service (for example, EC2, ELB, WAF, Route53, etc.). We reviewed Elastic agent metrics configuration for EC2, RDS (Aurora), ELB, and NAT metrics in this [blog](https://www.elastic.co/blog/aws-service-metrics-monitor-observability-easy).\\n- Using [Elastic’s Serverless Forwarder (runs on Lambda and available in AWS SAR)](https://www.elastic.co/blog/elastic-and-aws-serverless-application-repository-speed-time-to-actionable-insights-with-frictionless-log-ingestion-from-amazon-s3) to send logs from Firehose, S3, CloudWatch, and other AWS services into Elastic.\\n- Beta feature (contact your Elastic account team): Using AWS Firehose to directly insert logs from AWS into Elastic — specifically if you are running the Elastic stack on AWS infrastructure.\\n\\n![](/assets/images/vpc-flow-logs-monitoring-analytics-observability/Elastic-Observability-VPC-Flow-Logs.jpg)\\n\\nIn this blog, we will provide an overview of the second option, Elastic’s serverless forwarder collecting VPC Flow Logs from an application deployed on EC2 instances. Here’s what we\'ll cover:\\n\\n- A walk-through on how to analyze VPC Flow Log info with Elastic’s Discover, dashboard, and ML analysis.\\n- A detailed step-by-step overview and setup of the Elastic serverless forwarder on AWS as a pipeline for VPC Flow Logs into [Elastic Cloud](https://cloud.elastic.co).\\n\\n## Elastic’s serverless forwarder on AWS Lambda\\n\\nAWS users can quickly ingest logs stored in Amazon S3, CloudWatch, or Kinesis with the Elastic serverless forwarder, an AWS Lambda application, and view them in the Elastic Stack alongside other logs and metrics for centralized analytics. Once the serverless forwarder is configured and deployed from the AWS Serverless Application Repository (SAR), logs will be ingested and available in Elastic for analysis. 
See the following links for further configuration guidance:\\n\\n- [Elastic’s serverless forwarder (runs on Lambda and available in AWS SAR)](https://www.elastic.co/blog/elastic-and-aws-serverless-application-repository-speed-time-to-actionable-insights-with-frictionless-log-ingestion-from-amazon-s3)\\n- [Serverless forwarder GitHub repo](https://github.com/elastic/elastic-serverless-forwarder/blob/main/docs/README-AWS.md#s3_config_file)\\n\\nIn our configuration, we will ingest VPC Flow Logs into Elastic for the three-tier app deployed in the previous [blog](https://www.elastic.co/blog/aws-service-metrics-monitor-observability-easy).\\n\\nThere are three different configurations with the Elastic serverless forwarder:\\n\\n![](/assets/images/vpc-flow-logs-monitoring-analytics-observability/blog-elastic-vpc-flow-logs-3-configurations.png)\\n\\nLogs can be directly ingested from:\\n\\n- **Amazon CloudWatch:** Elastic serverless forwarder can pull VPC Flow Logs directly from an Amazon CloudWatch log group, which is a commonly used endpoint to store VPC Flow Logs in AWS.\\n- **Amazon Kinesis:** Elastic serverless forwarder can pull VPC Flow Logs directly from Kinesis, which is another location to [publish VPC Flow Logs](https://docs.aws.amazon.com/vpc/latest/userguide/flow-logs-firehose.html).\\n- **Amazon S3:** Elastic serverless forwarder can pull VPC Flow Logs from Amazon S3 via SQS event notifications, which is a common endpoint to publish VPC Flow Logs in AWS.\\n\\nIn the second half of this blog, we will review how to utilize a common configuration: sending VPC Flow Logs to Amazon S3 and then into Elastic Cloud.\\n\\nBut first, let\'s review how to analyze VPC Flow Logs on Elastic.\\n\\n## Analyzing VPC Flow Logs in Elastic\\n\\nNow that you have VPC Flow Logs in Elastic Cloud, how can you analyze them?\\n\\nThere are several analyses you can perform on the VPC Flow Log data:\\n\\n1. Use Elastic’s Analytics Discover capabilities to manually analyze the data.\\n2. Use Elastic Observability’s anomaly feature to identify anomalies in the logs.\\n3. Use an out-of-the-box (OOTB) dashboard to further analyze data.\\n\\n### Using Elastic Discover\\n\\nIn Elastic analytics, you can search and filter your data, get information about the structure of the fields, and display your findings in a visualization. You can also customize and save your searches and place them on a dashboard. With Discover, you can:\\n\\n- View logs in bulk, within specific time frames\\n- Look at individual details of each entry (document)\\n- Filter for specific values\\n- Analyze fields\\n- Create and save searches\\n- Build visualizations\\n\\nFor a complete understanding of Discover and all of Elastic’s analytics capabilities, look at [Elastic documentation](https://www.elastic.co/guide/en/kibana/current/discover.html#).\\n\\nFor VPC Flow Logs, the important stats to understand are:\\n\\n- How many logs were accepted/rejected\\n- Where potential security violations occur (for example, source IPs from outside the VPC)\\n- What port is generally being queried\\n\\nI’ve filtered the logs on the following:\\n\\n- Amazon S3: bshettisartest\\n- VPC Flow Log action: REJECT\\n- VPC Network Interface: Webserver 1\\n\\nWe want to see what IP addresses are trying to hit our web servers.\\n\\nFrom that, we want to understand which IP addresses we are getting the most REJECTS from, and we simply find the **source**.ip field. 
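In KQL, that filter looks something like the following sketch (the exact field names depend on your integration’s mappings - here assuming Elastic’s AWS VPC Flow integration):\\n\\n```\\naws.vpcflow.action : \\"REJECT\\" and source.ip : *\\n```\\n\\n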
Then, we can quickly get a breakdown that shows 185.242.53.156 is the most rejected source in the 3+ hours since we turned on VPC Flow Logs.\\n\\n![](/assets/images/vpc-flow-logs-monitoring-analytics-observability/blog-elastic-vpc-flow-logs-100-hits.png)\\n\\nAdditionally, I can see a visualization by selecting the “Visualize” button. We get the following, which we can add to a dashboard:\\n\\n![](/assets/images/vpc-flow-logs-monitoring-analytics-observability/blog-elastic-vpc-flow-logs-add-to-a-dashboard.png)\\n\\nIn addition to IP addresses, we also want to see which ports are being hit on our web servers. \\nWe select the destination port field, and the quick pop-up shows us a list of ports being targeted. We can see that port 23 is being targeted (this port is generally used for telnet), port 445 is being targeted (used for Microsoft Active Directory), and port 433 (presumably probing for 443, the HTTPS/SSL port). We also see these are all REJECT.\\n\\n![](/assets/images/vpc-flow-logs-monitoring-analytics-observability/blog-elastic-vpc-flow-logs-reject.png)\\n\\n### Anomaly detection in Elastic Observability logs\\n\\nIn addition to Discover, Elastic Observability provides the ability to detect anomalies on logs. In Elastic Observability -\\> logs -\\> anomalies, you can turn on machine learning for:\\n\\n- Log rate: automatically detects anomalous log entry rates\\n- Categorization: automatically categorizes log messages\\n\\n![](/assets/images/vpc-flow-logs-monitoring-analytics-observability/blog-elastic-vpc-flow-logs-anomaly-detection-with-machine-learning.png)\\n\\nFor our VPC Flow Logs, we turned both on. When we look at what has been detected for anomalous log entry rates, we see:\\n\\n![](/assets/images/vpc-flow-logs-monitoring-analytics-observability/blog-elastic-vpc-flow-logs-anomalies.png)\\n\\nElastic immediately detected a spike in logs when we turned on VPC Flow Logs for our application. The rate change is being detected because we’re also ingesting VPC Flow Logs from another application for a couple of days prior to adding the application in this blog.\\n\\nWe can drill down further into this anomaly with machine learning.\\n\\n![](/assets/images/vpc-flow-logs-monitoring-analytics-observability/blog-elastic-vpc-flow-logs-anomaly-explorer.png)\\n\\nThere is more machine learning analysis you can utilize with your logs — check out [Elastic machine learning documentation](https://www.elastic.co/guide/en/kibana/8.5/xpack-ml.html).\\n\\nSince we know that a spike exists, we can also use the Explain Log Rate Spikes capability in Elastic’s AIOps Labs, under Machine Learning. Additionally, we’ve grouped the results to see what is causing some of the spikes.\\n\\n![](/assets/images/vpc-flow-logs-monitoring-analytics-observability/blog-elastic-vpc-flow-logs-explain-log-rate-spikes.png)\\n\\nAs we can see, a specific network interface is sending more VPC log flows than others. 
We can drill down into this further in Discover.\\n\\n### VPC Flow Log dashboard on Elastic Observability\\n\\nFinally, Elastic also provides an OOTB dashboard showing the top IP addresses hitting your VPC, geographically where they are coming from, the time series of the flows, and a summary of VPC Flow Log rejects within the time frame.\\n\\nThis is a baseline dashboard that can be enhanced with visualizations you find in Discover, as we reviewed in option 1 (Using Elastic’s Analytics Discover capabilities) above.\\n\\n![](/assets/images/vpc-flow-logs-monitoring-analytics-observability/blog-elastic-vpc-flow-logs-action-geolocation.png)\\n\\n## Setting it all up\\n\\nLet’s walk through the details of configuring the Elastic Serverless Forwarder and Elastic Observability to ingest data.\\n\\n### Prerequisites and config\\n\\nIf you plan on following these steps, here are some of the components and details we used to set up this demonstration:\\n\\n- Ensure you have an account on [Elastic Cloud](https://cloud.elastic.co) and a deployed stack ([see instructions here](https://www.elastic.co/guide/en/elastic-stack/current/installing-elastic-stack.html)) on AWS. Deploying this on AWS is required for Elastic Serverless Forwarder.\\n- Ensure you have an AWS account with permissions to pull the necessary data from AWS. Specifically, ensure you can configure the agent to pull data from AWS as needed. [Please look at the documentation for details](https://docs.elastic.co/integrations/aws#requirements).\\n- We used [AWS’s three-tier app](https://github.com/aws-samples/aws-three-tier-web-architecture-workshop) and installed it as instructed in GitHub. ([See blog on ingesting metrics from the AWS services supporting this app](https://www.elastic.co/blog/aws-service-metrics-monitor-observability-easy).)\\n- Configure and install Elastic’s Serverless Forwarder.\\n- Ensure you turn on VPC Flow Logs for the VPC where the application is deployed and send the logs to Amazon S3.\\n\\n### Step 0: Get an account on Elastic Cloud\\n\\nFollow the instructions to [get started on Elastic Cloud](https://cloud.elastic.co/registration?fromURI=/home).\\n\\n![](/assets/images/vpc-flow-logs-monitoring-analytics-observability/blog-elastic-vpc-flow-logs-start-cloud-trial.png)\\n\\n### Step 1: Deploy Elastic on AWS\\n\\nOnce logged in to Elastic Cloud, create a deployment on AWS. It’s important to ensure that the deployment is on AWS. The Elastic Serverless Forwarder connects specifically to an endpoint that needs to be on AWS.\\n\\n![](/assets/images/vpc-flow-logs-monitoring-analytics-observability/blog-elastic-vpc-flow-logs-create-a-deployment.png)\\n\\nOnce your deployment is created, make sure you copy the Elasticsearch endpoint.\\n\\n![](/assets/images/vpc-flow-logs-monitoring-analytics-observability/blog-elastic-vpc-flow-logs-aws-logs.png)\\n\\nThe endpoint should be an AWS endpoint, such as:\\n\\n```bash\\nhttps://aws-logs.es.us-east-1.aws.found.io\\n```\\n\\n### Step 2: Turn on Elastic’s AWS Integrations on AWS\\n\\nIn your deployment’s Elastic Integration section, go to the AWS integration and select Install AWS assets.\\n\\n![](/assets/images/vpc-flow-logs-monitoring-analytics-observability/blog-elastic-vpc-flow-logs-aws-settings.png)\\n\\n### Step 3: Deploy your application\\n\\nFollow the instructions listed in [AWS’s Three-Tier app](https://github.com/aws-samples/aws-three-tier-web-architecture-workshop) and the instructions in the workshop link on GitHub. 
The workshop is listed [here](https://catalog.us-east-1.prod.workshops.aws/workshops/85cd2bb2-7f79-4e96-bdee-8078e469752a/en-US).\\n\\nOnce you’ve installed the app, get credentials from AWS. This will be needed for Elastic’s AWS integration.\\n\\nThere are several options for credentials:\\n\\n- Use access keys directly\\n- Use temporary security credentials\\n- Use a shared credentials file\\n- Use an IAM role Amazon Resource Name (ARN)\\n\\nView more details on specifics around necessary [credentials](https://docs.elastic.co/en/integrations/aws#aws-credentials) and [permissions](https://docs.elastic.co/en/integrations/aws#aws-permissions).\\n\\n### Step 4: Send VPC Flow Logs to Amazon S3 and set up Amazon SQS\\n\\nIn the VPC for the application deployed in Step 3, you will need to configure VPC Flow Logs and point them to an Amazon S3 bucket. Specifically, you will want to keep them in the AWS default format.\\n\\n![](/assets/images/vpc-flow-logs-monitoring-analytics-observability/blog-elastic-vpc-flow-logs-create-flow-log.png)\\n\\nCreate the VPC Flow Log.\\n\\nNext:\\n\\n- [Set up an Amazon SQS queue](https://docs.aws.amazon.com/AWSSimpleQueueService/latest/SQSDeveloperGuide/sqs-getting-started.html)\\n- [Configure Amazon S3 event notifications](https://docs.aws.amazon.com/AmazonS3/latest/userguide/ways-to-add-notification-config-to-bucket.html)\\n\\n### Step 5: Set up Elastic Serverless Forwarder on AWS\\n\\nFollow the instructions listed in [Elastic’s documentation](https://www.elastic.co/guide/en/observability/8.5/aws-deploy-elastic-serverless-forwarder.html) and refer to the [previous blog](https://www.elastic.co/blog/elastic-and-aws-serverless-application-repository-speed-time-to-actionable-insights-with-frictionless-log-ingestion-from-amazon-s3) providing an overview. The important bits during the configuration in Lambda’s application repository are to ensure you:\\n\\n- Specify the S3 Bucket in ElasticServerlessForwarderS3Buckets where the VPC Flow Logs are being sent. The value is the ARN of the S3 Bucket you created in Step 4.\\n- Specify the configuration file path in ElasticServerlessForwarderS3ConfigFile. The value is the S3 URL in the format \\"s3://bucket-name/config-file-name\\" pointing to the configuration file (sarconfig.yaml).\\n- Specify the S3 SQS Notifications queue used as the trigger of the Lambda function in ElasticServerlessForwarderS3SQSEvents. The value is the ARN of the SQS Queue you set up in Step 4.\\n\\n![](/assets/images/vpc-flow-logs-monitoring-analytics-observability/blog-elastic-vpc-flow-logs-application-settings.png)\\n\\nOnce AWS CloudFormation finishes setting up the Elastic serverless forwarder, you should see two AWS Lambda functions:\\n\\n![](/assets/images/vpc-flow-logs-monitoring-analytics-observability/blog-elastic-vpc-flow-logs-functions.png)\\n\\nIn order to check if logs are coming in, go to the function with “ **ApplicationElasticServer** ” in the name, go to its Monitor tab, and look at the **logs**. You should see the logs being pulled from S3.\\n\\n![](/assets/images/vpc-flow-logs-monitoring-analytics-observability/blog-elastic-vpc-flow-logs-function-overview.png)\\n\\n### Step 6: Check and ensure you have logs in Elastic\\n\\nNow that steps 1–5 are complete, you can go to Elastic’s Discover capability and you should see VPC Flow Logs coming in. 
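For reference, the configuration file (sarconfig.yaml) from Step 5 might look something like the following sketch - all values here are placeholders, and the authoritative schema is in the [serverless forwarder documentation](https://github.com/elastic/elastic-serverless-forwarder/blob/main/docs/README-AWS.md#s3_config_file):\\n\\n```\\ninputs:\\n - type: \\"s3-sqs\\"\\n id: \\"arn:aws:sqs:us-east-1:123456789012:my-vpc-flow-logs-queue\\"\\n outputs:\\n - type: \\"elasticsearch\\"\\n args:\\n elasticsearch_url: \\"https://my-deployment.es.us-east-1.aws.found.io\\"\\n api_key: \\"my-api-key\\"\\n es_datastream_name: \\"logs-aws.vpcflow-default\\"\\n```\\n\\n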
In the image below, we’ve filtered by Amazon S3 bucket **bshettisartest**.\\n\\n![](/assets/images/vpc-flow-logs-monitoring-analytics-observability/blog-elastic-vpc-flow-log-dashboard-filter.png)\\n\\n## Conclusion: Elastic Observability easily integrates with VPC Flow Logs for analytics, alerting, and insights\\n\\nI hope you’ve gotten an appreciation for how Elastic Observability can help you manage AWS VPC Flow Logs. Here’s a quick recap of lessons and what you learned:\\n\\n- A walk-through of how Elastic Observability provides enhanced analysis for VPC Flow Logs:\\n - Using Elastic’s Analytics Discover capabilities to manually analyze the data\\n - Leveraging Elastic Observability’s anomaly features to:\\n - Identify anomalies in the VPC flow logs\\n - Detects anomalous log entry rates\\n - Automatically categorizes log messages\\n - Using an OOTB dashboard to further analyze data\\n- A more detailed walk-through of how to set up the Elastic Serverless Forwarder\\n\\nStart your own [7-day free trial](https://aws.amazon.com/marketplace/pp/prodview-voru33wi6xs7k?trk=5fbc596b-6d2a-433a-8333-0bd1f28e84da%E2%89%BBchannel=el) by signing up via [AWS Marketplace](https://aws.amazon.com/marketplace/pp/prodview-voru33wi6xs7k?trk=d54b31eb-671c-49ba-88bb-7a1106421dfa%E2%89%BBchannel=el) and quickly spin up a deployment in minutes on any of the [Elastic Cloud regions on AWS](https://www.elastic.co/guide/en/cloud/current/ec-reference-regions.html#ec_amazon_web_services_aws_regions) around the world. Your AWS Marketplace purchase of Elastic will be included in your monthly consolidated billing statement and will draw against your committed spend with AWS.\\n\\n### Additional logging resources:\\n\\n- [Getting started with logging on Elastic (quickstart)](https://www.elastic.co/getting-started/observability/collect-and-analyze-logs)\\n- [Ingesting common known logs via integrations (compute node example)](https://www.elastic.co/guide/en/observability/current/logs-metrics-get-started.html)\\n- [List of integrations](https://docs.elastic.co/integrations)\\n- [Ingesting custom application logs into Elastic](https://www.elastic.co/blog/log-monitoring-management-enterprise)\\n- [Enriching logs in Elastic](https://www.elastic.co/blog/observability-logs-parsing-schema-read-write)\\n- Analyzing Logs with [Anomaly Detection (ML)](https://www.elastic.co/blog/reduce-mttd-ml-machine-learning-observability) and [AIOps](https://www.elastic.co/blog/observability-logs-machine-learning-aiops)\\n\\n### Common use case examples with logs:\\n\\n- [Nginx log management](https://youtu.be/ax04ZFWqVCg)\\n- [AWS VPC Flow log management](https://www.elastic.co/blog/vpc-flow-logs-monitoring-analytics-observability)\\n- [Using OpenAI to analyze Kubernetes errors](https://www.elastic.co/blog/kubernetes-errors-elastic-observability-logs-openai)\\n- [PostgreSQL issue analysis with AIOps](https://youtu.be/Li5TJAWbz8Q)\\n","code":"var Component=(()=>{var g=Object.create;var t=Object.defineProperty;var p=Object.getOwnPropertyDescriptor;var w=Object.getOwnPropertyNames;var u=Object.getPrototypeOf,f=Object.prototype.hasOwnProperty;var m=(n,e)=>()=>(e||n((e={exports:{}}).exports,e),e.exports),b=(n,e)=>{for(var o in e)t(n,o,{get:e[o],enumerable:!0})},a=(n,e,o,s)=>{if(e&&typeof e==\\"object\\"||typeof e==\\"function\\")for(let l of w(e))!f.call(n,l)&&l!==o&&t(n,l,{get:()=>e[l],enumerable:!(s=p(e,l))||s.enumerable});return n};var 
y=(n,e,o)=>(o=n!=null?g(u(n)):{},a(e||!n||!n.__esModule?t(o,\\"default\\",{value:n,enumerable:!0}):o,n)),v=n=>a(t({},\\"__esModule\\",{value:!0}),n);var c=m((C,r)=>{r.exports=_jsx_runtime});var A={};b(A,{default:()=>d,frontmatter:()=>S});var i=y(c()),S={title:\\"Easily analyze AWS VPC Flow Logs with Elastic Observability\\",slug:\\"vpc-flow-logs-monitoring-analytics-observability\\",date:\\"2023-01-23\\",description:\\"Elastic Observability can ingest and help analyze AWS VPC Flow Logs from your application\\\\u2019s VPC. Learn how to ingest AWS VPC Flow Logs through a step-by-step method into Elastic, then analyze it and apply OOTB machine learning for insights.\\",author:[{slug:\\"bahubali-shetti\\"}],image:\\"patterns-midnight-background-no-logo-observability.png\\",tags:[{slug:\\"aws\\"},{slug:\\"metrics\\"},{slug:\\"aws-vpc-flow\\"},{slug:\\"log-analytics\\"}]};function h(n){let e={a:\\"a\\",br:\\"br\\",code:\\"code\\",h2:\\"h2\\",h3:\\"h3\\",img:\\"img\\",li:\\"li\\",ol:\\"ol\\",p:\\"p\\",pre:\\"pre\\",strong:\\"strong\\",ul:\\"ul\\",...n.components};return(0,i.jsxs)(i.Fragment,{children:[(0,i.jsxs)(e.p,{children:[\\"Elastic Observability provides a full-stack observability solution, by supporting metrics, traces, and logs for applications and infrastructure. In \\",(0,i.jsx)(e.a,{href:\\"https://www.elastic.co/blog/aws-service-metrics-monitor-observability-easy\\",rel:\\"nofollow\\",children:\\"a previous blog\\"}),\\", I showed you an \\",(0,i.jsx)(e.a,{href:\\"https://www.elastic.co/observability/aws-monitoring\\",rel:\\"nofollow\\",children:\\"AWS monitoring\\"}),\\" infrastructure running a three-tier application. Specifically we reviewed metrics ingest and analysis on Elastic Observability for EC2, VPC, ELB, and RDS. In this blog, we will cover how to ingest logs from AWS, and more specifically, we will review how to get VPC Flow Logs into Elastic and what you can do with this data.\\"]}),`\\n`,(0,i.jsx)(e.p,{children:\\"Logging is an important part of observability, for which we generally think of metrics and/or tracing. However, the amount of logs an application or the underlying infrastructure output can be significantly daunting.\\"}),`\\n`,(0,i.jsx)(e.p,{children:\\"With Elastic Observability, there are three main mechanisms to ingest logs:\\"}),`\\n`,(0,i.jsxs)(e.ul,{children:[`\\n`,(0,i.jsxs)(e.li,{children:[\\"The new Elastic Agent pulls metrics and logs from CloudWatch and S3 where logs are generally pushed from a service (for example, EC2, ELB, WAF, Route53, etc ). 
We reviewed Elastic agent metrics configuration for EC2, RDS (Aurora), ELB, and NAT metrics in this \\",(0,i.jsx)(e.a,{href:\\"https://www.elastic.co/blog/aws-service-metrics-monitor-observability-easy\\",rel:\\"nofollow\\",children:\\"blog\\"}),\\".\\"]}),`\\n`,(0,i.jsxs)(e.li,{children:[\\"Using \\",(0,i.jsx)(e.a,{href:\\"https://www.elastic.co/blog/elastic-and-aws-serverless-application-repository-speed-time-to-actionable-insights-with-frictionless-log-ingestion-from-amazon-s3\\",rel:\\"nofollow\\",children:\\"Elastic\\\\u2019s Serverless Forwarder (runs on Lambda and available in AWS SAR)\\"}),\\" to send logs from Firehose, S3, CloudWatch, and other AWS services into Elastic.\\"]}),`\\n`,(0,i.jsx)(e.li,{children:\\"Beta feature (contact your Elastic account team): Using AWS Firehose to directly insert logs from AWS into Elastic \\\\u2014 specifically if you are running the Elastic stack on AWS infrastructure.\\"}),`\\n`]}),`\\n`,(0,i.jsx)(e.p,{children:(0,i.jsx)(e.img,{src:\\"/assets/images/vpc-flow-logs-monitoring-analytics-observability/Elastic-Observability-VPC-Flow-Logs.jpg\\",alt:\\"\\",width:\\"2568\\",height:\\"1322\\"})}),`\\n`,(0,i.jsx)(e.p,{children:\\"In this blog we will provide an overview of the second option, Elastic\\\\u2019s serverless forwarder collecting VPC Flow Logs from an application deployed on EC2 instances. Here\\\\u2019s what we\'ll cover:\\"}),`\\n`,(0,i.jsxs)(e.ul,{children:[`\\n`,(0,i.jsx)(e.li,{children:\\"A walk-through on how to analyze VPC Flow Log info with Elastic\\\\u2019s Discover, dashboard, and ML analysis.\\"}),`\\n`,(0,i.jsxs)(e.li,{children:[\\"A detailed step-by-step overview and setup of the Elastic serverless forwarder on AWS as a pipeline for VPC Flow Logs into \\",(0,i.jsx)(e.a,{href:\\"https://cloud.elastic.co\\",rel:\\"nofollow\\",children:\\"Elastic Cloud\\"}),\\".\\"]}),`\\n`]}),`\\n`,(0,i.jsx)(e.h2,{id:\\"elastics-serverless-forwarder-on-aws-lambda\\",children:\\"Elastic\\\\u2019s serverless forwarder on AWS Lambda\\"}),`\\n`,(0,i.jsx)(e.p,{children:\\"AWS users can quickly ingest logs stored in Amazon S3, CloudWatch, or Kinesis with the Elastic serverless forwarder, an AWS Lambda application, and view them in the Elastic Stack alongside other logs and metrics for centralized analytics. Once the AWS serverless forwarder is configured and deployed from AWS, Serverless Application Registry (SAR) logs will be ingested and available in Elastic for analysis. 
See the following links for further configuration guidance:\\"}),`\\n`,(0,i.jsxs)(e.ul,{children:[`\\n`,(0,i.jsx)(e.li,{children:(0,i.jsx)(e.a,{href:\\"https://www.elastic.co/blog/elastic-and-aws-serverless-application-repository-speed-time-to-actionable-insights-with-frictionless-log-ingestion-from-amazon-s3\\",rel:\\"nofollow\\",children:\\"Elastic\\\\u2019s serverless forwarder (runs Lambda and available in AWS SAR)\\"})}),`\\n`,(0,i.jsx)(e.li,{children:(0,i.jsx)(e.a,{href:\\"https://github.com/elastic/elastic-serverless-forwarder/blob/main/docs/README-AWS.md#s3_config_file\\",rel:\\"nofollow\\",children:\\"Serverless forwarder GitHub repo\\"})}),`\\n`]}),`\\n`,(0,i.jsxs)(e.p,{children:[\\"In our configuration we will ingest VPC Flow Logs into Elastic for the three-tier app deployed in the previous \\",(0,i.jsx)(e.a,{href:\\"https://www.elastic.co/blog/aws-service-metrics-monitor-observability-easy\\",rel:\\"nofollow\\",children:\\"blog\\"}),\\".\\"]}),`\\n`,(0,i.jsx)(e.p,{children:\\"There are three different configurations with the Elastic serverless forwarder:\\"}),`\\n`,(0,i.jsx)(e.p,{children:(0,i.jsx)(e.img,{src:\\"/assets/images/vpc-flow-logs-monitoring-analytics-observability/blog-elastic-vpc-flow-logs-3-configurations.png\\",alt:\\"\\",width:\\"1644\\",height:\\"669\\"})}),`\\n`,(0,i.jsx)(e.p,{children:\\"Logs can be directly ingested from:\\"}),`\\n`,(0,i.jsxs)(e.ul,{children:[`\\n`,(0,i.jsxs)(e.li,{children:[(0,i.jsx)(e.strong,{children:\\"Amazon CloudWatch:\\"}),\\" Elastic serverless forwarder can pull VPC Flow Logs directly from an Amazon CloudWatch log group, which is a commonly used endpoint to store VPC Flow Logs in AWS.\\"]}),`\\n`,(0,i.jsxs)(e.li,{children:[(0,i.jsx)(e.strong,{children:\\"Amazon Kinesis:\\"}),\\" Elastic serverless forwarder can pull VPC Flow Logs directly from Kinesis, which is another location to \\",(0,i.jsx)(e.a,{href:\\"https://docs.aws.amazon.com/vpc/latest/userguide/flow-logs-firehose.html\\",rel:\\"nofollow\\",children:\\"publish VPC Flow Logs\\"}),\\".\\"]}),`\\n`,(0,i.jsxs)(e.li,{children:[(0,i.jsx)(e.strong,{children:\\"Amazon S3:\\"}),\\" Elastic serverless forwarder can pull VPC Flow Logs from Amazon S3 via SQS event notifications, which is a common endpoint to publish VPC Flow Logs in AWS.\\"]}),`\\n`]}),`\\n`,(0,i.jsx)(e.p,{children:\\"We will review how to utilize a common configuration, which is to send VPC Flow Logs to Amazon S3 and into Elastic Cloud in the second half of this blog.\\"}),`\\n`,(0,i.jsx)(e.p,{children:\\"But first let\'s review how to analyze VPC Flow Logs on Elastic.\\"}),`\\n`,(0,i.jsx)(e.h2,{id:\\"analyzing-vpc-flow-logs-in-elastic\\",children:\\"Analyzing VPC Flow Logs in Elastic\\"}),`\\n`,(0,i.jsx)(e.p,{children:\\"Now that you have VPC Flow Logs in Elastic Cloud, how can you analyze them?\\"}),`\\n`,(0,i.jsx)(e.p,{children:\\"There are several analyses you can perform on the VPC Flow Log data:\\"}),`\\n`,(0,i.jsxs)(e.ol,{children:[`\\n`,(0,i.jsx)(e.li,{children:\\"Use Elastic\\\\u2019s Analytics Discover capabilities to manually analyze the data.\\"}),`\\n`,(0,i.jsx)(e.li,{children:\\"Use Elastic Observability\\\\u2019s anomaly feature to identify anomalies in the logs.\\"}),`\\n`,(0,i.jsx)(e.li,{children:\\"Use an out-of-the-box (OOTB) dashboard to further analyze data.\\"}),`\\n`]}),`\\n`,(0,i.jsx)(e.h3,{id:\\"using-elastic-discover\\",children:\\"Using Elastic Discover\\"}),`\\n`,(0,i.jsx)(e.p,{children:\\"In Elastic analytics, you can search and filter your data, get information about the structure of the 
fields, and display your findings in a visualization. You can also customize and save your searches and place them on a dashboard. With Discover, you can:\\"}),`\\n`,(0,i.jsxs)(e.ul,{children:[`\\n`,(0,i.jsx)(e.li,{children:\\"View logs in bulk, within specific time frames\\"}),`\\n`,(0,i.jsx)(e.li,{children:\\"Look at individual details of each entry (document)\\"}),`\\n`,(0,i.jsx)(e.li,{children:\\"Filter for specific values\\"}),`\\n`,(0,i.jsx)(e.li,{children:\\"Analyze fields\\"}),`\\n`,(0,i.jsx)(e.li,{children:\\"Create and save searches\\"}),`\\n`,(0,i.jsx)(e.li,{children:\\"Build visualizations\\"}),`\\n`]}),`\\n`,(0,i.jsxs)(e.p,{children:[\\"For a complete understanding of Discover and all of Elastic\\\\u2019s analytics capabilities, look at \\",(0,i.jsx)(e.a,{href:\\"https://www.elastic.co/guide/en/kibana/current/discover.html#\\",rel:\\"nofollow\\",children:\\"Elastic documentation\\"}),\\".\\"]}),`\\n`,(0,i.jsx)(e.p,{children:\\"For VPC Flow Logs, the important stats to understand are:\\"}),`\\n`,(0,i.jsxs)(e.ul,{children:[`\\n`,(0,i.jsx)(e.li,{children:\\"How many logs were accepted/rejected\\"}),`\\n`,(0,i.jsx)(e.li,{children:\\"Where potential security violations occur (for example, source IPs from outside the VPC)\\"}),`\\n`,(0,i.jsx)(e.li,{children:\\"What port is generally being queried\\"}),`\\n`]}),`\\n`,(0,i.jsx)(e.p,{children:\\"I\\\\u2019ve filtered the logs on the following:\\"}),`\\n`,(0,i.jsxs)(e.ul,{children:[`\\n`,(0,i.jsx)(e.li,{children:\\"Amazon S3: bshettisartest\\"}),`\\n`,(0,i.jsx)(e.li,{children:\\"VPC Flow Log action: REJECT\\"}),`\\n`,(0,i.jsx)(e.li,{children:\\"VPC Network Interface: Webserver 1\\"}),`\\n`]}),`\\n`,(0,i.jsx)(e.p,{children:\\"We want to see what IP addresses are trying to hit our web servers.\\"}),`\\n`,(0,i.jsxs)(e.p,{children:[\\"From that, we want to understand which IP addresses we are getting the most REJECTS from, and we simply find the \\",(0,i.jsx)(e.strong,{children:\\"source\\"}),\\".ip field. Then, we can quickly get a breakdown that shows 185.242.53.156 is the most rejected source in the 3+ hours since we turned on VPC Flow Logs.\\"]}),`\\n`,(0,i.jsx)(e.p,{children:(0,i.jsx)(e.img,{src:\\"/assets/images/vpc-flow-logs-monitoring-analytics-observability/blog-elastic-vpc-flow-logs-100-hits.png\\",alt:\\"\\",width:\\"1489\\",height:\\"950\\"})}),`\\n`,(0,i.jsx)(e.p,{children:\\"Additionally, I can see a visualization by selecting the \\\\u201CVisualize\\\\u201D button. We get the following, which we can add to a dashboard:\\"}),`\\n`,(0,i.jsx)(e.p,{children:(0,i.jsx)(e.img,{src:\\"/assets/images/vpc-flow-logs-monitoring-analytics-observability/blog-elastic-vpc-flow-logs-add-to-a-dashboard.png\\",alt:\\"\\",width:\\"1999\\",height:\\"898\\"})}),`\\n`,(0,i.jsxs)(e.p,{children:[\\"In addition to IP addresses, we also want to see which ports are being hit on our web servers.\\",(0,i.jsx)(e.br,{}),`\\n`,\\"We select the destination port field, and the quick pop-up shows us a list of ports being targeted. We can see that port 23 is being targeted (this port is generally used for telnet), port 445 is being targeted (used for Microsoft Active Directory), and port 433 (presumably probing for 443, the HTTPS/SSL port). 
We also see these are all REJECT.\\"]}),`\\n`,(0,i.jsx)(e.p,{children:(0,i.jsx)(e.img,{src:\\"/assets/images/vpc-flow-logs-monitoring-analytics-observability/blog-elastic-vpc-flow-logs-reject.png\\",alt:\\"\\",width:\\"1537\\",height:\\"817\\"})}),`\\n`,(0,i.jsx)(e.h3,{id:\\"anomaly-detection-in-elastic-observability-logs\\",children:\\"Anomaly detection in Elastic Observability logs\\"}),`\\n`,(0,i.jsx)(e.p,{children:\\"In addition to Discover, Elastic Observability provides the ability to detect anomalies on logs. In Elastic Observability -> Logs -> Anomalies, you can turn on machine learning for:\\"}),`\\n`,(0,i.jsxs)(e.ul,{children:[`\\n`,(0,i.jsx)(e.li,{children:\\"Log rate: automatically detects anomalous log entry rates\\"}),`\\n`,(0,i.jsx)(e.li,{children:\\"Categorization: automatically categorizes log messages\\"}),`\\n`]}),`\\n`,(0,i.jsx)(e.p,{children:(0,i.jsx)(e.img,{src:\\"/assets/images/vpc-flow-logs-monitoring-analytics-observability/blog-elastic-vpc-flow-logs-anomaly-detection-with-machine-learning.png\\",alt:\\"\\",width:\\"1999\\",height:\\"789\\"})}),`\\n`,(0,i.jsx)(e.p,{children:\\"For our VPC Flow Logs, we turned both on. When we look at what has been detected for anomalous log entry rates, we see:\\"}),`\\n`,(0,i.jsx)(e.p,{children:(0,i.jsx)(e.img,{src:\\"/assets/images/vpc-flow-logs-monitoring-analytics-observability/blog-elastic-vpc-flow-logs-anomalies.png\\",alt:\\"\\",width:\\"1999\\",height:\\"988\\"})}),`\\n`,(0,i.jsx)(e.p,{children:\\"Elastic immediately detected a spike in logs when we turned on VPC Flow Logs for our application. The rate change is detected because we\\\\u2019d also been ingesting VPC Flow Logs from another application for a couple of days prior to adding the application in this blog.\\"}),`\\n`,(0,i.jsx)(e.p,{children:\\"We can drill down into this anomaly with machine learning and analyze it further.\\"}),`\\n`,(0,i.jsx)(e.p,{children:(0,i.jsx)(e.img,{src:\\"/assets/images/vpc-flow-logs-monitoring-analytics-observability/blog-elastic-vpc-flow-logs-anomaly-explorer.png\\",alt:\\"\\",width:\\"1999\\",height:\\"1228\\"})}),`\\n`,(0,i.jsxs)(e.p,{children:[\\"There is more machine learning analysis you can utilize with your logs \\\\u2014 check out the \\",(0,i.jsx)(e.a,{href:\\"https://www.elastic.co/guide/en/kibana/8.5/xpack-ml.html\\",rel:\\"nofollow\\",children:\\"Elastic machine learning documentation\\"}),\\".\\"]}),`\\n`,(0,i.jsx)(e.p,{children:\\"Since we know that a spike exists, we can also use the Explain Log Rate Spikes capability in Elastic\\\\u2019s AIOps Labs, part of Machine Learning. Additionally, we\\\\u2019ve grouped the results to see what is causing some of the spikes.\\"}),`\\n`,(0,i.jsx)(e.p,{children:(0,i.jsx)(e.img,{src:\\"/assets/images/vpc-flow-logs-monitoring-analytics-observability/blog-elastic-vpc-flow-logs-explain-log-rate-spikes.png\\",alt:\\"\\",width:\\"1866\\",height:\\"936\\"})}),`\\n`,(0,i.jsx)(e.p,{children:\\"As we can see, a specific network interface is sending more VPC flow logs than others. 
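If you want to reproduce this kind of interface-level breakdown programmatically rather than in the UI, a terms aggregation over the flow log data gives the same answer. Below is a minimal sketch using curl; the endpoint, API key, index pattern (logs-aws.vpcflow-*), and field names (aws.vpcflow.action, source.ip) are assumptions based on the AWS integration's defaults, so verify them against your own documents in Discover first:

```bash
# Sketch: top source IPs with REJECTed flows over the last 3 hours.
# Endpoint, API key, index pattern, and field names are placeholders/assumptions.
curl -s -X POST "${ELASTICSEARCH_ENDPOINT}/logs-aws.vpcflow-*/_search" \
  -H "Authorization: ApiKey ${ELASTIC_API_KEY}" \
  -H "Content-Type: application/json" \
  -d '{
    "size": 0,
    "query": {
      "bool": {
        "filter": [
          { "term": { "aws.vpcflow.action": "REJECT" } },
          { "range": { "@timestamp": { "gte": "now-3h" } } }
        ]
      }
    },
    "aggs": {
      "top_rejected_sources": { "terms": { "field": "source.ip", "size": 10 } }
    }
  }'
```

The same filters applied in Discover will produce the visualization shown above.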
We can drill down into this further in Discover.\\"}),`\\n`,(0,i.jsx)(e.h3,{id:\\"vpc-flow-log-dashboard-on-elastic-observability\\",children:\\"VPC Flow Log dashboard on Elastic Observability\\"}),`\\n`,(0,i.jsx)(e.p,{children:\\"Finally, Elastic also provides an OOTB dashboard showing the top IP addresses hitting your VPC, geographically where they are coming from, the time series of the flows, and a summary of VPC Flow Log rejects within the time frame.\\"}),`\\n`,(0,i.jsx)(e.p,{children:\\"This is a baseline dashboard that can be enhanced with visualizations you find in Discover, as we reviewed in option 1 (Using Elastic\\\\u2019s Analytics Discover capabilities) above.\\"}),`\\n`,(0,i.jsx)(e.p,{children:(0,i.jsx)(e.img,{src:\\"/assets/images/vpc-flow-logs-monitoring-analytics-observability/blog-elastic-vpc-flow-logs-action-geolocation.png\\",alt:\\"\\",width:\\"1999\\",height:\\"1302\\"})}),`\\n`,(0,i.jsx)(e.h2,{id:\\"setting-it-all-up\\",children:\\"Setting it all up\\"}),`\\n`,(0,i.jsx)(e.p,{children:\\"Let\\\\u2019s walk through the details of configuring the Elastic Serverless Forwarder and Elastic Observability to ingest data.\\"}),`\\n`,(0,i.jsx)(e.h3,{id:\\"prerequisites-and-config\\",children:\\"Prerequisites and config\\"}),`\\n`,(0,i.jsx)(e.p,{children:\\"If you plan on following these steps, here are some of the components and details we used to set up this demonstration:\\"}),`\\n`,(0,i.jsxs)(e.ul,{children:[`\\n`,(0,i.jsxs)(e.li,{children:[\\"Ensure you have an account on \\",(0,i.jsx)(e.a,{href:\\"https://cloud.elastic.co\\",rel:\\"nofollow\\",children:\\"Elastic Cloud\\"}),\\" and a deployed stack (\\",(0,i.jsx)(e.a,{href:\\"https://www.elastic.co/guide/en/elastic-stack/current/installing-elastic-stack.html\\",rel:\\"nofollow\\",children:\\"see instructions here\\"}),\\") on AWS. Deploying this on AWS is required for Elastic Serverless Forwarder.\\"]}),`\\n`,(0,i.jsxs)(e.li,{children:[\\"Ensure you have an AWS account with permissions to pull the necessary data from AWS. Specifically, ensure you can configure the agent to pull data from AWS as needed. \\",(0,i.jsx)(e.a,{href:\\"https://docs.elastic.co/integrations/aws#requirements\\",rel:\\"nofollow\\",children:\\"Please look at the documentation for details\\"}),\\".\\"]}),`\\n`,(0,i.jsxs)(e.li,{children:[\\"We used \\",(0,i.jsx)(e.a,{href:\\"https://github.com/aws-samples/aws-three-tier-web-architecture-workshop\\",rel:\\"nofollow\\",children:\\"AWS\\\\u2019s three-tier app\\"}),\\" and installed it as instructed in GitHub. 
(\\",(0,i.jsx)(e.a,{href:\\"https://www.elastic.co/blog/aws-service-metrics-monitor-observability-easy\\",rel:\\"nofollow\\",children:\\"See blog on ingesting metrics from the AWS services supporting this app\\"}),\\".)\\"]}),`\\n`,(0,i.jsx)(e.li,{children:\\"Configure and install Elastic\\\\u2019s Serverless Forwarder.\\"}),`\\n`,(0,i.jsx)(e.li,{children:\\"Ensure you turn on VPC Flow Logs for the VPC where the application is deployed and send logs to AWS Firehose.\\"}),`\\n`]}),`\\n`,(0,i.jsx)(e.h3,{id:\\"step-0-get-an-account-on-elastic-cloud\\",children:\\"Step 0: Get an account on Elastic Cloud\\"}),`\\n`,(0,i.jsxs)(e.p,{children:[\\"Follow the instructions to \\",(0,i.jsx)(e.a,{href:\\"https://cloud.elastic.co/registration?fromURI=/home\\",rel:\\"nofollow\\",children:\\"get started on Elastic Cloud\\"}),\\".\\"]}),`\\n`,(0,i.jsx)(e.p,{children:(0,i.jsx)(e.img,{src:\\"/assets/images/vpc-flow-logs-monitoring-analytics-observability/blog-elastic-vpc-flow-logs-start-cloud-trial.png\\",alt:\\"\\",width:\\"884\\",height:\\"654\\"})}),`\\n`,(0,i.jsx)(e.h3,{id:\\"step-1-deploy-elastic-on-aws\\",children:\\"Step 1: Deploy Elastic on AWS\\"}),`\\n`,(0,i.jsx)(e.p,{children:\\"Once logged in to Elastic Cloud, create a deployment on AWS. It\\\\u2019s important to ensure that the deployment is on AWS. The Amazon Kinesis Data Firehose connects specifically to an endpoint that needs to be on AWS.\\"}),`\\n`,(0,i.jsx)(e.p,{children:(0,i.jsx)(e.img,{src:\\"/assets/images/vpc-flow-logs-monitoring-analytics-observability/blog-elastic-vpc-flow-logs-create-a-deployment.png\\",alt:\\"\\",width:\\"1999\\",height:\\"1582\\"})}),`\\n`,(0,i.jsx)(e.p,{children:\\"Once your deployment is created, make sure you copy the Elasticsearch endpoint.\\"}),`\\n`,(0,i.jsx)(e.p,{children:(0,i.jsx)(e.img,{src:\\"/assets/images/vpc-flow-logs-monitoring-analytics-observability/blog-elastic-vpc-flow-logs-aws-logs.png\\",alt:\\"\\",width:\\"1999\\",height:\\"1157\\"})}),`\\n`,(0,i.jsx)(e.p,{children:\\"The endpoint should be an AWS endpoint, such as:\\"}),`\\n`,(0,i.jsx)(e.pre,{children:(0,i.jsx)(e.code,{className:\\"language-bash\\",children:`https://aws-logs.es.us-east-1.aws.found.io\\n`})}),`\\n`,(0,i.jsx)(e.h3,{id:\\"step-2-turn-on-elastics-aws-integrations-on-aws\\",children:\\"Step 2: Turn on Elastic\\\\u2019s AWS Integrations on AWS\\"}),`\\n`,(0,i.jsx)(e.p,{children:\\"In your deployment\\\\u2019s Elastic Integration section, go to the AWS integration and select Install AWS assets.\\"}),`\\n`,(0,i.jsx)(e.p,{children:(0,i.jsx)(e.img,{src:\\"/assets/images/vpc-flow-logs-monitoring-analytics-observability/blog-elastic-vpc-flow-logs-aws-settings.png\\",alt:\\"\\",width:\\"1999\\",height:\\"684\\"})}),`\\n`,(0,i.jsx)(e.h3,{id:\\"step-3-deploy-your-application\\",children:\\"Step 3: Deploy your application\\"}),`\\n`,(0,i.jsxs)(e.p,{children:[\\"Follow the instructions listed out in \\",(0,i.jsx)(e.a,{href:\\"https://github.com/aws-samples/aws-three-tier-web-architecture-workshop\\",rel:\\"nofollow\\",children:\\"AWS\\\\u2019s Three-Tier app\\"}),\\" and instructions in the workshop link on GitHub. The workshop is listed \\",(0,i.jsx)(e.a,{href:\\"https://catalog.us-east-1.prod.workshops.aws/workshops/85cd2bb2-7f79-4e96-bdee-8078e469752a/en-US\\",rel:\\"nofollow\\",children:\\"here\\"}),\\".\\"]}),`\\n`,(0,i.jsx)(e.p,{children:\\"Once you\\\\u2019ve installed the app, get credentials from AWS. 
These credentials will be needed for Elastic\\\\u2019s AWS integration.\\"}),`\\n`,(0,i.jsx)(e.p,{children:\\"The options for credentials are:\\"}),`\\n`,(0,i.jsxs)(e.ul,{children:[`\\n`,(0,i.jsx)(e.li,{children:\\"Use access keys directly\\"}),`\\n`,(0,i.jsx)(e.li,{children:\\"Use temporary security credentials\\"}),`\\n`,(0,i.jsx)(e.li,{children:\\"Use a shared credentials file\\"}),`\\n`,(0,i.jsx)(e.li,{children:\\"Use an IAM role Amazon Resource Name (ARN)\\"}),`\\n`]}),`\\n`,(0,i.jsxs)(e.p,{children:[\\"View more details on specifics around necessary \\",(0,i.jsx)(e.a,{href:\\"https://docs.elastic.co/en/integrations/aws#aws-credentials\\",rel:\\"nofollow\\",children:\\"credentials\\"}),\\" and \\",(0,i.jsx)(e.a,{href:\\"https://docs.elastic.co/en/integrations/aws#aws-permissions\\",rel:\\"nofollow\\",children:\\"permissions\\"}),\\".\\"]}),`\\n`,(0,i.jsx)(e.h3,{id:\\"step-4-send-vpc-flow-logs-to-amazon-s3-and-set-up-amazon-sqs\\",children:\\"Step 4: Send VPC Flow Logs to Amazon S3 and set up Amazon SQS\\"}),`\\n`,(0,i.jsx)(e.p,{children:\\"In the VPC for the application deployed in Step 3, you will need to configure VPC Flow Logs and point them to an Amazon S3 bucket. Specifically, you will want to keep the log format as the AWS default.\\"}),`\\n`,(0,i.jsx)(e.p,{children:(0,i.jsx)(e.img,{src:\\"/assets/images/vpc-flow-logs-monitoring-analytics-observability/blog-elastic-vpc-flow-logs-create-flow-log.png\\",alt:\\"\\",width:\\"832\\",height:\\"1131\\"})}),`\\n`,(0,i.jsx)(e.p,{children:\\"Create the VPC Flow Log.\\"}),`\\n`,(0,i.jsx)(e.p,{children:\\"Next:\\"}),`\\n`,(0,i.jsxs)(e.ul,{children:[`\\n`,(0,i.jsx)(e.li,{children:(0,i.jsx)(e.a,{href:\\"https://docs.aws.amazon.com/AWSSimpleQueueService/latest/SQSDeveloperGuide/sqs-getting-started.html\\",rel:\\"nofollow\\",children:\\"Set up an Amazon SQS queue\\"})}),`\\n`,(0,i.jsx)(e.li,{children:(0,i.jsx)(e.a,{href:\\"https://docs.aws.amazon.com/AmazonS3/latest/userguide/ways-to-add-notification-config-to-bucket.html\\",rel:\\"nofollow\\",children:\\"Configure Amazon S3 event notifications\\"})}),`\\n`]}),`\\n`,(0,i.jsx)(e.h3,{id:\\"step-5-set-up-elastic-serverless-forwarder-on-aws\\",children:\\"Step 5: Set up Elastic Serverless Forwarder on AWS\\"}),`\\n`,(0,i.jsxs)(e.p,{children:[\\"Follow instructions listed in \\",(0,i.jsx)(e.a,{href:\\"https://www.elastic.co/guide/en/observability/8.5/aws-deploy-elastic-serverless-forwarder.html\\",rel:\\"nofollow\\",children:\\"Elastic\\\\u2019s documentation\\"}),\\" and refer to the \\",(0,i.jsx)(e.a,{href:\\"https://www.elastic.co/blog/elastic-and-aws-serverless-application-repository-speed-time-to-actionable-insights-with-frictionless-log-ingestion-from-amazon-s3\\",rel:\\"nofollow\\",children:\\"previous blog\\"}),\\" providing an overview. The important bits during the configuration in Lambda\\\\u2019s application repository are to ensure you:\\"]}),`\\n`,(0,i.jsxs)(e.ul,{children:[`\\n`,(0,i.jsx)(e.li,{children:\\"Specify the S3 Bucket in ElasticServerlessForwarderS3Buckets where the VPC Flow Logs are being sent. The value is the ARN of the S3 Bucket you created in Step 4.\\"}),`\\n`,(0,i.jsx)(e.li,{children:\'Specify the configuration file path in ElasticServerlessForwarderS3ConfigFile. The value is the S3 url in the format \\"s3://bucket-name/config-file-name\\" pointing to the configuration file (sarconfig.yaml; a sketch follows this list).\'}),`\\n`,(0,i.jsx)(e.li,{children:\\"Specify the S3 SQS Notifications queue used as the trigger of the Lambda function in ElasticServerlessForwarderS3SQSEvents. The value is the ARN of the SQS Queue you set up in Step 4.\\"}),`\\n`]}),`\\n`,
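For reference, here is a sketch of what the configuration file referenced by ElasticServerlessForwarderS3ConfigFile can look like. The SQS queue ARN, Elasticsearch endpoint, API key, data stream name, and bucket name below are all placeholders, and the exact schema should be verified against the serverless forwarder's README before use:

```bash
# Sketch only: write a minimal forwarder config and upload it to the config bucket.
# Verify the schema against https://github.com/elastic/elastic-serverless-forwarder.
cat > sarconfig.yaml <<'EOF'
inputs:
  - type: s3-sqs
    id: arn:aws:sqs:us-east-1:123456789012:vpc-flow-logs-queue
    outputs:
      - type: elasticsearch
        args:
          elasticsearch_url: https://aws-logs.es.us-east-1.aws.found.io:443
          api_key: <ENCODED_API_KEY>
          es_datastream_name: logs-aws.vpcflow-default
EOF
# Upload to the bucket referenced by ElasticServerlessForwarderS3ConfigFile:
aws s3 cp sarconfig.yaml s3://<config-bucket>/sarconfig.yaml
```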
(0,i.jsx)(e.p,{children:(0,i.jsx)(e.img,{src:\\"/assets/images/vpc-flow-logs-monitoring-analytics-observability/blog-elastic-vpc-flow-logs-application-settings.png\\",alt:\\"\\",width:\\"1019\\",height:\\"904\\"})}),`\\n`,(0,i.jsx)(e.p,{children:\\"Once Amazon CloudFormation finishes setting up the Elastic serverless forwarder, you should see two AWS Lambda functions:\\"}),`\\n`,(0,i.jsx)(e.p,{children:(0,i.jsx)(e.img,{src:\\"/assets/images/vpc-flow-logs-monitoring-analytics-observability/blog-elastic-vpc-flow-logs-functions.png\\",alt:\\"\\",width:\\"1164\\",height:\\"274\\"})}),`\\n`,(0,i.jsxs)(e.p,{children:[\\"In order to check if logs are coming in, go to the function with \\\\u201C\\",(0,i.jsx)(e.strong,{children:\\"ApplicationElasticServer\\"}),\\"\\\\u201D in the name, open the Monitor tab, and look at \\",(0,i.jsx)(e.strong,{children:\\"logs\\"}),\\". You should see the logs being pulled from S3.\\"]}),`\\n`,(0,i.jsx)(e.p,{children:(0,i.jsx)(e.img,{src:\\"/assets/images/vpc-flow-logs-monitoring-analytics-observability/blog-elastic-vpc-flow-logs-function-overview.png\\",alt:\\"\\",width:\\"1123\\",height:\\"1026\\"})}),`\\n`,(0,i.jsx)(e.h3,{id:\\"step-6-check-and-ensure-you-have-logs-in-elastic\\",children:\\"Step 6: Check and ensure you have logs in Elastic\\"}),`\\n`,(0,i.jsxs)(e.p,{children:[\\"Now that steps 0\\\\u20135 are complete, you can go to Elastic\\\\u2019s Discover capability and you should see VPC Flow Logs coming in. In the image below, we\\\\u2019ve filtered by Amazon S3 bucket \\",(0,i.jsx)(e.strong,{children:\\"bshettisartest\\"}),\\".\\"]}),`\\n`,(0,i.jsx)(e.p,{children:(0,i.jsx)(e.img,{src:\\"/assets/images/vpc-flow-logs-monitoring-analytics-observability/blog-elastic-vpc-flow-log-dashboard-filter.png\\",alt:\\"\\",width:\\"1780\\",height:\\"1158\\"})}),`\\n`,
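If you prefer to verify ingestion from the command line as well, a quick count query works. This sketch only assumes the standard @timestamp field plus the same placeholder endpoint, API key, and index pattern used earlier:

```bash
# Sketch: count VPC flow log documents ingested in the last 15 minutes.
curl -s -X POST "${ELASTICSEARCH_ENDPOINT}/logs-aws.vpcflow-*/_count" \
  -H "Authorization: ApiKey ${ELASTIC_API_KEY}" \
  -H "Content-Type: application/json" \
  -d '{ "query": { "range": { "@timestamp": { "gte": "now-15m" } } } }'
```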
(0,i.jsx)(e.h2,{id:\\"conclusion-elastic-observability-easily-integrates-with-vpc-flow-logs-for-analytics-alerting-and-insights\\",children:\\"Conclusion: Elastic Observability easily integrates with VPC Flow Logs for analytics, alerting, and insights\\"}),`\\n`,(0,i.jsx)(e.p,{children:\\"I hope you\\\\u2019ve gotten an appreciation for how Elastic Observability can help you manage AWS VPC Flow Logs. Here\\\\u2019s a quick recap of what you learned:\\"}),`\\n`,(0,i.jsxs)(e.ul,{children:[`\\n`,(0,i.jsxs)(e.li,{children:[\\"A walk-through of how Elastic Observability provides enhanced analysis for VPC Flow Logs:\\",`\\n`,(0,i.jsxs)(e.ul,{children:[`\\n`,(0,i.jsx)(e.li,{children:\\"Using Elastic\\\\u2019s Analytics Discover capabilities to manually analyze the data\\"}),`\\n`,(0,i.jsxs)(e.li,{children:[\\"Leveraging Elastic Observability\\\\u2019s anomaly features to:\\",`\\n`,(0,i.jsxs)(e.ul,{children:[`\\n`,(0,i.jsx)(e.li,{children:\\"Identify anomalies in the VPC flow logs\\"}),`\\n`,(0,i.jsx)(e.li,{children:\\"Detect anomalous log entry rates\\"}),`\\n`,(0,i.jsx)(e.li,{children:\\"Automatically categorize log messages\\"}),`\\n`]}),`\\n`]}),`\\n`,(0,i.jsx)(e.li,{children:\\"Using an OOTB dashboard to further analyze data\\"}),`\\n`]}),`\\n`]}),`\\n`,(0,i.jsx)(e.li,{children:\\"A more detailed walk-through of how to set up the Elastic Serverless Forwarder\\"}),`\\n`]}),`\\n`,(0,i.jsxs)(e.p,{children:[\\"Start your own \\",(0,i.jsx)(e.a,{href:\\"https://aws.amazon.com/marketplace/pp/prodview-voru33wi6xs7k?trk=5fbc596b-6d2a-433a-8333-0bd1f28e84da%E2%89%BBchannel=el\\",rel:\\"nofollow\\",children:\\"7-day free trial\\"}),\\" by signing up via \\",(0,i.jsx)(e.a,{href:\\"https://aws.amazon.com/marketplace/pp/prodview-voru33wi6xs7k?trk=d54b31eb-671c-49ba-88bb-7a1106421dfa%E2%89%BBchannel=el\\",rel:\\"nofollow\\",children:\\"AWS Marketplace\\"}),\\" and quickly spin up a deployment in minutes on any of the \\",(0,i.jsx)(e.a,{href:\\"https://www.elastic.co/guide/en/cloud/current/ec-reference-regions.html#ec_amazon_web_services_aws_regions\\",rel:\\"nofollow\\",children:\\"Elastic Cloud regions on AWS\\"}),\\" around the world. Your AWS Marketplace purchase of Elastic will be included in your monthly consolidated billing statement and will draw against your committed spend with AWS.\\"]}),`\\n`,(0,i.jsx)(e.h3,{id:\\"additional-logging-resources\\",children:\\"Additional logging resources:\\"}),`\\n`,(0,i.jsxs)(e.ul,{children:[`\\n`,(0,i.jsx)(e.li,{children:(0,i.jsx)(e.a,{href:\\"https://www.elastic.co/getting-started/observability/collect-and-analyze-logs\\",rel:\\"nofollow\\",children:\\"Getting started with logging on Elastic (quickstart)\\"})}),`\\n`,(0,i.jsx)(e.li,{children:(0,i.jsx)(e.a,{href:\\"https://www.elastic.co/guide/en/observability/current/logs-metrics-get-started.html\\",rel:\\"nofollow\\",children:\\"Ingesting common known logs via integrations (compute node example)\\"})}),`\\n`,(0,i.jsx)(e.li,{children:(0,i.jsx)(e.a,{href:\\"https://docs.elastic.co/integrations\\",rel:\\"nofollow\\",children:\\"List of integrations\\"})}),`\\n`,(0,i.jsx)(e.li,{children:(0,i.jsx)(e.a,{href:\\"https://www.elastic.co/blog/log-monitoring-management-enterprise\\",rel:\\"nofollow\\",children:\\"Ingesting custom application logs into Elastic\\"})}),`\\n`,(0,i.jsx)(e.li,{children:(0,i.jsx)(e.a,{href:\\"https://www.elastic.co/blog/observability-logs-parsing-schema-read-write\\",rel:\\"nofollow\\",children:\\"Enriching logs in Elastic\\"})}),`\\n`,(0,i.jsxs)(e.li,{children:[\\"Analyzing Logs with \\",(0,i.jsx)(e.a,{href:\\"https://www.elastic.co/blog/reduce-mttd-ml-machine-learning-observability\\",rel:\\"nofollow\\",children:\\"Anomaly Detection (ML)\\"}),\\" and 
\\",(0,i.jsx)(e.a,{href:\\"https://www.elastic.co/blog/observability-logs-machine-learning-aiops\\",rel:\\"nofollow\\",children:\\"AIOps\\"})]}),`\\n`]}),`\\n`,(0,i.jsx)(e.h3,{id:\\"common-use-case-examples-with-logs\\",children:\\"Common use case examples with logs:\\"}),`\\n`,(0,i.jsxs)(e.ul,{children:[`\\n`,(0,i.jsx)(e.li,{children:(0,i.jsx)(e.a,{href:\\"https://youtu.be/ax04ZFWqVCg\\",rel:\\"nofollow\\",children:\\"Nginx log management\\"})}),`\\n`,(0,i.jsx)(e.li,{children:(0,i.jsx)(e.a,{href:\\"https://www.elastic.co/blog/vpc-flow-logs-monitoring-analytics-observability\\",rel:\\"nofollow\\",children:\\"AWS VPC Flow log management\\"})}),`\\n`,(0,i.jsx)(e.li,{children:(0,i.jsx)(e.a,{href:\\"https://www.elastic.co/blog/kubernetes-errors-elastic-observability-logs-openai\\",rel:\\"nofollow\\",children:\\"Using OpenAI to analyze Kubernetes errors\\"})}),`\\n`,(0,i.jsx)(e.li,{children:(0,i.jsx)(e.a,{href:\\"https://youtu.be/Li5TJAWbz8Q\\",rel:\\"nofollow\\",children:\\"PostgreSQL issue analysis with AIOps\\"})}),`\\n`]})]})}function d(n={}){let{wrapper:e}=n.components||{};return e?(0,i.jsx)(e,{...n,children:(0,i.jsx)(h,{...n})}):h(n)}return v(A);})();\\n;return Component;"},"_id":"articles/vpc-flow-logs-monitoring-analytics-observability.mdx","_raw":{"sourceFilePath":"articles/vpc-flow-logs-monitoring-analytics-observability.mdx","sourceFileName":"vpc-flow-logs-monitoring-analytics-observability.mdx","sourceFileDir":"articles","contentType":"mdx","flattenedPath":"articles/vpc-flow-logs-monitoring-analytics-observability"},"type":"Article","imageUrl":"/assets/images/vpc-flow-logs-monitoring-analytics-observability/patterns-midnight-background-no-logo-observability.png","readingTime":"12 min read","url":"/vpc-flow-logs-monitoring-analytics-observability","headings":[{"level":2,"title":"Elastic’s serverless forwarder on AWS Lambda","href":"#elastics-serverless-forwarder-on-aws-lambda"},{"level":2,"title":"Analyzing VPC Flow Logs in Elastic","href":"#analyzing-vpc-flow-logs-in-elastic"},{"level":3,"title":"Using Elastic Discover","href":"#using-elastic-discover"},{"level":3,"title":"Anomaly detection in Elastic Observability logs","href":"#anomaly-detection-in-elastic-observability-logs"},{"level":3,"title":"VPC Flow Log dashboard on Elastic Observability","href":"#vpc-flow-log-dashboard-on-elastic-observability"},{"level":2,"title":"Setting it all up","href":"#setting-it-all-up"},{"level":3,"title":"Prerequisites and config","href":"#prerequisites-and-config"},{"level":3,"title":"Step 0: Get an account on Elastic Cloud","href":"#step-0-get-an-account-on-elastic-cloud"},{"level":3,"title":"Step 1: Deploy Elastic on AWS","href":"#step-1-deploy-elastic-on-aws"},{"level":3,"title":"Step 2: Turn on Elastic’s AWS Integrations on AWS","href":"#step-2-turn-on-elastics-aws-integrations-on-aws"},{"level":3,"title":"Step 3: Deploy your application","href":"#step-3-deploy-your-application"},{"level":3,"title":"Step 4: Send VPC Flow Logs to Amazon S3 and set up Amazon SQS","href":"#step-4-send-vpc-flow-logs-to-amazon-s3-and-set-up-amazon-sqs"},{"level":3,"title":"Step 5: Set up Elastic Serverless Forwarder on AWS","href":"#step-5-set-up-elastic-serverless-forwarder-on-aws"},{"level":3,"title":"Step 6: Check and ensure you have logs in Elastic","href":"#step-6-check-and-ensure-you-have-logs-in-elastic"},{"level":2,"title":"Conclusion: Elastic Observability easily integrates with VPC Flow Logs for analytics, alerting, and 
insights","href":"#conclusion-elastic-observability-easily-integrates-with-vpc-flow-logs-for-analytics-alerting-and-insights"},{"level":3,"title":"Additional logging resources:","href":"#additional-logging-resources"},{"level":3,"title":"Common use case examples with logs:","href":"#common-use-case-examples-with-logs"}]},{"title":"Unlocking whole-system visibility with Elastic Universal Profiling™","slug":"whole-system-visibility-elastic-universal-profiling","date":"2023-09-25","description":"Visual profiling data can be overwhelming. This blog post aims to demystify continuous profiling and guide you through its unique visualizations. We will equip you with the knowledge to derive quick, actionable insights from Universal Profiling™.","image":"universal-profiling-blog-720x420.jpg","author":[{"slug":"israel-ogbole","type":"Author","_raw":{}},{"slug":"francesco-gualazzi","type":"Author","_raw":{}}],"subtitle":"Learn how to derive quick, actionable code insights with stacktraces, flamegraphs, and more","tags":[{"slug":"universal-profiling","type":"Tag","_raw":{}},{"slug":"apm","type":"Tag","_raw":{}}],"body":{"raw":"\\n## Identify, optimize, measure, repeat!\\n\\nSREs and developers who want to maintain robust, efficient systems and achieve optimal code performance need effective tools to measure and improve code performance. Profilers are invaluable for these tasks, as they can help you boost your app\'s throughput, ensure consistent system reliability, and gain a deeper understanding of your code\'s behavior at runtime. However, traditional profilers can be cumbersome to use, as they often require code recompilation and are limited to specific languages. Additionally, they can also have a high overhead that negatively affects performance and makes them less suitable for quick, real-time debugging in production environments.\\n\\nTo address the limitations of traditional profilers, Elastic\xae recently [announced the general availability of Elastic Universal Profiling](https://www.elastic.co/blog/continuous-profiling-is-generally-available), a [continuous profiling](https://www.elastic.co/observability/universal-profiling) product that is refreshingly straightforward to use, eliminating the need for instrumentation, recompilations, or restarts. Moreover, Elastic Universal Profiling does not require on-host debug symbols and is language-agnostic, allowing you to profile any process running on your machines — from your application\'s code to third-party libraries and even kernel functions.\\n\\nHowever, even the most advanced tools require a certain level of expertise to interpret the data effectively. The wealth of visual profiling data — flamegraphs, stacktraces, or functions — can initially seem overwhelming. This blog post aims to demystify [continuous profiling](https://www.elastic.co/observability/universal-profiling) and guide you through its unique visualizations. We will equip you with the knowledge to derive quick, actionable insights from Universal Profiling.\\n\\nLet’s begin.\\n\\n## Stacktraces: The cornerstone for profiling\\n\\n### It all begins with a stacktrace — a snapshot capturing the cascade of function calls.\\n\\nA stacktrace is a snapshot of the call stack of an application at a specific point in time. It captures the sequence of function calls that the program has made up to that point. 
In this way, a stacktrace serves as a historical record of the call stack, allowing you to trace back the steps that led to a particular state in your application.\\n\\nFurther, stacktraces are the foundational data structure that profilers rely on to determine what an application is executing at any given moment. This is particularly useful when, for instance, your infrastructure monitoring indicates that your application servers are consuming 95% of CPU resources. While utilities such as \'top -H\' can show the top processes that are consuming CPU, they lack the granularity needed to identify the specific lines of code (in the top process) responsible for the high usage.\\n\\nIn the case of Elastic Universal Profiling, [eBPF is used](https://www.elastic.co/blog/ebpf-observability-security-workload-profiling) to perform sampling of every process that is keeping a CPU core busy. Unlike most instrumentation profilers that focus solely on your application code, Elastic Universal Profiling provides whole-system visibility — it profiles not just your code, but also code you don\'t own, including third-party libraries and even kernel operations.\\n\\nThe diagram below shows how the Universal Profiling agent works at a very high level. Step 5 indicates the ingestion of the stacktraces into the profiling collector, a new part of the Elastic Stack.\\n\\n_ **Just** _ [_ **deploy the profiling host agent** _](https://www.elastic.co/guide/en/observability/current/profiling-get-started.html) _**and receive profiling data (in Kibana**_\xae_**) a few minutes later.**_ [_ **Get started now** _](https://www.elastic.co/guide/en/observability/current/profiling-get-started.html)_ **.** _\\n\\n![High-level depiction of how the profiling agent works](/assets/images/whole-system-visibility-elastic-universal-profiling/elastic-blog-1-flowchart-linux.png)\\n\\n1. Unwinder eBPF programs (bytecode) are sent to the kernel.\\n\\n2. The kernel verifies that the BPF program is safe. If accepted, the program is attached to the probes and executed when the event occurs.\\n\\n3. The eBPF programs pass the collected data to userspace via maps.\\n\\n4. The agent reads the collected data from maps. The data transferred from the agent to the maps are process-specific and interpreter-specific meta-information that help the eBPF unwinder programs perform unwinding.\\n\\n5. Stacktraces, metrics, and metadata are pushed to the Elastic Stack.\\n\\n6. Visualize data as flamegraphs, stacktraces, and functions via Kibana.\\n\\nWhile stacktraces are the key ingredient for most profiling tools, interpreting them can be tricky. Let\'s take a look at a simple example to make things a bit easier. 
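As a point of comparison with the eBPF-based sampling described above, stack samples like these can also be captured by hand on a single Linux host with perf. This is purely illustrative and not part of Universal Profiling; the PID is a placeholder, and root privileges are typically required:

```bash
# Illustrative only: sample on-CPU call stacks of one process at 99 Hz for 30 seconds,
# then print the aggregated stacks. Requires perf (linux-tools) and root privileges.
perf record -F 99 -g -p <pid> -- sleep 30
perf report --stdio
```

Tools like this work one host and one process at a time, which is exactly the gap a fleet-wide continuous profiler fills.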
The table below shows a group of stacktraces from a Java application and assigns each a percentage to indicate its share of CPU time consumption.\\n\\n**Table 1: Grouped Stacktraces with CPU Time Percentage**\\n\\n| Percentage | Function Calls |\\n| ---------- | -------------- |\\n| 60% | startApp -\\\\> authenticateUser -\\\\> processTransaction |\\n| 20% | startApp -\\\\> loadAccountDetails -\\\\> fetchRecentTransactions |\\n| 10% | startApp -\\\\> authenticateUser -\\\\> processTransaction -\\\\> verifyFunds |\\n| 2% | startApp -\\\\> authenticateUser -\\\\> processTransaction -\\\\> libjvm.so |\\n| 1% | startApp -\\\\> authenticateUser -\\\\> processTransaction -\\\\> libjvm.so -\\\\> vmlinux: asm_common_interrupt -\\\\> vmlinux: asm_sysvec_apic_timer_interrupt |\\n\\nThe percentages above represent the relative frequency of each specific stacktrace compared to the total number of stacktraces collected over the observation period, not actual CPU usage percentages. Also, the libjvm.so and kernel frames (vmlinux:\\\\*) in the example are commonly observed with whole-system profilers like Elastic Universal Profiling.\\n\\nWe can also see that **60%** of the time is spent in the sequence startApp; authenticateUser; processTransaction. An additional **10%** of the processing time is allocated to verifyFunds, a function invoked by processTransaction. Given these observations, it becomes evident that optimization initiatives would yield the most impact if centered on the processTransaction function, as it is one of the most expensive functions. However, real-world stacktraces can be far more intricate than this example. So how do we make sense of them quickly? The answer to this problem resulted in the creation of flamegraphs.\\n\\n## Flamegraphs: A visualization of stacktraces\\n\\nWhile the above example may appear straightforward, it scarcely reflects the complexities encountered when aggregating multiple stacktraces across a fleet of machines on a continuous basis. The depth of the stack traces and the numerous branching paths can make it increasingly difficult to pinpoint where code is consuming resources. This is where flamegraphs, a concept popularized by [Brendan Gregg](https://www.brendangregg.com/flamegraphs.html), come into play.\\n\\nA flamegraph is a visual interpretation of stacktraces, designed to quickly and accurately identify the functions that are consuming the most resources. Each function is represented by a rectangle, where the width of the rectangle represents the amount of time spent in the function, and the number of stacked rectangles represents the stack depth. The stack depth is the number of functions that were called to reach the current function.\\n\\nElastic Universal Profiling uses icicle graphs, which are an inverted variant of the standard flamegraph. In an icicle graph, the root function is at the top, and its child functions are shown below their parents –– making it easier to see the hierarchy of functions and how they are related to each other.\\n\\nIn most flamegraphs, the y-axis represents stack depth, but there is no standardization for the x-axis. Some profiling tools use the x-axis to indicate the passage of time; in these instances, the graph is more accurately termed a flame chart. Others sort the x-axis alphabetically. 
Universal Profiling sorts functions on the x-axis based on relative CPU percentage utilization, starting with the function that consumes the most CPU time on the left, as shown in the example icicle graph below.\\n\\n![Example icicle graph: The percentage represents relative CPU time, not the real CPU usage time. ](/assets/images/whole-system-visibility-elastic-universal-profiling/elastic-blog-2-cpu-time.png)\\n\\n## Debugging and optimizing performance issues: Stacktraces, TopN functions, flamegraphs\\n\\nSREs and SWEs can use Universal Profiling for troubleshooting, debugging, and performance optimization. It builds stacktraces that go from the kernel, through userspace native code, all the way into code running in higher level runtimes, enabling you to **identify performance regressions**, **reduce wasteful computations**, and **debug complex issues faster**.\\n\\nTo this end, Universal Profiling offers three main visualizations: Stacktraces, TopN Functions, and flamegraphs.\\n\\n### Stacktrace view\\n\\nThe stacktraces view shows grouped stacktrace graphs by threads, hosts, Kubernetes deployments, and containers. It can be used to detect unexpected CPU spikes across threads and drill down into a smaller time range to investigate further with a flamegraph. Refer to the [documentation](https://www.elastic.co/guide/en/observability/current/universal-profiling.html#profiling-stacktraces-intro) for details.\\n\\n![Notice the wave pattern in the stacktrace view, enabling you to drill down into a CPU spike ](/assets/images/whole-system-visibility-elastic-universal-profiling/elastic-blog-3-wave-patterns.png)\\n\\n### TopN functions view\\n\\nUniversal Profiling\'s topN functions view shows the most frequently sampled functions, broken down by CPU time, annualized CO2, and annualized cost estimates. You can use this view to identify the most expensive functions across your entire fleet, and then apply filters to focus on specific components for a more detailed analysis. Clicking on a function name will redirect you to the flamegraph, enabling you to examine the call hierarchy.\\n\\n![TopN functions page](/assets/images/whole-system-visibility-elastic-universal-profiling/elastic-blog-4-topN-functions-page.png)\\n\\n### Flamegraphs view\\n\\nThe flamegraph page is where you will most likely spend the most time, especially when debugging and optimizing. We recommend that you use the guide below to identify performance bottlenecks and optimization opportunities with flamegraphs. The three key elements to look for are **width**, **hierarchy**, and **height**.\\n\\n![Icicle flamegraph: We use the colors to determine different types of code (e.g., native, interpreted, kernel).](/assets/images/whole-system-visibility-elastic-universal-profiling/elastic-blog-5-icivle-flamegraph.png)\\n\\n**Width matters:** In icicle graphs, wider rectangles signify functions taking up more CPU time. Always read the graph from left to right and note the widest rectangles, as these are the prime hot spots.\\n\\n**Hierarchy matters:** Navigate the graph\'s stack to understand function relationships. This vertical examination will help you identify whether one or multiple functions are responsible for performance bottlenecks. This could also uncover opportunities for code improvements, such as swapping an inefficient library or avoiding unnecessary I/O operations.\\n\\n**Height matters:** Elevated or tall stacks in the graph usually point to deep call hierarchies. 
These can be an indicator of complex and less efficient code structures that may require attention.\\n\\nAlso, when navigating a flamegraph, you may want to look for specific function names to validate your assumptions about their presence: in the Universal Profiling flamegraphs view, there is a “Search” bar at the bottom left corner of the view. You can input a regex, and the match will be highlighted in the flamegraph; by clicking on the left and right arrows next to the Search bar, you can move across the occurrences on the flamegraph and spot the callers and callees of the matched function.\\n\\nIn summary:\\n\\n- **Scan** horizontally from left to right, focusing on width for CPU-intensive functions.\\n- **Examine** the stack vertically to spot bottlenecks.\\n- **Look** for **towering stacks** to identify potential complexities in the code.\\n\\nTo recap, use topN functions to generate optimization hypotheses and validate them with stacktraces and/or flamegraphs. Use stacktraces to monitor CPU utilization trends and to delve into the finer details. Use flamegraphs to quickly debug and optimize your code, using width, hierarchy, and height as guides.\\n\\n_ **Identify. Optimize. Measure. Repeat!** _\\n\\n## Measure the impact of your change\\n\\n### For the very first time in history, developers can now measure the performance (gained or lost), cloud cost, and carbon footprint impact of every deployed change.\\n\\nOnce you have identified a performance issue and applied fixes or optimizations to your code, it is essential to measure the impact of your changes. The differential topN functions and differential flamegraph pages are invaluable for this, as they can help you identify regressions and measure your change impact not only in terms of performance but also in terms of carbon emissions and cost savings.\\n\\n![A differential function view, showing the performance, CO2, and cost impact of a change](/assets/images/whole-system-visibility-elastic-universal-profiling/elastic-blog-6-uni-profiling.png)\\n\\nThe Diff column indicates a change in the function’s rank.\\n\\nYou may need to use tags or other metadata, such as container and deployment name, in combination with time ranges to differentiate between the optimized and non-optimized changes.\\n\\n![A differential flamegraph showing regression in A/B testing](/assets/images/whole-system-visibility-elastic-universal-profiling/elastic-blog-7-differential-flamegraph.png)\\n\\n## Universal Profiling: The key to optimizing application resources\\n\\nComputational efficiency is no longer just a nice-to-have, but a must-have from both a financial and environmental sustainability perspective. Elastic Universal Profiling provides unprecedented visibility into the runtime behavior of all your applications, so you can identify and optimize the most resource-intensive areas of your code. The result is not merely better-performing software but also reduced resource consumption, lower cloud costs, and a reduction in carbon footprint. Optimizing your code with Universal Profiling is not only the right thing to do for your business, it’s the right thing to do for our world.\\n\\n[Get started](https://www.elastic.co/guide/en/observability/current/profiling-get-started.html) with Elastic Universal Profiling today.\\n\\n_The release and timing of any features or functionality described in this post remain at Elastic\'s sole discretion. 
Any features or functionality not currently available may not be delivered on time or at all._\\n","code":"var Component=(()=>{var p=Object.create;var r=Object.defineProperty;var u=Object.getOwnPropertyDescriptor;var g=Object.getOwnPropertyNames;var f=Object.getPrototypeOf,m=Object.prototype.hasOwnProperty;var y=(i,e)=>()=>(e||i((e={exports:{}}).exports,e),e.exports),v=(i,e)=>{for(var n in e)r(i,n,{get:e[n],enumerable:!0})},s=(i,e,n,o)=>{if(e&&typeof e==\\"object\\"||typeof e==\\"function\\")for(let a of g(e))!m.call(i,a)&&a!==n&&r(i,a,{get:()=>e[a],enumerable:!(o=u(e,a))||o.enumerable});return i};var w=(i,e,n)=>(n=i!=null?p(f(i)):{},s(e||!i||!i.__esModule?r(n,\\"default\\",{value:i,enumerable:!0}):n,i)),b=i=>s(r({},\\"__esModule\\",{value:!0}),i);var c=y((U,l)=>{l.exports=_jsx_runtime});var x={};v(x,{default:()=>d,frontmatter:()=>k});var t=w(c()),k={title:\\"Unlocking whole-system visibility with Elastic Universal Profiling\\\\u2122\\",slug:\\"whole-system-visibility-elastic-universal-profiling\\",date:\\"2023-09-25\\",subtitle:\\"Learn how to derive quick, actionable code insights with stacktraces, flamegraphs, and more\\",description:\\"Visual profiling data can be overwhelming. This blog post aims to demystify continuous profiling and guide you through its unique visualizations. We will equip you with the knowledge to derive quick, actionable insights from Universal Profiling\\\\u2122.\\",author:[{slug:\\"israel-ogbole\\"},{slug:\\"francesco-gualazzi\\"}],image:\\"universal-profiling-blog-720x420.jpg\\",tags:[{slug:\\"universal-profiling\\"},{slug:\\"apm\\"}]};function h(i){let e={a:\\"a\\",div:\\"div\\",em:\\"em\\",h2:\\"h2\\",h3:\\"h3\\",img:\\"img\\",li:\\"li\\",ol:\\"ol\\",p:\\"p\\",strong:\\"strong\\",table:\\"table\\",tbody:\\"tbody\\",td:\\"td\\",th:\\"th\\",thead:\\"thead\\",tr:\\"tr\\",ul:\\"ul\\",...i.components};return(0,t.jsxs)(t.Fragment,{children:[(0,t.jsx)(e.h2,{id:\\"identify-optimize-measure-repeat\\",children:\\"Identify, optimize, measure, repeat!\\"}),`\\n`,(0,t.jsx)(e.p,{children:\\"SREs and developers who want to maintain robust, efficient systems and achieve optimal code performance need effective tools to measure and improve code performance. Profilers are invaluable for these tasks, as they can help you boost your app\'s throughput, ensure consistent system reliability, and gain a deeper understanding of your code\'s behavior at runtime. However, traditional profilers can be cumbersome to use, as they often require code recompilation and are limited to specific languages. Additionally, they can also have a high overhead that negatively affects performance and makes them less suitable for quick, real-time debugging in production environments.\\"}),`\\n`,(0,t.jsxs)(e.p,{children:[\\"To address the limitations of traditional profilers, Elastic\\",(0,t.jsx)(\\"sup\\",{children:\\"\\\\xAE\\"}),\\" recently \\",(0,t.jsx)(e.a,{href:\\"https://www.elastic.co/blog/continuous-profiling-is-generally-available\\",rel:\\"nofollow\\",children:\\"announced the general availability of Elastic Universal Profiling\\"}),\\", a \\",(0,t.jsx)(e.a,{href:\\"https://www.elastic.co/observability/universal-profiling\\",rel:\\"nofollow\\",children:\\"continuous profiling\\"}),\\" product that is refreshingly straightforward to use, eliminating the need for instrumentation, recompilations, or restarts. 
Moreover, Elastic Universal Profiling does not require on-host debug symbols and is language-agnostic, allowing you to profile any process running on your machines \\\\u2014 from your application\'s code to third-party libraries and even kernel functions.\\"]}),`\\n`,(0,t.jsxs)(e.p,{children:[\\"However, even the most advanced tools require a certain level of expertise to interpret the data effectively. The wealth of visual profiling data \\\\u2014 flamegraphs, stacktraces, or functions \\\\u2014 can initially seem overwhelming. This blog post aims to demystify \\",(0,t.jsx)(e.a,{href:\\"https://www.elastic.co/observability/universal-profiling\\",rel:\\"nofollow\\",children:\\"continuous profiling\\"}),\\" and guide you through its unique visualizations. We will equip you with the knowledge to derive quick, actionable insights from Universal Profiling.\\"]}),`\\n`,(0,t.jsx)(e.p,{children:\\"Let\\\\u2019s begin.\\"}),`\\n`,(0,t.jsx)(e.h2,{id:\\"stacktraces-the-cornerstone-for-profiling\\",children:\\"Stacktraces: The cornerstone for profiling\\"}),`\\n`,(0,t.jsx)(e.h3,{id:\\"it-all-begins-with-a-stacktrace--a-snapshot-capturing-the-cascade-of-function-calls\\",children:\\"It all begins with a stacktrace \\\\u2014 a snapshot capturing the cascade of function calls.\\"}),`\\n`,(0,t.jsx)(e.p,{children:\\"A stacktrace is a snapshot of the call stack of an application at a specific point in time. It captures the sequence of function calls that the program has made up to that point. In this way, a stacktrace serves as a historical record of the call stack, allowing you to trace back the steps that led to a particular state in your application.\\"}),`\\n`,(0,t.jsx)(e.p,{children:\\"Further, stacktraces are the foundational data structure that profilers rely on to determine what an application is executing at any given moment. This is particularly useful when, for instance, your infrastructure monitoring indicates that your application servers are consuming 95% of CPU resources. While utilities such as \'top -H\' can show the top processes that are consuming CPU, they lack the granularity needed to identify the specific lines of code (in the top process) responsible for the high usage.\\"}),`\\n`,(0,t.jsxs)(e.p,{children:[\\"In the case of Elastic Universal Profiling, \\",(0,t.jsx)(e.a,{href:\\"https://www.elastic.co/blog/ebpf-observability-security-workload-profiling\\",rel:\\"nofollow\\",children:\\"eBPF is used\\"}),\\" to perform sampling of every process that is keeping a CPU core busy. Unlike most instrumentation profilers that focus solely on your application code, Elastic Universal Profiling provides whole-system visibility \\\\u2014 it profiles not just your code, but also code you don\'t own, including third-party libraries and even kernel operations.\\"]}),`\\n`,(0,t.jsx)(e.p,{children:\\"The diagram below shows how the Universal Profiling agent works at a very high level. 
Step 5 indicates the ingestion of the stacktraces into the profiling collector, a new part of the Elastic Stack.\\"}),`\\n`,(0,t.jsxs)(e.p,{children:[\\"_ \\",(0,t.jsx)(e.strong,{children:\\"Just\\"}),\\" _ \\",(0,t.jsxs)(e.a,{href:\\"https://www.elastic.co/guide/en/observability/current/profiling-get-started.html\\",rel:\\"nofollow\\",children:[\\"_ \\",(0,t.jsx)(e.strong,{children:\\"deploy the profiling host agent\\"}),\\" _\\"]}),\\" \\",(0,t.jsx)(e.em,{children:(0,t.jsx)(e.strong,{children:\\"and receive profiling data (in Kibana\\"})}),(0,t.jsx)(\\"sup\\",{children:(0,t.jsx)(\\"em\\",{children:\\"\\\\xAE\\"})}),(0,t.jsx)(e.em,{children:(0,t.jsx)(e.strong,{children:\\") a few minutes later.\\"})}),\\" \\",(0,t.jsxs)(e.a,{href:\\"https://www.elastic.co/guide/en/observability/current/profiling-get-started.html\\",rel:\\"nofollow\\",children:[\\"_ \\",(0,t.jsx)(e.strong,{children:\\"Get started now\\"}),\\" _\\"]}),\\"_ \\",(0,t.jsx)(e.strong,{children:\\".\\"}),\\" _\\"]}),`\\n`,(0,t.jsx)(e.p,{children:(0,t.jsx)(e.img,{src:\\"/assets/images/whole-system-visibility-elastic-universal-profiling/elastic-blog-1-flowchart-linux.png\\",alt:\\"High-level depiction of how the profiling agent works\\",width:\\"1256\\",height:\\"512\\"})}),`\\n`,(0,t.jsxs)(e.ol,{children:[`\\n`,(0,t.jsxs)(e.li,{children:[`\\n`,(0,t.jsx)(e.p,{children:\\"Unwinder eBPF programs (bytecode) are sent to the kernel.\\"}),`\\n`]}),`\\n`,(0,t.jsxs)(e.li,{children:[`\\n`,(0,t.jsx)(e.p,{children:\\"The kernel verifies that the BPF program is safe. If accepted, the program is attached to the probes and executed when the event occurs.\\"}),`\\n`]}),`\\n`,(0,t.jsxs)(e.li,{children:[`\\n`,(0,t.jsx)(e.p,{children:\\"The eBPF programs pass the collected data to userspace via maps.\\"}),`\\n`]}),`\\n`,(0,t.jsxs)(e.li,{children:[`\\n`,(0,t.jsx)(e.p,{children:\\"The agent reads the collected data from maps. The data transferred from the agent to the maps are process-specific and interpreter-specific meta-information that help the eBPF unwinder programs perform unwinding.\\"}),`\\n`]}),`\\n`,(0,t.jsxs)(e.li,{children:[`\\n`,(0,t.jsx)(e.p,{children:\\"Stacktraces, metrics, and metadata are pushed to the Elastic Stack.\\"}),`\\n`]}),`\\n`,(0,t.jsxs)(e.li,{children:[`\\n`,(0,t.jsx)(e.p,{children:\\"Visualize data as flamegraphs, stacktraces, and functions via Kibana.\\"}),`\\n`]}),`\\n`]}),`\\n`,(0,t.jsx)(e.p,{children:\\"While stacktraces are the key ingredient for most profiling tools, interpreting them can be tricky. Let\'s take a look at a simple example to make things a bit easier. 
The table below shows a group of stacktraces from a Java application and assigns each a percentage to indicate its share of CPU time consumption.\\"}),`\\n`,(0,t.jsx)(e.p,{children:(0,t.jsx)(e.strong,{children:\\"Table 1: Grouped Stacktraces with CPU Time Percentage\\"})}),`\\n`,(0,t.jsx)(e.div,{className:\\"table-container\\",children:(0,t.jsxs)(e.table,{children:[(0,t.jsx)(e.thead,{children:(0,t.jsxs)(e.tr,{children:[(0,t.jsx)(e.th,{children:\\"Percentage\\"}),(0,t.jsx)(e.th,{children:\\"Function Calls\\"})]})}),(0,t.jsxs)(e.tbody,{children:[(0,t.jsxs)(e.tr,{children:[(0,t.jsx)(e.td,{children:\\"60%\\"}),(0,t.jsx)(e.td,{children:\\"startApp -> authenticateUser -> processTransaction\\"})]}),(0,t.jsxs)(e.tr,{children:[(0,t.jsx)(e.td,{children:\\"20%\\"}),(0,t.jsx)(e.td,{children:\\"startApp -> loadAccountDetails -> fetchRecentTransactions\\"})]}),(0,t.jsxs)(e.tr,{children:[(0,t.jsx)(e.td,{children:\\"10%\\"}),(0,t.jsx)(e.td,{children:\\"startApp -> authenticateUser -> processTransaction -> verifyFunds\\"})]}),(0,t.jsxs)(e.tr,{children:[(0,t.jsx)(e.td,{children:\\"2%\\"}),(0,t.jsx)(e.td,{children:\\"startApp -> authenticateUser -> processTransaction ->libjvm.so\\"})]}),(0,t.jsxs)(e.tr,{children:[(0,t.jsx)(e.td,{children:\\"1%\\"}),(0,t.jsx)(e.td,{children:\\"startApp -> authenticateUser -> processTransaction ->libjvm.so ->vmlinux: asm_common_interrupt ->vmlinux: asm_sysvec_apic_timer_interrupt\\"})]})]})]})}),`\\n`,(0,t.jsx)(e.p,{children:\\"The percentages above represent the relative frequency of each specific stacktrace compared to the total number of stacktraces collected over the observation period, not actual CPU usage percentages. Also, the libjvm.so and kernel frames (vmlinux:*) in the example are commonly observed with whole-system profilers like Elastic Universal Profiling.\\"}),`\\n`,(0,t.jsxs)(e.p,{children:[\\"Also, we can see that \\",(0,t.jsx)(e.strong,{children:\\"60%\\"}),\\" of the time is spent in the sequence startApp; authenticateUser; processTransaction. An additional \\",(0,t.jsx)(e.strong,{children:\\"10%\\"}),\\" of the processing time is allocated to verifyFunds, a function invoked by processTransaction. Given these observations, it becomes evident that optimization initiatives would yield the most impact if centered on the processTransaction function, as it is one of the most expensive functions. However, real-world stacktraces can be far more intricate than this example. So how do we make sense of them quickly? The answer to this problem resulted in the creation of flamegraphs.\\"]}),`\\n`,(0,t.jsx)(e.h2,{id:\\"flamegraphs-a-visualization-of-stacktraces\\",children:\\"Flamegraphs: A visualization of stacktraces\\"}),`\\n`,(0,t.jsxs)(e.p,{children:[\\"While the above example may appear straightforward, it scarcely reflects the complexities encountered when aggregating multiple stacktraces across a fleet of machines on a continuous basis. The depth of the stack traces and the numerous branching paths can make it increasingly difficult to pinpoint where code is consuming resources. This is where flamegraphs, a concept popularized by \\",(0,t.jsx)(e.a,{href:\\"https://www.brendangregg.com/flamegraphs.html\\",rel:\\"nofollow\\",children:\\"Brendan Gregg\\"}),\\", come into play.\\"]}),`\\n`,(0,t.jsx)(e.p,{children:\\"A flamegraph is a visual interpretation of stacktraces, designed to quickly and accurately identify the functions that are consuming the most resources. 
Each function is represented by a rectangle, where the width of the rectangle represents the amount of time spent in the function, and the number of stacked rectangles represents the stack depth. The stack depth is the number of functions that were called to reach the current function.\\"}),`\\n`,(0,t.jsx)(e.p,{children:\\"Elastic Universal Profiling uses icicle graphs, which are an inverted variant of the standard flamegraph. In an icicle graph, the root function is at the top, and its child functions are shown below their parents \\\\u2013\\\\u2013 making it easier to see the hierarchy of functions and how they are related to each other.\\"}),`\\n`,(0,t.jsx)(e.p,{children:\\"In most flamegraphs, the y-axis represents stack depth, but there is no standardization for the x-axis. Some profiling tools use the x-axis to indicate the passage of time; in these instances, the graph is more accurately termed a flame chart. Others sort the x-axis alphabetically. Universal Profiling sorts functions on the x-axis based on relative CPU percentage utilization, starting with the function that consumes the most CPU time on the left, as shown in the example icicle graph below.\\"}),`\\n`,(0,t.jsx)(e.p,{children:(0,t.jsx)(e.img,{src:\\"/assets/images/whole-system-visibility-elastic-universal-profiling/elastic-blog-2-cpu-time.png\\",alt:\\"Example icicle graph: The percentage represents relative CPU time, not the real CPU usage time. \\",width:\\"591\\",height:\\"504\\"})}),`\\n`,(0,t.jsx)(e.h2,{id:\\"debugging-and-optimizing-performance-issues-stacktraces-topn-functions-flamegraphs\\",children:\\"Debugging and optimizing performance issues: Stacktraces, TopN functions, flamegraphs\\"}),`\\n`,(0,t.jsxs)(e.p,{children:[\\"SREs and SWEs can use Universal Profiling for troubleshooting, debugging, and performance optimization. It builds stacktraces that go from the kernel, through userspace native code, all the way into code running in higher level runtimes, enabling you to \\",(0,t.jsx)(e.strong,{children:\\"identify performance regressions\\"}),\\", \\",(0,t.jsx)(e.strong,{children:\\"reduce wasteful computations\\"}),\\", and \\",(0,t.jsx)(e.strong,{children:\\"debug complex issues faster\\"}),\\".\\"]}),`\\n`,(0,t.jsx)(e.p,{children:\\"To this end, Universal Profiling offers three main visualizations: Stacktraces, TopN Functions, and flamegraphs.\\"}),`\\n`,(0,t.jsx)(e.h3,{id:\\"stacktrace-view\\",children:\\"Stacktrace view\\"}),`\\n`,(0,t.jsxs)(e.p,{children:[\\"The stacktraces view shows grouped stacktrace graphs by threads, hosts, Kubernetes deployments, and containers. It can be used to detect unexpected CPU spikes across threads and drill down into a smaller time range to investigate further with a flamegraph. 
Refer to the \\",(0,t.jsx)(e.a,{href:\\"https://www.elastic.co/guide/en/observability/current/universal-profiling.html#profiling-stacktraces-intro\\",rel:\\"nofollow\\",children:\\"documentation\\"}),\\" for details.\\"]}),`\\n`,(0,t.jsx)(e.p,{children:(0,t.jsx)(e.img,{src:\\"/assets/images/whole-system-visibility-elastic-universal-profiling/elastic-blog-3-wave-patterns.png\\",alt:\\"Notice the wave pattern in the stacktrace view, enabling you to drill down into a CPU spike \\",width:\\"1999\\",height:\\"870\\"})}),`\\n`,(0,t.jsx)(e.h3,{id:\\"topn-functions-view\\",children:\\"TopN functions view\\"}),`\\n`,(0,t.jsxs)(e.p,{children:[\\"Universal Profiling\'s topN functions view shows the most frequently sampled functions, broken down by CPU time, annualized CO\\",(0,t.jsx)(\\"sub\\",{children:\\"2\\"}),\\", and annualized cost estimates. You can use this view to identify the most expensive functions across your entire fleet, and then apply filters to focus on specific components for a more detailed analysis. Clicking on a function name will redirect you to the flamegraph, enabling you to examine the call hierarchy.\\"]}),`\\n`,(0,t.jsx)(e.p,{children:(0,t.jsx)(e.img,{src:\\"/assets/images/whole-system-visibility-elastic-universal-profiling/elastic-blog-4-topN-functions-page.png\\",alt:\\"TopN functions page\\",width:\\"1999\\",height:\\"1117\\"})}),`\\n`,(0,t.jsx)(e.h3,{id:\\"flamegraphs-view\\",children:\\"Flamegraphs view\\"}),`\\n`,(0,t.jsxs)(e.p,{children:[\\"The flamegraph page is where you will most likely spend the most time, especially when debugging and optimizing. We recommend that you use the guide below to identify performance bottlenecks and optimization opportunities with flamegraphs. The three key elements-conditions to look for are \\",(0,t.jsx)(e.strong,{children:\\"width\\"}),\\" , \\",(0,t.jsx)(e.strong,{children:\\"hierarchy\\"}),\\" , and \\",(0,t.jsx)(e.strong,{children:\\"height\\"}),\\".\\"]}),`\\n`,(0,t.jsx)(e.p,{children:(0,t.jsx)(e.img,{src:\\"/assets/images/whole-system-visibility-elastic-universal-profiling/elastic-blog-5-icivle-flamegraph.png\\",alt:\\"Icicle flamegraph: We use the colors to determine different types of code (e.g., native, interpreted, kernel).\\",width:\\"1752\\",height:\\"1622\\"})}),`\\n`,(0,t.jsxs)(e.p,{children:[(0,t.jsx)(e.strong,{children:\\"Width matters:\\"}),\\" In icicle graphs, wider rectangles signify functions taking up more CPU time. Always read the graph from left to right and note the widest rectangles, as these are the prime hot spots.\\"]}),`\\n`,(0,t.jsxs)(e.p,{children:[(0,t.jsx)(e.strong,{children:\\"Hierarchy matters:\\"}),\\" Navigate the graph\'s stack to understand function relationships. This vertical examination will help you identify whether one or multiple functions are responsible for performance bottlenecks. This could also uncover opportunities for code improvements, such as swapping an inefficient library or avoiding unnecessary I/O operations.\\"]}),`\\n`,(0,t.jsxs)(e.p,{children:[(0,t.jsx)(e.strong,{children:\\"Height matters:\\"}),\\" Elevated or tall stacks in the graph usually point to deep call hierarchies. These can be an indicator of complex and less efficient code structures that may require attention.\\"]}),`\\n`,(0,t.jsx)(e.p,{children:\\"Also, when navigating a flamegraph, you may want to look for specific function names to validate your assumptions on their presence: in the Universal Profiling flamegraphs view, there is a \\\\u201CSearch\\\\u201D bar at the bottom left corner of the view. 
You can input a regex, and the match will be highlighted in the flamegraph; by clicking on the left and right arrows next to the Search bar, you can move across the occurrences on the flamegraph and spot the callers and callees of the matched function.\\"}),`\\n`,(0,t.jsx)(e.p,{children:\\"In summary:\\"}),`\\n`,(0,t.jsxs)(e.ul,{children:[`\\n`,(0,t.jsxs)(e.li,{children:[(0,t.jsx)(e.strong,{children:\\"Scan\\"}),\\" horizontally from left to right, focusing on width for CPU-intensive functions.\\"]}),`\\n`,(0,t.jsxs)(e.li,{children:[(0,t.jsx)(e.strong,{children:\\"Examine\\"}),\\" the stack vertically to spot bottlenecks.\\"]}),`\\n`,(0,t.jsxs)(e.li,{children:[(0,t.jsx)(e.strong,{children:\\"Look\\"}),\\" for \\",(0,t.jsx)(e.strong,{children:\\"towering stacks\\"}),\\" to identify potential complexities in the code.\\"]}),`\\n`]}),`\\n`,(0,t.jsx)(e.p,{children:\\"To recap, use topN functions to generate optimization hypotheses and validate them with stacktraces and/or flamegraphs. Use stacktraces to monitor CPU utilization trends and to delve into the finer details. Use flamegraphs to quickly debug and optimize your code, using width, hierarchy, and height as guides.\\"}),`\\n`,(0,t.jsxs)(e.p,{children:[\\"_ \\",(0,t.jsx)(e.strong,{children:\\"Identify. Optimize. Measure. Repeat!\\"}),\\" _\\"]}),`\\n`,(0,t.jsx)(e.h2,{id:\\"measure-the-impact-of-your-change\\",children:\\"Measure the impact of your change\\"}),`\\n`,(0,t.jsx)(e.h3,{id:\\"for-the-very-first-time-in-history-developers-can-now-measure-the-performance-gained-or-lost-cloud-cost-and-carbon-footprint-impact-of-every-deployed-change\\",children:\\"For the very first time in history, developers can now measure the performance (gained or lost), cloud cost, and carbon footprint impact of every deployed change.\\"}),`\\n`,(0,t.jsx)(e.p,{children:\\"Once you have identified a performance issue and applied fixes or optimizations to your code, it is essential to measure the impact of your changes. The differential topN functions and differential flamegraph pages are invaluable for this, as they can help you identify regressions and measure your change impact not only in terms of performance but also in terms of carbon emissions and cost savings.\\"}),`\\n`,(0,t.jsx)(e.p,{children:(0,t.jsx)(e.img,{src:\\"/assets/images/whole-system-visibility-elastic-universal-profiling/elastic-blog-6-uni-profiling.png\\",alt:\\"A differential function view, showing the performance, CO2, and cost impact of a change\\",width:\\"1999\\",height:\\"1383\\"})}),`\\n`,(0,t.jsx)(e.p,{children:\\"The Diff column indicates a change in the function\\\\u2019s rank.\\"}),`\\n`,(0,t.jsx)(e.p,{children:\\"You may need to use tags or other metadata, such as container and deployment name, in combination with time ranges to differentiate between the optimized and non-optimized changes.\\"}),`\\n`,(0,t.jsx)(e.p,{children:(0,t.jsx)(e.img,{src:\\"/assets/images/whole-system-visibility-elastic-universal-profiling/elastic-blog-7-differential-flamegraph.png\\",alt:\\"A differential flamegraph showing regression in A/B testing\\",width:\\"1999\\",height:\\"1273\\"})}),`\\n`,(0,t.jsx)(e.h2,{id:\\"universal-profiling-the-key-to-optimizing-application-resources\\",children:\\"Universal Profiling: The key to optimizing application resources\\"}),`\\n`,(0,t.jsx)(e.p,{children:\\"Computational efficiency is no longer just a nice-to-have, but a must-have from both a financial and environmental sustainability perspective. 
Elastic Universal Profiling provides unprecedented visibility into the runtime behavior of all your applications, so you can identify and optimize the most resource-intensive areas of your code. The result is not merely better-performing software but also reduced resource consumption, lower cloud costs, and a reduction in carbon footprint. Optimizing your code with Universal Profiling is not only the right thing to do for your business, it\\\\u2019s the right thing to do for our world.\\"}),`\\n`,(0,t.jsxs)(e.p,{children:[(0,t.jsx)(e.a,{href:\\"https://www.elastic.co/guide/en/observability/current/profiling-get-started.html\\",rel:\\"nofollow\\",children:\\"Get started\\"}),\\" with Elastic Universal Profiling today.\\"]}),`\\n`,(0,t.jsx)(e.p,{children:(0,t.jsx)(e.em,{children:\\"The release and timing of any features or functionality described in this post remain at Elastic\'s sole discretion. Any features or functionality not currently available may not be delivered on time or at all.\\"})})]})}function d(i={}){let{wrapper:e}=i.components||{};return e?(0,t.jsx)(e,{...i,children:(0,t.jsx)(h,{...i})}):h(i)}return b(x);})();\\n;return Component;"},"_id":"articles/whole-system-visibility-elastic-universal-profiling.mdx","_raw":{"sourceFilePath":"articles/whole-system-visibility-elastic-universal-profiling.mdx","sourceFileName":"whole-system-visibility-elastic-universal-profiling.mdx","sourceFileDir":"articles","contentType":"mdx","flattenedPath":"articles/whole-system-visibility-elastic-universal-profiling"},"type":"Article","imageUrl":"/assets/images/whole-system-visibility-elastic-universal-profiling/universal-profiling-blog-720x420.jpg","readingTime":"14 min read","url":"/whole-system-visibility-elastic-universal-profiling","headings":[{"level":2,"title":"Identify, optimize, measure, repeat!","href":"#identify-optimize-measure-repeat"},{"level":2,"title":"Stacktraces: The cornerstone for profiling","href":"#stacktraces-the-cornerstone-for-profiling"},{"level":3,"title":"It all begins with a stacktrace — a snapshot capturing the cascade of function calls.","href":"#it-all-begins-with-a-stacktrace--a-snapshot-capturing-the-cascade-of-function-calls"},{"level":2,"title":"Flamegraphs: A visualization of stacktraces","href":"#flamegraphs-a-visualization-of-stacktraces"},{"level":2,"title":"Debugging and optimizing performance issues: Stacktraces, TopN functions, flamegraphs","href":"#debugging-and-optimizing-performance-issues-stacktraces-topn-functions-flamegraphs"},{"level":3,"title":"Stacktrace view","href":"#stacktrace-view"},{"level":3,"title":"TopN functions view","href":"#topn-functions-view"},{"level":3,"title":"Flamegraphs view","href":"#flamegraphs-view"},{"level":2,"title":"Measure the impact of your change","href":"#measure-the-impact-of-your-change"},{"level":3,"title":"For the very first time in history, developers can now measure the performance (gained or lost), cloud cost, and carbon footprint impact of every deployed change.","href":"#for-the-very-first-time-in-history-developers-can-now-measure-the-performance-gained-or-lost-cloud-cost-and-carbon-footprint-impact-of-every-deployed-change"},{"level":2,"title":"Universal Profiling: The key to optimizing application resources","href":"#universal-profiling-the-key-to-optimizing-application-resources"}]}]');function o(){return s.sort((e,t)=>new Date(t.date).getTime()-new 
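/* comparator sorts descending by date (newest article first); note that Array.prototype.sort mutates the parsed array in place */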
Date(e.date).getTime()).map(e=>{let{author:t,tags:n,...r}=e,s=t.map(e=>i.find(t=>t.slug===e.slug)),o=n?n.map(e=>a.find(t=>t.slug===e.slug)):[];return{...r,author:s,tags:o}})}function r(){let{authorSlug:e,tagSlug:t,limit:n}=arguments.length>0&&void 0!==arguments[0]?arguments[0]:{},i=(e?o().filter(t=>t.author.some(t=>t.slug===e)):t?o().filter(e=>e.tags.some(e=>e.slug===t)):o()).map(e=>({title:e.title,date:e.date,description:e.description,subtitle:e.subtitle||"",slug:e.slug,imageUrl:e.imageUrl,featured:e.featured||!1,author:e.author,tags:e.tags}));return n?i.slice(0,n):i}function l(e){return a.find(t=>t.slug===e)}function c(){return a.sort((e,t)=>r({tagSlug:t.slug}).length-r({tagSlug:e.slug}).length).filter(e=>r({tagSlug:e.slug}).length>=5)}[...i,...a,...s]},8499:(e,t,n)=>{n.d(t,{h:()=>i});let i=e=>"".concat("/observability-labs").concat(e)}}]);
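/*
  A minimal, de-minified sketch of the article-lookup helpers in the module
  above, kept in a comment so the chunk's behavior is unchanged. The names
  `articles`, `authors`, `tags`, `getArticles`, and `getArticleCards` are
  illustrative assumptions; the bundle's real identifiers are the minified
  `s`, `i`, `a`, `o`, and `r`. Note the original resolved tag slugs against
  the wrong array because the rest-spread binding shadowed the tags
  collection; the version below (and the fix applied above) looks tags up the
  same way authors are looked up.

  // Resolve each article's author/tag slugs to full objects, newest first.
  // Unlike the minified original, this copies the array before sorting so
  // the parsed source data is not mutated.
  function getArticles() {
    return [...articles]
      .sort((a, b) => new Date(b.date).getTime() - new Date(a.date).getTime())
      .map(({ author, tags: tagRefs, ...rest }) => ({
        ...rest,
        author: author.map((ref) => authors.find((x) => x.slug === ref.slug)),
        tags: (tagRefs || []).map((ref) => tags.find((x) => x.slug === ref.slug)),
      }));
  }

  // Project articles into card data, optionally filtered by author or tag
  // slug and truncated to `limit` entries.
  function getArticleCards({ authorSlug, tagSlug, limit } = {}) {
    let list = getArticles();
    if (authorSlug) list = list.filter((a) => a.author.some((x) => x.slug === authorSlug));
    else if (tagSlug) list = list.filter((a) => a.tags.some((x) => x.slug === tagSlug));
    const cards = list.map((a) => ({
      title: a.title,
      date: a.date,
      description: a.description,
      subtitle: a.subtitle || "",
      slug: a.slug,
      imageUrl: a.imageUrl,
      featured: a.featured || false,
      author: a.author,
      tags: a.tags,
    }));
    return limit ? cards.slice(0, limit) : cards;
  }
*/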